From 4648d29df4b616f49c06ca9fcbfbc27717d97135 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 9 Oct 2022 12:01:25 -1000 Subject: [PATCH] feat: optimize for reading a{sv} messages and headers (#98) --- src/dbus_fast/_private/unmarshaller.pxd | 17 +++++-- src/dbus_fast/_private/unmarshaller.py | 61 +++++++++++++++++++++---- 2 files changed, 65 insertions(+), 13 deletions(-) diff --git a/src/dbus_fast/_private/unmarshaller.pxd b/src/dbus_fast/_private/unmarshaller.pxd index 5d57819..4c30dd8 100644 --- a/src/dbus_fast/_private/unmarshaller.pxd +++ b/src/dbus_fast/_private/unmarshaller.pxd @@ -50,10 +50,18 @@ cdef class Unmarshaller: cpdef read_uint32_cast(self, object type_) + cpdef read_string_unpack(self, object type_) + + cdef _read_string_unpack(self) + + cpdef read_string_cast(self, object type_) + @cython.locals( buf_bytes=cython.bytearray, ) - cpdef read_string_cast(self, object type_) + cdef _read_string_cast(self) + + cdef _read_variant(self) @cython.locals( beginning_pos=cython.ulong, @@ -61,16 +69,19 @@ cdef class Unmarshaller: ) cpdef read_array(self, object type_) + cpdef read_signature(self, object type_) + @cython.locals( o=cython.ulong, signature_len=cython.uint, ) - cpdef read_signature(self, object type_) + cdef _read_signature(self) @cython.locals( endian=cython.uint, protocol_version=cython.uint, - can_cast=cython.bint + can_cast=cython.bint, + key=cython.str ) cdef _read_header(self) diff --git a/src/dbus_fast/_private/unmarshaller.py b/src/dbus_fast/_private/unmarshaller.py index 23d8b2d..30f7874 100644 --- a/src/dbus_fast/_private/unmarshaller.py +++ b/src/dbus_fast/_private/unmarshaller.py @@ -253,6 +253,9 @@ class Unmarshaller: def read_string_cast(self, type_: SignatureType) -> str: """Read a string using cast.""" + return self._read_string_cast() + + def _read_string_cast(self) -> str: self._pos += UINT32_SIZE + (-self._pos & (UINT32_SIZE - 1)) # align str_start = self._pos # read terminating '\0' byte as well (str_length + 1) @@ -261,6 +264,9 @@ class Unmarshaller: return self._buf[str_start : self._pos - 1].decode() def read_string_unpack(self, type_: SignatureType) -> str: + return self._read_string_unpack() + + def _read_string_unpack(self) -> str: """Read a string using unpack.""" self._pos += UINT32_SIZE + (-self._pos & (UINT32_SIZE - 1)) # align str_start = self._pos @@ -269,6 +275,9 @@ class Unmarshaller: return self._buf[str_start : self._pos - 1].decode() def read_signature(self, type_: SignatureType) -> str: + return self._read_signature() + + def _read_signature(self) -> str: signature_len = self._buf[self._pos] # byte o = self._pos + 1 # read terminating '\0' byte as well (str_length + 1) @@ -276,7 +285,10 @@ class Unmarshaller: return self._buf[o : o + signature_len].decode() def read_variant(self, type_: SignatureType) -> Variant: - tree = get_signature_tree(self.read_signature(type_)) + return self._read_variant() + + def _read_variant(self) -> Variant: + tree = get_signature_tree(self._read_signature()) signature_type = tree.types[0] # verify in Variant is only useful on construction not unmarshalling return Variant( @@ -326,13 +338,28 @@ class Unmarshaller: beginning_pos = self._pos child_0 = child_type.children[0] child_1 = child_type.children[1] - reader_1 = self._readers[child_1.token] - reader_0 = self._readers[child_0.token] + child_0_token = child_0.token + child_1_token = child_1.token + + # Strings with variant values are the most common case + # so we optimize for that by inlining the string reading + # and the variant reading here + if child_0_token in "os" and child_1_token == "v": + while self._pos - beginning_pos < array_length: + self._pos += -self._pos & 7 # align 8 + if self._uint32_unpack: # cannot cast + key = self._read_string_unpack() + else: + key = self._read_string_cast() + result_dict[key] = self._read_variant() + else: + reader_1 = self._readers[child_1_token] + reader_0 = self._readers[child_0_token] + while self._pos - beginning_pos < array_length: + self._pos += -self._pos & 7 # align 8 + key = reader_0(self, child_0) + result_dict[key] = reader_1(self, child_1) - while self._pos - beginning_pos < array_length: - self._pos += -self._pos & 7 # align 8 - key = reader_0(self, child_0) - result_dict[key] = reader_1(self, child_1) return result_dict if array_length == 0: @@ -357,13 +384,27 @@ class Unmarshaller: field_0 = buf[self._pos - 1] # Now read the v (variant) of struct (yv) + # first we read the signature signature_len = buf[self._pos] # byte o = self._pos + 1 self._pos += signature_len + 2 # one for the byte, one for the '\0' type_ = get_signature_tree(buf[o : o + signature_len].decode()).types[0] - headers[HEADER_MESSAGE_ARG_NAME[field_0]] = readers[type_.token]( - self, type_ - ) + token = type_.token + # Now that we have the token we can read the variant value + key = HEADER_MESSAGE_ARG_NAME[field_0] + # Strings and signatures are the most common types + # so we inline them for performance + if token in "os": + if self._uint32_unpack: # cannot cast + headers[key] = self._read_string_unpack() + else: + headers[key] = self._read_string_cast() + elif token == "g": + headers[key] = self._read_signature() + else: + # There shouldn't be any other types in the header + # but just in case, we'll read it using the slow path + headers[key] = readers[type_.token](self, type_) return headers def _read_header(self) -> None: