feat: optimize for reading a{sv} messages and headers (#98)

This commit is contained in:
J. Nick Koston 2022-10-09 12:01:25 -10:00 committed by GitHub
parent 88878f0611
commit 4648d29df4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 13 deletions

View File

@ -50,10 +50,18 @@ cdef class Unmarshaller:
cpdef read_uint32_cast(self, object type_)
cpdef read_string_unpack(self, object type_)
cdef _read_string_unpack(self)
cpdef read_string_cast(self, object type_)
@cython.locals(
buf_bytes=cython.bytearray,
)
cpdef read_string_cast(self, object type_)
cdef _read_string_cast(self)
cdef _read_variant(self)
@cython.locals(
beginning_pos=cython.ulong,
@ -61,16 +69,19 @@ cdef class Unmarshaller:
)
cpdef read_array(self, object type_)
cpdef read_signature(self, object type_)
@cython.locals(
o=cython.ulong,
signature_len=cython.uint,
)
cpdef read_signature(self, object type_)
cdef _read_signature(self)
@cython.locals(
endian=cython.uint,
protocol_version=cython.uint,
can_cast=cython.bint
can_cast=cython.bint,
key=cython.str
)
cdef _read_header(self)

View File

@ -253,6 +253,9 @@ class Unmarshaller:
def read_string_cast(self, type_: SignatureType) -> str:
    """Read a string using cast.

    Thin wrapper that delegates to ``_read_string_cast``. ``type_`` is
    accepted but not used by this method.
    """
    # NOTE(review): ``type_`` is presumably kept so this method matches the
    # ``reader(self, type_)`` call shape used for reader lookups -- confirm.
    return self._read_string_cast()
def _read_string_cast(self) -> str:
self._pos += UINT32_SIZE + (-self._pos & (UINT32_SIZE - 1)) # align
str_start = self._pos
# read terminating '\0' byte as well (str_length + 1)
@ -261,6 +264,9 @@ class Unmarshaller:
return self._buf[str_start : self._pos - 1].decode()
def read_string_unpack(self, type_: SignatureType) -> str:
    """Read a string using unpack.

    Thin wrapper that delegates to ``_read_string_unpack``. ``type_`` is
    accepted but not used by this method.
    """
    # NOTE(review): ``type_`` is presumably kept so this method matches the
    # ``reader(self, type_)`` call shape used for reader lookups -- confirm.
    return self._read_string_unpack()
def _read_string_unpack(self) -> str:
"""Read a string using unpack."""
self._pos += UINT32_SIZE + (-self._pos & (UINT32_SIZE - 1)) # align
str_start = self._pos
@ -269,6 +275,9 @@ class Unmarshaller:
return self._buf[str_start : self._pos - 1].decode()
def read_signature(self, type_: SignatureType) -> str:
    """Read a signature string from the buffer.

    Thin wrapper that delegates to ``_read_signature``. ``type_`` is
    accepted but not used by this method.
    """
    # NOTE(review): ``type_`` is presumably kept so this method matches the
    # ``reader(self, type_)`` call shape used for reader lookups -- confirm.
    return self._read_signature()
def _read_signature(self) -> str:
signature_len = self._buf[self._pos] # byte
o = self._pos + 1
# read terminating '\0' byte as well (str_length + 1)
@ -276,7 +285,10 @@ class Unmarshaller:
return self._buf[o : o + signature_len].decode()
def read_variant(self, type_: SignatureType) -> Variant:
tree = get_signature_tree(self.read_signature(type_))
return self._read_variant()
def _read_variant(self) -> Variant:
tree = get_signature_tree(self._read_signature())
signature_type = tree.types[0]
# verify in Variant is only useful on construction not unmarshalling
return Variant(
@ -326,13 +338,28 @@ class Unmarshaller:
beginning_pos = self._pos
child_0 = child_type.children[0]
child_1 = child_type.children[1]
reader_1 = self._readers[child_1.token]
reader_0 = self._readers[child_0.token]
child_0_token = child_0.token
child_1_token = child_1.token
# Strings with variant values are the most common case
# so we optimize for that by inlining the string reading
# and the variant reading here
if child_0_token in "os" and child_1_token == "v":
while self._pos - beginning_pos < array_length:
self._pos += -self._pos & 7 # align 8
if self._uint32_unpack: # cannot cast
key = self._read_string_unpack()
else:
key = self._read_string_cast()
result_dict[key] = self._read_variant()
else:
reader_1 = self._readers[child_1_token]
reader_0 = self._readers[child_0_token]
while self._pos - beginning_pos < array_length:
self._pos += -self._pos & 7 # align 8
key = reader_0(self, child_0)
result_dict[key] = reader_1(self, child_1)
while self._pos - beginning_pos < array_length:
self._pos += -self._pos & 7 # align 8
key = reader_0(self, child_0)
result_dict[key] = reader_1(self, child_1)
return result_dict
if array_length == 0:
@ -357,13 +384,27 @@ class Unmarshaller:
field_0 = buf[self._pos - 1]
# Now read the v (variant) of struct (yv)
# first we read the signature
signature_len = buf[self._pos] # byte
o = self._pos + 1
self._pos += signature_len + 2 # one for the byte, one for the '\0'
type_ = get_signature_tree(buf[o : o + signature_len].decode()).types[0]
headers[HEADER_MESSAGE_ARG_NAME[field_0]] = readers[type_.token](
self, type_
)
token = type_.token
# Now that we have the token we can read the variant value
key = HEADER_MESSAGE_ARG_NAME[field_0]
# Strings and signatures are the most common types
# so we inline them for performance
if token in "os":
if self._uint32_unpack: # cannot cast
headers[key] = self._read_string_unpack()
else:
headers[key] = self._read_string_cast()
elif token == "g":
headers[key] = self._read_signature()
else:
# There shouldn't be any other types in the header
# but just in case, we'll read it using the slow path
headers[key] = readers[type_.token](self, type_)
return headers
def _read_header(self) -> None: