feat: improve unmarshall performance (#35)

This commit is contained in:
J. Nick Koston 2022-09-24 13:24:49 -10:00 committed by GitHub
parent 4a7c4c400d
commit db436b7a10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 95 additions and 47 deletions

View File

@ -17,17 +17,25 @@ from .constants import (
HeaderField,
)
IS_LITTLE_ENDIAN = sys.byteorder == "little"
IS_BIG_ENDIAN = sys.byteorder == "big"
MAX_UNIX_FDS = 16
UNPACK_SYMBOL = {LITTLE_ENDIAN: "<", BIG_ENDIAN: ">"}
UNPACK_LENGTHS = {BIG_ENDIAN: Struct(">III"), LITTLE_ENDIAN: Struct("<III")}
UINT32_CAST = "I"
UINT32_SIZE = 4
UINT32_DBUS_TYPE = "u"
UINT32_SIGNATURE = SignatureTree._get(UINT32_DBUS_TYPE).types[0]
DBUS_TO_CTYPE = {
"y": ("B", 1), # byte
"n": ("h", 2), # int16
"q": ("H", 2), # uint16
"i": ("i", 4), # int32
"u": ("I", 4), # uint32
UINT32_DBUS_TYPE: (UINT32_CAST, UINT32_SIZE), # uint32
"x": ("q", 8), # int64
"t": ("Q", 8), # uint64
"d": ("d", 8), # double
@ -37,8 +45,6 @@ DBUS_TO_CTYPE = {
HEADER_SIGNATURE_SIZE = 16
HEADER_ARRAY_OF_STRUCT_SIGNATURE_POSITION = 12
UINT32_SIGNATURE = SignatureTree._get("u").types[0]
HEADER_DESTINATION = HeaderField.DESTINATION.name
HEADER_PATH = HeaderField.PATH.name
HEADER_INTERFACE = HeaderField.INTERFACE.name
@ -179,12 +185,28 @@ class Unmarshaller:
def read_boolean(self, _=None):
return bool(self.read_argument(UINT32_SIGNATURE))
def read_string(self, _=None):
str_length = self.read_argument(UINT32_SIGNATURE)
def read_string_cast(self, _=None):
"""Read a string using cast."""
self.offset += UINT32_SIZE + (-self.offset & (UINT32_SIZE - 1)) # align
str_start = self.offset
# read terminating '\0' byte as well (str_length + 1)
self.offset += str_length + 1
return self.buf[str_start : str_start + str_length].decode()
self.offset += (
self.view[self.offset - UINT32_SIZE : self.offset].cast(UINT32_CAST)[0] + 1
)
return self.buf[str_start : self.offset - 1].decode()
def read_string_unpack(self, _=None):
"""Read a string using unpack."""
self.offset += UINT32_SIZE + (-self.offset & (UINT32_SIZE - 1)) # align
str_start = self.offset
# read terminating '\0' byte as well (str_length + 1)
self.offset += (
self.readers[UINT32_DBUS_TYPE][3].unpack_from(
self.view, str_start - UINT32_SIZE
)[0]
+ 1
)
return self.buf[str_start : self.offset - 1].decode()
def read_signature(self, _=None):
signature_len = self.view[self.offset] # byte
@ -288,10 +310,15 @@ class Unmarshaller:
self.msg_len = (
self.header_len + (-self.header_len & 7) + self.body_len
) # align 8
can_cast = bool(sys.byteorder == "little" and endian == LITTLE_ENDIAN) or (
sys.byteorder == "big" and endian == BIG_ENDIAN
)
self.readers = self._readers_by_type[(endian, can_cast)]
self.readers = self._readers_by_type[
(
endian,
bool(
(IS_LITTLE_ENDIAN and endian == LITTLE_ENDIAN)
or (IS_BIG_ENDIAN and endian == BIG_ENDIAN)
),
)
]
def _read_body(self):
"""Read the body of the message."""
@ -335,12 +362,25 @@ class Unmarshaller:
return None
return self.message
_complex_parsers: Dict[
_complex_parsers_unpack: Dict[
str, Tuple[Callable[["Unmarshaller", SignatureType], Any], None, None, None]
] = {
"b": (read_boolean, None, None, None),
"o": (read_string, None, None, None),
"s": (read_string, None, None, None),
"o": (read_string_unpack, None, None, None),
"s": (read_string_unpack, None, None, None),
"g": (read_signature, None, None, None),
"a": (read_array, None, None, None),
"(": (read_struct, None, None, None),
"{": (read_dict_entry, None, None, None),
"v": (read_variant, None, None, None),
}
_complex_parsers_cast: Dict[
str, Tuple[Callable[["Unmarshaller", SignatureType], Any], None, None, None]
] = {
"b": (read_boolean, None, None, None),
"o": (read_string_cast, None, None, None),
"s": (read_string_cast, None, None, None),
"g": (read_signature, None, None, None),
"a": (read_array, None, None, None),
"(": (read_struct, None, None, None),
@ -372,18 +412,18 @@ class Unmarshaller:
_readers_by_type: Dict[Tuple[int, bool], READER_TYPE] = {
(LITTLE_ENDIAN, True): {
**_ctype_by_endian[(LITTLE_ENDIAN, True)],
**_complex_parsers,
**_complex_parsers_cast,
},
(LITTLE_ENDIAN, False): {
**_ctype_by_endian[(LITTLE_ENDIAN, False)],
**_complex_parsers,
**_complex_parsers_unpack,
},
(BIG_ENDIAN, True): {
**_ctype_by_endian[(BIG_ENDIAN, True)],
**_complex_parsers,
**_complex_parsers_cast,
},
(BIG_ENDIAN, False): {
**_ctype_by_endian[(BIG_ENDIAN, False)],
**_complex_parsers,
**_complex_parsers_unpack,
},
}

View File

@ -2,6 +2,7 @@ import io
import json
import os
from typing import Any, Dict
from unittest.mock import patch
import pytest
@ -93,39 +94,46 @@ def test_marshalling_with_table():
@pytest.mark.parametrize("unmarshall_table", (table,))
def test_unmarshalling_with_table(unmarshall_table):
for item in unmarshall_table:
@pytest.mark.parametrize("endians", ((True, False), (False, True)))
def test_unmarshalling_with_table(unmarshall_table, endians):
from dbus_fast._private import unmarshaller
stream = io.BytesIO(bytes.fromhex(item["data"]))
unmarshaller = Unmarshaller(stream)
try:
unmarshaller.unmarshall()
except Exception as e:
print("message failed to unmarshall:")
print(json_dump(item["message"]))
raise e
with patch.object(unmarshaller, "IS_BIG_ENDIAN", endians[0]), patch.object(
unmarshaller, "IS_LITTLE_ENDIAN", endians[1]
):
message = json_to_message(item["message"])
for item in unmarshall_table:
body = []
for i, type_ in enumerate(message.signature_tree.types):
body.append(replace_variants(type_, message.body[i]))
message.body = body
stream = io.BytesIO(bytes.fromhex(item["data"]))
unmarshaller = Unmarshaller(stream)
try:
unmarshaller.unmarshall()
except Exception as e:
print("message failed to unmarshall:")
print(json_dump(item["message"]))
raise e
for attr in [
"body",
"signature",
"message_type",
"destination",
"path",
"interface",
"member",
"flags",
"serial",
]:
assert getattr(unmarshaller.message, attr) == getattr(
message, attr
), f"attr doesnt match: {attr}"
message = json_to_message(item["message"])
body = []
for i, type_ in enumerate(message.signature_tree.types):
body.append(replace_variants(type_, message.body[i]))
message.body = body
for attr in [
"body",
"signature",
"message_type",
"destination",
"path",
"interface",
"member",
"flags",
"serial",
]:
assert getattr(unmarshaller.message, attr) == getattr(
message, attr
), f"attr doesnt match: {attr}"
def test_unmarshall_can_resume():