feat: optimize signature readers for most common messages (#107)

This commit is contained in:
J. Nick Koston 2022-10-13 10:29:53 -10:00 committed by GitHub
parent 094ca4ad7d
commit d5fb4d9c8b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 123 additions and 161 deletions

View File

@ -15,7 +15,13 @@ cdef unsigned int PROTOCOL_VERSION
cdef str UINT32_CAST
cdef str INT16_CAST
cdef object UINT32_SIGNATURE
cdef object UNPACK_HEADER_LITTLE_ENDIAN
cdef object UNPACK_HEADER_BIG_ENDIAN
cdef object UINT32_UNPACK_LITTLE_ENDIAN
cdef object UINT32_UNPACK_BIG_ENDIAN
cdef object INT16_UNPACK_LITTLE_ENDIAN
cdef object INT16_UNPACK_BIG_ENDIAN
cdef class MarshallerStreamEndError(Exception):
pass
@ -24,7 +30,6 @@ cdef class Unmarshaller:
cdef object _unix_fds
cdef bytearray _buf
cdef object _view
cdef unsigned int _pos
cdef object _stream
cdef object _sock
@ -37,6 +42,7 @@ cdef class Unmarshaller:
cdef unsigned int _flag
cdef unsigned int _msg_len
cdef object _uint32_unpack
cdef object _int16_unpack
cpdef reset(self)
@ -48,30 +54,32 @@ cdef class Unmarshaller:
)
cdef read_to_pos(self, unsigned long pos)
cpdef read_uint32_cast(self, object type_)
cpdef read_uint32_unpack(self, object type_)
cpdef read_int16_cast(self, object type_)
cdef unsigned int _read_uint32_unpack(self)
cdef _read_int16_cast(self)
cpdef read_int16_unpack(self, object type_)
cdef int _read_int16_unpack(self)
cpdef read_string_unpack(self, object type_)
cdef _read_string_unpack(self)
cpdef read_string_cast(self, object type_)
@cython.locals(
buf_bytes=cython.bytearray,
)
cdef _read_string_cast(self)
cdef _read_string_unpack(self)
cdef _read_variant(self)
cpdef read_array(self, object type_)
@cython.locals(
beginning_pos=cython.ulong,
array_length=cython.uint,
)
cpdef read_array(self, object type_)
cdef _read_array(self, object type_)
cpdef read_signature(self, object type_)
@ -84,7 +92,6 @@ cdef class Unmarshaller:
@cython.locals(
endian=cython.uint,
protocol_version=cython.uint,
can_cast=cython.bint,
key=cython.str
)
cdef _read_header(self)

View File

@ -1,5 +1,6 @@
import array
import io
import pprint
import socket
import sys
from struct import Struct
@ -11,18 +12,13 @@ from ..message import Message
from ..signature import SignatureType, Variant, get_signature_tree
from .constants import BIG_ENDIAN, LITTLE_ENDIAN, PROTOCOL_VERSION
IS_LITTLE_ENDIAN = sys.byteorder == "little"
IS_BIG_ENDIAN = sys.byteorder == "big"
MAX_UNIX_FDS = 16
UNPACK_SYMBOL = {LITTLE_ENDIAN: "<", BIG_ENDIAN: ">"}
UNPACK_LENGTHS = {BIG_ENDIAN: Struct(">III"), LITTLE_ENDIAN: Struct("<III")}
UINT32_CAST = "I"
UINT32_SIZE = 4
UINT32_DBUS_TYPE = "u"
UINT32_SIGNATURE = get_signature_tree(UINT32_DBUS_TYPE).types[0]
INT16_CAST = "h"
INT16_SIZE = 2
@ -40,10 +36,13 @@ DBUS_TO_CTYPE = {
"h": (UINT32_CAST, UINT32_SIZE), # uint32
}
UINT32_UNPACK_BY_ENDIAN = {
LITTLE_ENDIAN: Struct("<I").unpack_from,
BIG_ENDIAN: Struct(">I").unpack_from,
}
UNPACK_HEADER_LITTLE_ENDIAN = Struct("<III").unpack_from
UINT32_UNPACK_LITTLE_ENDIAN = Struct("<I").unpack_from
INT16_UNPACK_LITTLE_ENDIAN = Struct("<h").unpack_from
UNPACK_HEADER_BIG_ENDIAN = Struct(">III").unpack_from
UINT32_UNPACK_BIG_ENDIAN = Struct(">I").unpack_from
INT16_UNPACK_BIG_ENDIAN = Struct(">h").unpack_from
HEADER_SIGNATURE_SIZE = 16
HEADER_ARRAY_OF_STRUCT_SIGNATURE_POSITION = 12
@ -65,40 +64,27 @@ HEADER_MESSAGE_ARG_NAME = {
READER_TYPE = Callable[["Unmarshaller", SignatureType], Any]
def cast_parser_factory(ctype: str, size: int) -> READER_TYPE:
"""Build a parser that casts the bytes to the given ctype."""
def _cast_parser(self: "Unmarshaller", signature: SignatureType) -> Any:
self._pos += size + (-self._pos & (size - 1)) # align
return self._view[self._pos - size : self._pos].cast(ctype)[0]
return _cast_parser
def unpack_parser_factory(unpack_from: Callable, size: int) -> READER_TYPE:
"""Build a parser that unpacks the bytes using the given unpack_from function."""
def _unpack_from_parser(self: "Unmarshaller", signature: SignatureType) -> Any:
self._pos += size + (-self._pos & (size - 1)) # align
return unpack_from(self._view, self._pos - size)[0]
return unpack_from(self._buf, self._pos - size)[0]
return _unpack_from_parser
def build_simple_parsers(
endian: int, can_cast: bool
endian: int,
) -> Dict[str, Callable[["Unmarshaller", SignatureType], Any]]:
"""Build a dict of parsers for simple types."""
parsers: Dict[str, READER_TYPE] = {}
for dbus_type, ctype_size in DBUS_TO_CTYPE.items():
ctype, size = ctype_size
size = ctype_size[1]
if can_cast:
parsers[dbus_type] = cast_parser_factory(ctype, size)
else:
parsers[dbus_type] = unpack_parser_factory(
Struct(f"{UNPACK_SYMBOL[endian]}{ctype}").unpack_from, size
)
parsers[dbus_type] = unpack_parser_factory(
Struct(f"{UNPACK_SYMBOL[endian]}{ctype}").unpack_from, size
)
return parsers
@ -135,7 +121,6 @@ class Unmarshaller:
__slots__ = (
"_unix_fds",
"_buf",
"_view",
"_pos",
"_stream",
"_sock",
@ -148,12 +133,12 @@ class Unmarshaller:
"_flag",
"_msg_len",
"_uint32_unpack",
"_int16_unpack",
)
def __init__(self, stream: io.BufferedRWPair, sock: Optional[socket.socket] = None):
self._unix_fds: List[int] = []
self._buf = bytearray() # Actual buffer
self._view: Optional[memoryview] = None # Memory view of the buffer
self._stream = stream
self._sock = sock
self._message: Optional[Message] = None
@ -165,8 +150,8 @@ class Unmarshaller:
self._message_type = 0
self._flag = 0
self._msg_len = 0
# Only set if we cannot cast
self._uint32_unpack: Callable | None = None
self._int16_unpack: Callable | None = None
def reset(self) -> None:
"""Reset the unmarshaller to its initial state.
@ -174,7 +159,6 @@ class Unmarshaller:
Call this before processing a new message.
"""
self._unix_fds = []
self._view = None
self._buf.clear()
self._message = None
self._pos = 0
@ -185,6 +169,7 @@ class Unmarshaller:
self._flag = 0
self._msg_len = 0
self._uint32_unpack = None
self._int16_unpack = None
@property
def message(self) -> Message:
@ -240,31 +225,22 @@ class Unmarshaller:
if len(data) + start_len != pos:
raise MarshallerStreamEndError()
def read_uint32_cast(self, type_: SignatureType) -> int:
def read_uint32_unpack(self, type_: SignatureType) -> int:
return self._read_uint32_unpack()
def _read_uint32_unpack(self) -> int:
self._pos += UINT32_SIZE + (-self._pos & (UINT32_SIZE - 1)) # align
return self._view[self._pos - UINT32_SIZE : self._pos].cast(UINT32_CAST)[0]
return self._uint32_unpack(self._buf, self._pos - UINT32_SIZE)[0]
def read_int16_cast(self, type_: SignatureType) -> int:
return self._read_int16_cast()
def read_int16_unpack(self, type_: SignatureType) -> int:
return self._read_int16_unpack()
def _read_int16_cast(self) -> int:
def _read_int16_unpack(self) -> int:
self._pos += INT16_SIZE + (-self._pos & (INT16_SIZE - 1)) # align
return self._view[self._pos - INT16_SIZE : self._pos].cast(INT16_CAST)[0]
return self._int16_unpack(self._buf, self._pos - INT16_SIZE)[0]
def read_boolean(self, type_: SignatureType) -> bool:
return bool(self._readers[UINT32_SIGNATURE.token](self, UINT32_SIGNATURE))
def read_string_cast(self, type_: SignatureType) -> str:
"""Read a string using cast."""
return self._read_string_cast()
def _read_string_cast(self) -> str:
self._pos += UINT32_SIZE + (-self._pos & (UINT32_SIZE - 1)) # align
str_start = self._pos
# read terminating '\0' byte as well (str_length + 1)
start_pos = self._pos - UINT32_SIZE
self._pos += self._view[start_pos : self._pos].cast(UINT32_CAST)[0] + 1
return self._buf[str_start : self._pos - 1].decode()
return bool(self._read_uint32_unpack())
def read_string_unpack(self, type_: SignatureType) -> str:
return self._read_string_unpack()
@ -274,7 +250,7 @@ class Unmarshaller:
self._pos += UINT32_SIZE + (-self._pos & (UINT32_SIZE - 1)) # align
str_start = self._pos
# read terminating '\0' byte as well (str_length + 1)
self._pos += self._uint32_unpack(self._view, str_start - UINT32_SIZE)[0] + 1
self._pos += self._uint32_unpack(self._buf, str_start - UINT32_SIZE)[0] + 1
return self._buf[str_start : self._pos - 1].decode()
def read_signature(self, type_: SignatureType) -> str:
@ -295,8 +271,8 @@ class Unmarshaller:
signature_type = tree.types[0]
# verify in Variant is only useful on construction not unmarshalling
token = signature_type.token
if token == "n" and self._uint32_unpack is None:
return Variant(tree, self._read_int16_cast(), False)
if token == "n":
return Variant(tree, self._read_int16_unpack(), False)
return Variant(
tree,
self._readers[token](self, signature_type),
@ -317,16 +293,14 @@ class Unmarshaller:
), self._readers[type_.children[1].token](self, type_.children[1])
def read_array(self, type_: SignatureType) -> Iterable[Any]:
return self._read_array(type_)
def _read_array(self, type_: SignatureType) -> Iterable[Any]:
self._pos += -self._pos & 3 # align 4 for the array
self._pos += (
-self._pos & (UINT32_SIZE - 1)
) + UINT32_SIZE # align for the uint32
if self._uint32_unpack:
array_length = self._uint32_unpack(self._view, self._pos - UINT32_SIZE)[0]
else:
array_length = self._view[self._pos - UINT32_SIZE : self._pos].cast(
UINT32_CAST
)[0]
array_length = self._uint32_unpack(self._buf, self._pos - UINT32_SIZE)[0]
child_type = type_.children[0]
token = child_type.token
@ -353,10 +327,7 @@ class Unmarshaller:
if child_0_token in "os" and child_1_token == "v":
while self._pos - beginning_pos < array_length:
self._pos += -self._pos & 7 # align 8
if self._uint32_unpack: # cannot cast
key = self._read_string_unpack()
else:
key = self._read_string_cast()
key = self._read_string_unpack()
result_dict[key] = self._read_variant()
else:
reader_1 = self._readers[child_1_token]
@ -401,10 +372,7 @@ class Unmarshaller:
# Strings and signatures are the most common types
# so we inline them for performance
if token in "os":
if self._uint32_unpack: # cannot cast
headers[key] = self._read_string_unpack()
else:
headers[key] = self._read_string_cast()
headers[key] = self._read_string_unpack()
elif token == "g":
headers[key] = self._read_signature()
else:
@ -433,42 +401,59 @@ class Unmarshaller:
f"got unknown protocol version: {protocol_version}"
)
self._body_len, self._serial, self._header_len = UNPACK_LENGTHS[
endian
].unpack_from(buffer, 4)
if endian == LITTLE_ENDIAN:
(
self._body_len,
self._serial,
self._header_len,
) = UNPACK_HEADER_LITTLE_ENDIAN(self._buf, 4)
self._uint32_unpack = UINT32_UNPACK_LITTLE_ENDIAN
self._int16_unpack = INT16_UNPACK_LITTLE_ENDIAN
else:
self._body_len, self._serial, self._header_len = UNPACK_HEADER_BIG_ENDIAN(
self._buf, 4
)
self._uint32_unpack = UINT32_UNPACK_BIG_ENDIAN
self._int16_unpack = INT16_UNPACK_BIG_ENDIAN
self._msg_len = (
self._header_len + (-self._header_len & 7) + self._body_len
) # align 8
can_cast = bool(
(IS_LITTLE_ENDIAN and endian == LITTLE_ENDIAN)
or (IS_BIG_ENDIAN and endian == BIG_ENDIAN)
)
self._readers = self._readers_by_type[(endian, can_cast)]
if not can_cast:
self._uint32_unpack = UINT32_UNPACK_BY_ENDIAN[endian]
self._readers = self._readers_by_type[endian]
def _read_body(self) -> None:
"""Read the body of the message."""
self.read_to_pos(HEADER_SIGNATURE_SIZE + self._msg_len)
self._view = memoryview(self._buf)
self._pos = HEADER_ARRAY_OF_STRUCT_SIGNATURE_POSITION
header_fields = self.header_fields(self._header_len)
self._pos += -self._pos & 7 # align 8
header_fields.pop("unix_fds", None) # defined by self._unix_fds
tree = get_signature_tree(header_fields.pop("signature", ""))
if not self._body_len:
body = []
elif tree.signature == "s":
body = [self._read_string_unpack()]
elif tree.signature == "sa{sv}as":
types = tree.types
body = [
self._read_string_unpack(),
self._read_array(types[1]),
self._read_array(types[2]),
]
else:
body = [self._readers[t.token](self, t) for t in tree.types]
self._message = Message(
**header_fields,
message_type=MESSAGE_TYPE_MAP[self._message_type],
flags=MESSAGE_FLAG_MAP[self._flag],
unix_fds=self._unix_fds,
signature=tree,
body=[self._readers[t.token](self, t) for t in tree.types]
if self._body_len
else [],
body=body,
serial=self._serial,
# The D-Bus implementation already validates the message,
# so we don't need to do it again.
validate=False,
**header_fields,
)
def unmarshall(self) -> Optional[Message]:
@ -497,47 +482,22 @@ class Unmarshaller:
"(": read_struct,
"{": read_dict_entry,
"v": read_variant,
"h": read_uint32_unpack,
UINT32_DBUS_TYPE: read_uint32_unpack,
INT16_DBUS_TYPE: read_int16_unpack,
}
_complex_parsers_cast: Dict[str, Callable[["Unmarshaller", SignatureType], Any]] = {
"b": read_boolean,
"o": read_string_cast,
"s": read_string_cast,
"g": read_signature,
"a": read_array,
"(": read_struct,
"{": read_dict_entry,
"v": read_variant,
"h": read_uint32_cast,
UINT32_DBUS_TYPE: read_uint32_cast,
INT16_DBUS_TYPE: read_int16_cast,
_ctype_by_endian: Dict[int, Dict[str, READER_TYPE]] = {
endian: build_simple_parsers(endian) for endian in (LITTLE_ENDIAN, BIG_ENDIAN)
}
_ctype_by_endian: Dict[Tuple[int, bool], Dict[str, READER_TYPE]] = {
endian_can_cast: build_simple_parsers(*endian_can_cast)
for endian_can_cast in (
(LITTLE_ENDIAN, True),
(LITTLE_ENDIAN, False),
(BIG_ENDIAN, True),
(BIG_ENDIAN, False),
)
}
_readers_by_type: Dict[Tuple[int, bool], Dict[str, READER_TYPE]] = {
(LITTLE_ENDIAN, True): {
**_ctype_by_endian[(LITTLE_ENDIAN, True)],
**_complex_parsers_cast,
},
(LITTLE_ENDIAN, False): {
**_ctype_by_endian[(LITTLE_ENDIAN, False)],
_readers_by_type: Dict[int, Dict[str, READER_TYPE]] = {
LITTLE_ENDIAN: {
**_ctype_by_endian[LITTLE_ENDIAN],
**_complex_parsers_unpack,
},
(BIG_ENDIAN, True): {
**_ctype_by_endian[(BIG_ENDIAN, True)],
**_complex_parsers_cast,
},
(BIG_ENDIAN, False): {
**_ctype_by_endian[(BIG_ENDIAN, False)],
BIG_ENDIAN: {
**_ctype_by_endian[BIG_ENDIAN],
**_complex_parsers_unpack,
},
}

View File

@ -95,46 +95,41 @@ def test_marshalling_with_table():
@pytest.mark.parametrize("unmarshall_table", (table,))
@pytest.mark.parametrize("endians", ((True, False), (False, True)))
def test_unmarshalling_with_table(unmarshall_table, endians):
def test_unmarshalling_with_table(unmarshall_table):
from dbus_fast._private import unmarshaller
with patch.object(unmarshaller, "IS_BIG_ENDIAN", endians[0]), patch.object(
unmarshaller, "IS_LITTLE_ENDIAN", endians[1]
):
for item in unmarshall_table:
for item in unmarshall_table:
stream = io.BytesIO(bytes.fromhex(item["data"]))
unmarshaller = Unmarshaller(stream)
try:
unmarshaller.unmarshall()
except Exception as e:
print("message failed to unmarshall:")
print(json_dump(item["message"]))
raise e
stream = io.BytesIO(bytes.fromhex(item["data"]))
unmarshaller = Unmarshaller(stream)
try:
unmarshaller.unmarshall()
except Exception as e:
print("message failed to unmarshall:")
print(json_dump(item["message"]))
raise e
message = json_to_message(item["message"])
message = json_to_message(item["message"])
body = []
for i, type_ in enumerate(message.signature_tree.types):
body.append(replace_variants(type_, message.body[i]))
message.body = body
body = []
for i, type_ in enumerate(message.signature_tree.types):
body.append(replace_variants(type_, message.body[i]))
message.body = body
for attr in [
"body",
"signature",
"message_type",
"destination",
"path",
"interface",
"member",
"flags",
"serial",
]:
assert getattr(unmarshaller.message, attr) == getattr(
message, attr
), f"attr doesnt match: {attr}"
for attr in [
"body",
"signature",
"message_type",
"destination",
"path",
"interface",
"member",
"flags",
"serial",
]:
assert getattr(unmarshaller.message, attr) == getattr(
message, attr
), f"attr doesnt match: {attr}"
def test_unmarshall_can_resume():