feat: add optimized reader for uint16 (#121)

This commit is contained in:
J. Nick Koston 2022-10-28 23:24:42 -05:00 committed by GitHub
parent 1e3c722bfc
commit 52881d9054
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 176 additions and 15 deletions

View File

@ -0,0 +1,30 @@
import io
import timeit
from dbus_fast._private.unmarshaller import Unmarshaller
# cythonize -X language_level=3 -a -i src/dbus_fast/_private/unmarshaller.py
# Captured D-Bus byte stream used as the benchmark input: a BlueZ
# PropertiesChanged signal whose body carries RSSI and ManufacturerData,
# followed by what appears to be the first bytes of another message.
bluez_mfr_data_message = (
    b"l\4\1\1x\0\0\0\232\312\n\0\225\0\0\0\1\1o\0%\0\0\0/org/bluez/hci0/dev_D0_C2_4E_08_AB_57\0\0\0\2\1s"
    b"\0\37\0\0\0org.freedesktop.DBus.Properties\0\3\1s\0\21\0\0\0PropertiesChanged\0\0\0\0\0\0\0\10\1g\0"
    b"\10sa{sv}as\0\0\0\7\1s\0\4\0\0\0:1.4\0\0\0\0\21\0\0\0org.bluez.Device1\0\0\0T\0\0\0\0\0\0\0\4\0\0\0"
    b"RSSI\0\1n\0\252\377\0\0\20\0\0\0ManufacturerData\0\5a{qv}\0$\0\0\0u\0\2ay\0\0\0\30\0\0\0B\4\1\1p\320"
    b"\302N\10\253W\322\302N\10\253V\1\0\0\0\0\0\0\0\0\0\0l\4\1\0014\0\0\0\233\312\n\0\225\0\0\0\1\1o\0%\0"
)

# A single stream and unmarshaller are created once and reused by every
# iteration, so the timing loop does not include their construction.
stream = io.BytesIO(bluez_mfr_data_message)
unmarshaller = Unmarshaller(stream)


def unmarshall_mfr_data_message():
    """Rewind the shared stream, reset the unmarshaller, and decode once."""
    stream.seek(0)
    unmarshaller.reset()
    unmarshaller.unmarshall()


count = 3000000
time = timeit.timeit(unmarshall_mfr_data_message, number=count)
print(f"Unmarshalling {count} bluetooth ManufacturerData messages took {time} seconds")

View File

@ -7,23 +7,33 @@ from ..signature import SignatureType
cdef unsigned int UINT32_SIZE
cdef unsigned int INT16_SIZE
cdef unsigned int UINT16_SIZE
cdef unsigned int HEADER_ARRAY_OF_STRUCT_SIGNATURE_POSITION
cdef unsigned int HEADER_SIGNATURE_SIZE
cdef unsigned int LITTLE_ENDIAN
cdef unsigned int BIG_ENDIAN
cdef unsigned int PROTOCOL_VERSION
cdef str UINT32_CAST
cdef str INT16_CAST
cdef str UINT16_CAST
cdef bint SYS_IS_LITTLE_ENDIAN
cdef bint SYS_IS_BIG_ENDIAN
cdef object UNPACK_HEADER_LITTLE_ENDIAN
cdef object UNPACK_HEADER_BIG_ENDIAN
cdef object UINT32_UNPACK_LITTLE_ENDIAN
cdef object UINT32_UNPACK_BIG_ENDIAN
cdef object INT16_UNPACK_LITTLE_ENDIAN
cdef object INT16_UNPACK_BIG_ENDIAN
cdef object UINT16_UNPACK_LITTLE_ENDIAN
cdef object UINT16_UNPACK_BIG_ENDIAN
cdef object Variant
cdef object Message
cdef object MESSAGE_TYPE_MAP
@ -31,11 +41,15 @@ cdef object MESSAGE_FLAG_MAP
cdef object HEADER_MESSAGE_ARG_NAME
cdef object SIGNATURE_TREE_EMPTY
cdef object SIGNATURE_TREE_SA_SV_AS
cdef object SIGNATURE_TREE_N
cdef object SIGNATURE_TREE_S
cdef object SIGNATURE_TREE_SA_SV_AS
cdef object SIGNATURE_TREE_SA_SV_AS_TYPES_1
cdef object SIGNATURE_TREE_SA_SV_AS_TYPES_2
cdef object SIGNATURE_TREE_AY
cdef object SIGNATURE_TREE_AY_TYPES_0
cdef object SIGNATURE_TREE_A_QV
cdef object SIGNATURE_TREE_A_QV_TYPES_0
cpdef get_signature_tree
@ -48,6 +62,11 @@ cdef inline short _cast_int16_native(const char * payload, unsigned int offset)
cdef short *s16p = <short *> &payload[offset]
return s16p[0]
cdef inline unsigned short _cast_uint16_native(const char * payload, unsigned int offset):
    # Reinterpret the bytes starting at payload[offset] as an unsigned short
    # through a pointer cast; the value is read as-is, with no byte swapping.
    cdef unsigned short *value_ptr = <unsigned short *> &payload[offset]
    return value_ptr[0]
cdef class MarshallerStreamEndError(Exception):
pass
@ -69,6 +88,7 @@ cdef class Unmarshaller:
cdef unsigned int _is_native
cdef object _uint32_unpack
cdef object _int16_unpack
cdef object _uint16_unpack
cpdef reset(self)
@ -89,6 +109,10 @@ cdef class Unmarshaller:
cdef int _read_int16_unpack(self)
cpdef read_uint16_unpack(self, object type_)
cdef unsigned int _read_uint16_unpack(self)
cpdef read_string_unpack(self, object type_)
@cython.locals(

View File

@ -23,13 +23,17 @@ INT16_CAST = "h"
INT16_SIZE = 2
INT16_DBUS_TYPE = "n"
UINT16_CAST = "H"
UINT16_SIZE = 2
UINT16_DBUS_TYPE = "q"
SYS_IS_LITTLE_ENDIAN = sys.byteorder == "little"
SYS_IS_BIG_ENDIAN = sys.byteorder == "big"
DBUS_TO_CTYPE = {
"y": ("B", 1), # byte
INT16_DBUS_TYPE: (INT16_CAST, INT16_SIZE), # int16
"q": ("H", 2), # uint16
UINT16_DBUS_TYPE: (UINT16_CAST, UINT16_SIZE), # uint16
"i": ("i", 4), # int32
UINT32_DBUS_TYPE: (UINT32_CAST, UINT32_SIZE), # uint32
"x": ("q", 8), # int64
@ -39,12 +43,16 @@ DBUS_TO_CTYPE = {
}
UNPACK_HEADER_LITTLE_ENDIAN = Struct("<III").unpack_from
UINT32_UNPACK_LITTLE_ENDIAN = Struct("<I").unpack_from
INT16_UNPACK_LITTLE_ENDIAN = Struct("<h").unpack_from
UNPACK_HEADER_BIG_ENDIAN = Struct(">III").unpack_from
UINT32_UNPACK_BIG_ENDIAN = Struct(">I").unpack_from
INT16_UNPACK_BIG_ENDIAN = Struct(">h").unpack_from
UINT32_UNPACK_LITTLE_ENDIAN = Struct(f"<{UINT32_CAST}").unpack_from
UINT32_UNPACK_BIG_ENDIAN = Struct(f">{UINT32_CAST}").unpack_from
INT16_UNPACK_LITTLE_ENDIAN = Struct(f"<{INT16_CAST}").unpack_from
INT16_UNPACK_BIG_ENDIAN = Struct(f">{INT16_CAST}").unpack_from
UINT16_UNPACK_LITTLE_ENDIAN = Struct(f"<{UINT16_CAST}").unpack_from
UINT16_UNPACK_BIG_ENDIAN = Struct(f">{UINT16_CAST}").unpack_from
HEADER_SIGNATURE_SIZE = 16
HEADER_ARRAY_OF_STRUCT_SIGNATURE_POSITION = 12
@ -53,6 +61,12 @@ HEADER_ARRAY_OF_STRUCT_SIGNATURE_POSITION = 12
# Signature trees built once at import time. The unmarshaller compares
# variant signatures against some of these literals ("n", "ay", "a{qv}") and
# reuses the prebuilt tree instead of calling get_signature_tree() again.
SIGNATURE_TREE_EMPTY = get_signature_tree("")
SIGNATURE_TREE_N = get_signature_tree("n")
SIGNATURE_TREE_S = get_signature_tree("s")
SIGNATURE_TREE_AY = get_signature_tree("ay")
# The *_TYPES_<n> names cache `.types[n]` of the tree above them so callers
# can pass the child type directly without indexing on every use.
SIGNATURE_TREE_AY_TYPES_0 = SIGNATURE_TREE_AY.types[0]
SIGNATURE_TREE_A_QV = get_signature_tree("a{qv}")
SIGNATURE_TREE_A_QV_TYPES_0 = SIGNATURE_TREE_A_QV.types[0]
SIGNATURE_TREE_SA_SV_AS = get_signature_tree("sa{sv}as")
SIGNATURE_TREE_SA_SV_AS_TYPES_1 = SIGNATURE_TREE_SA_SV_AS.types[1]
SIGNATURE_TREE_SA_SV_AS_TYPES_2 = SIGNATURE_TREE_SA_SV_AS.types[2]
@ -148,6 +162,7 @@ class Unmarshaller:
"_msg_len",
"_uint32_unpack",
"_int16_unpack",
"_uint16_unpack",
"_is_native",
)
@ -168,6 +183,7 @@ class Unmarshaller:
self._is_native = 0
self._uint32_unpack: Callable | None = None
self._int16_unpack: Callable | None = None
self._uint16_unpack: Callable | None = None
def reset(self) -> None:
"""Reset the unmarshaller to its initial state.
@ -185,8 +201,8 @@ class Unmarshaller:
self._flag = 0
self._msg_len = 0
self._is_native = 0
self._uint32_unpack = None
self._int16_unpack = None
# No need to reset the unpack functions, they are set in _read_header
# every time a new message is processed.
@property
def message(self) -> Message:
@ -253,6 +269,17 @@ class Unmarshaller:
)
return self._uint32_unpack(self._buf, self._pos - UINT32_SIZE)[0]
def read_uint16_unpack(self, type_: SignatureType) -> int:
    """Read one uint16 value from the buffer.

    ``type_`` is accepted but not used; the work is delegated to
    ``_read_uint16_unpack``.
    """
    return self._read_uint16_unpack()
def _read_uint16_unpack(self) -> int:
    """Consume one uint16 from the buffer and return it.

    Advances ``self._pos`` past any alignment padding plus the value bytes,
    then decodes the value that ends at the new position.
    """
    # `-pos & (size - 1)` is the padding needed to reach the next multiple of
    # UINT16_SIZE; adding UINT16_SIZE as well leaves _pos just past the value.
    self._pos += UINT16_SIZE + (-self._pos & (UINT16_SIZE - 1)) # align
    if self._is_native and cython.compiled:
        # Compiled fast path: read the value directly with a C pointer cast.
        # NOTE(review): _is_native presumably means the message byte order
        # matches the host's - confirm where it is assigned.
        return _cast_uint16_native( # pragma: no cover
            self._buf, self._pos - UINT16_SIZE
        )
    # Otherwise use the struct unpacker selected for the message's endianness.
    return self._uint16_unpack(self._buf, self._pos - UINT16_SIZE)[0]
def read_int16_unpack(self, type_: SignatureType) -> int:
return self._read_int16_unpack()
@ -301,6 +328,16 @@ class Unmarshaller:
# verify in Variant is only useful on construction not unmarshalling
if signature == "n":
return Variant(SIGNATURE_TREE_N, self._read_int16_unpack(), False)
elif signature == "ay":
return Variant(
SIGNATURE_TREE_AY, self._read_array(SIGNATURE_TREE_AY_TYPES_0), False
)
elif signature == "a{qv}":
return Variant(
SIGNATURE_TREE_A_QV,
self._read_array(SIGNATURE_TREE_A_QV_TYPES_0),
False,
)
tree = get_signature_tree(signature)
signature_type = tree.types[0]
return Variant(
@ -355,15 +392,20 @@ class Unmarshaller:
child_1 = child_type.children[1]
child_0_token = child_0.token
child_1_token = child_1.token
# Strings with variant values are the most common case
# so we optimize for that by inlining the string reading
# and the variant reading here
if child_0_token in "os" and child_1_token == "v":
while self._pos - beginning_pos < array_length:
self._pos += -self._pos & 7 # align 8
key = self._read_string_unpack()
result_dict[key] = self._read_variant()
if child_1_token == "v":
if child_0_token in "os":
while self._pos - beginning_pos < array_length:
self._pos += -self._pos & 7 # align 8
key = self._read_string_unpack()
result_dict[key] = self._read_variant()
elif child_0_token == "q":
while self._pos - beginning_pos < array_length:
self._pos += -self._pos & 7 # align 8
key = self._read_uint16_unpack()
result_dict[key] = self._read_variant()
else:
reader_1 = self._readers[child_1_token]
reader_0 = self._readers[child_0_token]
@ -447,12 +489,14 @@ class Unmarshaller:
) = UNPACK_HEADER_LITTLE_ENDIAN(self._buf, 4)
self._uint32_unpack = UINT32_UNPACK_LITTLE_ENDIAN
self._int16_unpack = INT16_UNPACK_LITTLE_ENDIAN
self._uint16_unpack = UINT16_UNPACK_LITTLE_ENDIAN
elif endian == BIG_ENDIAN:
self._body_len, self._serial, self._header_len = UNPACK_HEADER_BIG_ENDIAN(
self._buf, 4
)
self._uint32_unpack = UINT32_UNPACK_BIG_ENDIAN
self._int16_unpack = INT16_UNPACK_BIG_ENDIAN
self._uint16_unpack = UINT16_UNPACK_BIG_ENDIAN
else:
raise InvalidMessageError(
f"Expecting endianness as the first byte, got {endian} from {buffer}"
@ -530,6 +574,7 @@ class Unmarshaller:
"h": read_uint32_unpack,
UINT32_DBUS_TYPE: read_uint32_unpack,
INT16_DBUS_TYPE: read_int16_unpack,
UINT16_DBUS_TYPE: read_uint16_unpack,
}
_ctype_by_endian: Dict[int, Dict[str, READER_TYPE]] = {

View File

@ -9,6 +9,7 @@ import pytest
from dbus_fast import Message, MessageFlag, MessageType, SignatureTree, Variant
from dbus_fast._private._cython_compat import FakeCython
from dbus_fast._private.unmarshaller import Unmarshaller
from dbus_fast.unpack import unpack_variants
def print_buf(buf):
@ -166,6 +167,67 @@ def test_unmarshall_can_resume():
assert unmarshaller.message is not None
def test_unmarshall_bluez_message():
    """Unmarshall a captured BlueZ PropertiesChanged signal.

    The message body carries an ``a{qv}`` ManufacturerData dictionary keyed
    by uint16 and an int16 RSSI; both the decoded body and the header fields
    are checked, followed by the variant-unpacked form of the body.
    """
    bluez_mfr_message = (
        "6c040101780000009aca0a009500000001016f00250000002f6f72672f626c75657a2f686369302f646576"
        "5f44305f43325f34455f30385f41425f3537000000020173001f0000006f72672e667265656465736b746f"
        "702e444275732e50726f7065727469657300030173001100000050726f706572746965734368616e676564"
        "00000000000000080167000873617b73767d617300000007017300040000003a312e340000000011000000"
        "6f72672e626c75657a2e446576696365310000005400000000000000040000005253534900016e00aaff00"
        "00100000004d616e756661637475726572446174610005617b71767d002400000075000261790000001800"
        "00004204010170d0c24e08ab57d2c24e08ab5601000000000000000000006c040101340000009bca0a0095"
        "00000001016f002500"
    )
    unmarshaller = Unmarshaller(io.BytesIO(bytes.fromhex(bluez_mfr_message)))
    assert unmarshaller.unmarshall()
    message = unmarshaller.message
    assert message is not None

    # The 24 manufacturer-data bytes expected under key 117.
    mfr_payload = (
        b"B\x04\x01\x01p\xd0\xc2N\x08\xabW\xd2"
        b"\xc2N\x08\xabV\x01\x00\x00"
        b"\x00\x00\x00\x00"
    )

    expected_body = [
        "org.bluez.Device1",
        {
            "ManufacturerData": Variant(
                "a{qv}",
                {117: Variant("ay", bytearray(mfr_payload))},
            ),
            "RSSI": Variant("n", -86),
        },
        [],
    ]
    assert message.body == expected_body

    # Header fields decoded from the same message.
    assert message.sender == ":1.4"
    assert message.path == "/org/bluez/hci0/dev_D0_C2_4E_08_AB_57"
    assert message.interface == "org.freedesktop.DBus.Properties"
    assert message.member == "PropertiesChanged"
    assert message.signature == "sa{sv}as"
    assert message.message_type == MessageType.SIGNAL
    assert message.flags == MessageFlag.NO_REPLY_EXPECTED
    assert message.serial == 707226
    assert message.destination is None

    # unpack_variants should strip every Variant wrapper, leaving plain values.
    expected_unpacked = [
        "org.bluez.Device1",
        {
            "ManufacturerData": {117: bytearray(mfr_payload)},
            "RSSI": -86,
        },
        [],
    ]
    unpacked = unpack_variants(message.body)
    assert unpacked == expected_unpacked
def test_ay_buffer():
body = [bytes(10000)]
msg = Message(path="/test", member="test", signature="ay", body=body)