Skip to content

Commit 9daf121

Browse files
authored
Fix kaitai python version for messages ending in a variable-length string (#1468)
The kaitai python bindings contain an internal helper class called BufferKaitaiStream, which is intended to be a faster work-alike for the KaitaiStream class provided by the kaitai python runtime. This is implemented using an inner class called BufferKaitaiStream.IOBytes, which emulates the io.BytesIO interface expected by KaitaiStream. Unfortunately, the previous implementation of BufferKaitaiStream.IOBytes.read() contained a bug which meant that it did not correctly handle messages (such as MSG_DGNSS_STATUS and MSG_PROFILING_THREAD_INFO) which end in a variable-length string. In this case, KaitaiStream.read_bytes_full() would call BufferKaitaiStream.IOBytes.read() without any arguments, with the expectation that it would return all of the data until the end of the message.
1 parent e60a464 commit 9daf121

File tree

4 files changed

+52
-12
lines changed

4 files changed

+52
-12
lines changed

kaitai/python/kaitai_sbp/parse_utils.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,19 @@
1313

1414
# wrapper object which allows KaitaiStream to be used with a simple byte array
1515
class BufferKaitaiStream(KaitaiStream):
16+
# simple emulation of io.BytesIO interface expected by KaitaiStream
1617
class IOBytes:
1718
def __init__(self, buf):
1819
self.buf = buf
1920
self.pos = 0
2021

21-
def read(self, num):
22-
if self.pos + num > len(self.buf):
22+
def read(self, size=-1):
23+
if size < 0:
24+
size = len(self.buf) - self.pos
25+
if self.pos + size > len(self.buf):
2326
raise EOFError
24-
buf = self.buf[self.pos:self.pos + num]
25-
self.pos += num
27+
buf = self.buf[self.pos:self.pos + size]
28+
self.pos += size
2629
return buf
2730

2831
def seek(self, pos):

kaitai/python/kaitai_sbp/tests/test_benchmark_lite.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
1010

1111
import timeit
12-
from kaitai_sbp.tests.utils import count_messages, get_next_msg_construct, get_next_msg_kaitai, get_next_msg_hybrid1, get_next_msg_hybrid2, get_next_msg_external, PERL_CMD
12+
from kaitai_sbp.tests.utils import count_messages, get_next_msg_construct, get_next_msg_kaitai, get_next_msg_hybrid1, get_next_msg_hybrid2, get_next_msg_hybrid3, get_next_msg_external, PERL_CMD
1313

1414

1515
TEST_DATA = "test_data/benchmark.sbp"
@@ -21,4 +21,5 @@ def test_benchmarks():
2121
print("kaitai: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_kaitai)', number=COUNT, globals=globals())))
2222
print("hybrid1: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_hybrid1)', number=COUNT, globals=globals())))
2323
print("hybrid2: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_hybrid2)', number=COUNT, globals=globals())))
24+
print("hybrid3: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_hybrid3)', number=COUNT, globals=globals())))
2425
print("perl: {}".format(timeit.timeit('count_messages(TEST_DATA, get_next_msg_external, PERL_CMD)', number=COUNT, globals=globals())))

kaitai/python/kaitai_sbp/tests/test_parsers.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
99
# WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
1010

11-
from kaitai_sbp.tests.utils import compare_parser_outputs, count_messages, get_next_msg_construct, get_next_msg_kaitai, get_next_msg_hybrid1, get_next_msg_hybrid2, get_next_msg_external, PERL_CMD
11+
from kaitai_sbp.tests.utils import compare_parser_outputs, count_messages, get_next_msg_construct, get_next_msg_kaitai, get_next_msg_hybrid1, get_next_msg_hybrid2, get_next_msg_hybrid3, get_next_msg_external, PERL_CMD
1212
import os
1313
import random
1414
import tempfile
@@ -61,9 +61,11 @@ def test_corrupted_counts():
6161
num_messages_kaitai = count_messages(filename_corrupted, get_next_msg_kaitai)
6262
num_messages_hybrid1 = count_messages(filename_corrupted, get_next_msg_hybrid1)
6363
num_messages_hybrid2 = count_messages(filename_corrupted, get_next_msg_hybrid2)
64+
num_messages_hybrid3 = count_messages(filename_corrupted, get_next_msg_hybrid3)
6465
num_messages_perl = count_messages(filename_corrupted, get_next_msg_external, PERL_CMD)
6566

6667
assert(num_messages_construct == num_messages_kaitai)
6768
assert(num_messages_construct == num_messages_hybrid1)
6869
assert(num_messages_construct == num_messages_hybrid2)
70+
assert(num_messages_construct == num_messages_hybrid3)
6971
assert(num_messages_construct == num_messages_perl)

kaitai/python/kaitai_sbp/tests/utils.py

+40-6
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55
from generator.sbpg.targets.common import snake_case, snake_case_keys, decode_json
66
from kaitaistruct import KaitaiStream, KaitaiStruct
77
import sys
8+
import base64
89
import sbp.msg as msg_construct
910
import sbp.table as table_construct
1011
from sbp.sbp2json import iter_messages_buffered as parse_file_construct
1112
from subprocess import Popen, PIPE
1213

14+
SBP_PREAMBLE = 0x55
1315
SBP_HEADER_LEN = 6
1416
PERL_CMD = ['perl', 'kaitai/perl/bin/sbp2json.pl']
1517

@@ -31,8 +33,8 @@ def dictify(obj, round_floats=False):
3133
# "original" version of sbp2json based entirely upon construct
3234
def get_next_msg_construct(fileobj):
3335
for msg_type, sender, payload_len, buf, crc_read in parse_file_construct(fileobj):
34-
msg_buf = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len]
35-
msg = msg_construct.SBP(msg_type, sender, payload_len, msg_buf, crc_read)
36+
payload = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len]
37+
msg = msg_construct.SBP(msg_type, sender, payload_len, payload, crc_read)
3638

3739
if msg_type not in table_construct._SBP_TABLE:
3840
sys.stderr.write("Skipping unknown message type: {}\n".format(msg_type))
@@ -70,14 +72,40 @@ def get_next_msg_hybrid2(fileobj):
7072
sys.stderr.write("Skipping unknown message type: {}\n".format(msg_type))
7173
continue
7274

73-
msg_buf = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len]
74-
msg = msg_construct.SBP(msg_type, sender, payload_len, msg_buf, crc_read)
75+
payload = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len]
76+
msg = msg_construct.SBP(msg_type, sender, payload_len, payload, crc_read)
7577
stream.set_buffer(msg.to_binary())
7678
obj = table_kaitai.SbpMessage(stream)
7779

7880
yield get_flattened_msg(obj)
7981

8082

83+
# hybrid version of sbp2json which uses original parser + kaitai struct to avoid
84+
# calling table_construct.dispatch() as well as avoiding usage of io.BytesIO
85+
def get_next_msg_hybrid3(fileobj):
86+
stream = BufferKaitaiStream()
87+
for msg_type, sender, payload_len, buf, crc_read in parse_file_construct(fileobj):
88+
89+
if msg_type not in table_kaitai.TABLE:
90+
sys.stderr.write("Skipping unknown message type: {}\n".format(msg_type))
91+
continue
92+
93+
# we can construct a kaitai object directly from the payload, but this
94+
# means that we need to manually fill in the preamble/sender/crc/etc
95+
payload = buf[SBP_HEADER_LEN:SBP_HEADER_LEN + payload_len]
96+
cls = table_kaitai.TABLE[msg_type]
97+
stream.set_buffer(bytes(payload))
98+
obj = cls(stream)
99+
obj.preamble = SBP_PREAMBLE
100+
obj.msg_type = msg_type
101+
obj.sender = sender
102+
obj.length = payload_len
103+
obj.payload = base64.standard_b64encode(payload).decode('ascii')
104+
obj.crc = crc_read
105+
106+
yield obj
107+
108+
81109
def get_next_msg_external(cmd, filename):
82110
proc = Popen(cmd + [filename], stdout=PIPE)
83111

@@ -106,7 +134,10 @@ def count_messages(filename, fn, cmd=None):
106134
# (to avoid calling table_construct.dispatch())
107135
# 4. hybrid2: use parsing code from construct version + msg_construct.SBP +
108136
# kaitai struct objects (to avoid calling table_construct.dispatch())
109-
# 5. perl: based completely upon the perl bindings generated by
137+
# 5. hybrid3: use parsing code from construct version + raw payload bytes +
138+
# table_kaitai.TABLE (to avoid calling table_construct.dispatch() and
139+
# usage of BytesIO)
140+
# 6. perl: based completely upon the perl bindings generated by
110141
# kaitai-struct-compiler
111142
def compare_parser_outputs(filename):
112143
num_messages = 0
@@ -115,19 +146,22 @@ def compare_parser_outputs(filename):
115146
file2 = open(filename, 'rb')
116147
file3 = open(filename, 'rb')
117148
file4 = open(filename, 'rb')
149+
file5 = open(filename, 'rb')
118150

119-
for msg_construct, msg_kaitai, msg_hybrid1, msg_hybrid2, msg_perl in zip(get_next_msg_construct(file1), get_next_msg_kaitai(file2), get_next_msg_hybrid1(file3), get_next_msg_hybrid2(file4), get_next_msg_external(PERL_CMD, filename)):
151+
for msg_construct, msg_kaitai, msg_hybrid1, msg_hybrid2, msg_hybrid3, msg_perl in zip(get_next_msg_construct(file1), get_next_msg_kaitai(file2), get_next_msg_hybrid1(file3), get_next_msg_hybrid2(file4), get_next_msg_hybrid3(file5), get_next_msg_external(PERL_CMD, filename)):
120152
msg_construct = snake_case_keys(msg_construct)
121153
msg_perl = decode_json(msg_perl)
122154

123155
dict_construct = dictify(msg_construct)
124156
dict_kaitai = dictify(msg_kaitai)
125157
dict_hybrid1 = dictify(msg_hybrid1)
126158
dict_hybrid2 = dictify(msg_hybrid2)
159+
dict_hybrid3 = dictify(msg_hybrid3)
127160

128161
assert dict_construct == dict_kaitai, "Mismatch:\n{}\nvs\n{}".format(dict_construct, dict_kaitai)
129162
assert dict_construct == dict_hybrid1, "Mismatch:\n{}\nvs\n{}".format(dict_construct, dict_hybrid1)
130163
assert dict_construct == dict_hybrid2, "Mismatch:\n{}\nvs\n{}".format(dict_construct, dict_hybrid2)
164+
assert dict_construct == dict_hybrid3, "Mismatch:\n{}\nvs\n{}".format(dict_construct, dict_hybrid3)
131165

132166
# need to round floats due to difference in rounding approaches used
133167
# by perl and python JSON encoders

0 commit comments

Comments
 (0)