Skip to content

Commit 6ef1f3f

Browse files
authored
performance: reduce_payload_size (#46)
* performance: reduce_payload_size change encoding of encrypted payloads to reduce message size * performance: reduce_payload_size change encoding of encrypted payloads to reduce message size * performance: reduce_payload_size change encoding of encrypted payloads to reduce message size
1 parent 252b192 commit 6ef1f3f

File tree

1 file changed

+72
-26
lines changed

1 file changed

+72
-26
lines changed

hivemind_bus_client/util.py

Lines changed: 72 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
import json
22
import zlib
3-
from binascii import hexlify
4-
from binascii import unhexlify
3+
from binascii import hexlify, unhexlify
4+
from typing import Union, Dict
55

6+
import pybase64
67
from ovos_utils.security import encrypt, decrypt, AES
78

89
from hivemind_bus_client.exceptions import EncryptionKeyError, DecryptionKeyError
910
from hivemind_bus_client.message import HiveMessage, HiveMessageType, Message
1011

1112

12-
def serialize_message(message):
13+
def serialize_message(message: Union[HiveMessage, Message, Dict]) -> str:
1314
# convert a Message object into raw data that can be sent over
1415
# websocket
1516
if hasattr(message, 'serialize'):
@@ -23,7 +24,7 @@ def serialize_message(message):
2324
return json.dumps(message.__dict__)
2425

2526

26-
def payload2dict(payload):
27+
def payload2dict(payload: Union[HiveMessage, Message, str]) -> Dict:
2728
"""helper to ensure all subobjects of a payload are a dict safe for serialization
2829
eg. ensure payload is valid to send over mycroft messagebus object """
2930
if isinstance(payload, HiveMessage):
@@ -54,7 +55,7 @@ def can_serialize(val):
5455
return payload
5556

5657

57-
def get_payload(msg):
58+
def get_payload(msg: Union[HiveMessage, Message, str, Dict]) -> Dict:
5859
""" helper to read normalized payload
5960
from all supported formats (HiveMessage, Message, json str)
6061
"""
@@ -67,7 +68,7 @@ def get_payload(msg):
6768
return msg
6869

6970

70-
def get_hivemsg(msg):
71+
def get_hivemsg(msg: Union[Message, str, Dict]) -> HiveMessage:
7172
""" helper to create a normalized HiveMessage object
7273
from all supported formats (Message, json str, dict)
7374
"""
@@ -81,7 +82,7 @@ def get_hivemsg(msg):
8182
return msg
8283

8384

84-
def get_mycroft_msg(pload):
85+
def get_mycroft_msg(pload: Union[HiveMessage, str, Dict]) -> Message:
8586
if isinstance(pload, HiveMessage):
8687
assert pload.msg_type == HiveMessageType.BUS
8788
pload = pload.payload
@@ -101,36 +102,62 @@ def get_mycroft_msg(pload):
101102
return pload
102103

103104

104-
def encrypt_as_json(key, data, b64=False) -> str:
    """Encrypt ``data`` and return the result as a JSON string.

    The JSON object has the keys ``ciphertext``, ``tag`` and ``nonce``.
    Each value is text-encoded with hex by default, or with base64 when
    ``b64`` is true (base64 yields a shorter message).

    Args:
        key: encryption key; only the first 16 characters are used.
        data: payload to encrypt; a dict is dumped to JSON first.
        b64: encode the binary fields as base64 instead of hex.

    Returns:
        The JSON document as a ``str``.
    """
    # TODO default b64 to True in a future release
    # We don't want clients to update before servers, otherwise servers
    # won't be able to decode. After a reasonable time all servers should
    # support decoding both schemes and the default can change.
    if isinstance(data, dict):
        data = json.dumps(data)
    if len(key) > 16:
        key = key[0:16]

    # encrypt_bin returns nonce + ciphertext + tag; the first and last
    # 16 bytes are split off as nonce and tag respectively.
    blob = encrypt_bin(key, data)
    nonce, body, tag = blob[:16], blob[16:-16], blob[-16:]

    to_text = pybase64.b64encode if b64 else hexlify
    return json.dumps({"ciphertext": to_text(body).decode('utf-8'),
                       "tag": to_text(tag).decode('utf-8'),
                       "nonce": to_text(nonce).decode('utf-8')})
114122

115123

116-
def decrypt_from_json(key, data: Union[str, bytes, Dict]):
    """Decrypt a JSON payload holding ``ciphertext``, ``nonce`` and ``tag``.

    The binary fields may be hex encoded (old style) or base64 encoded
    (new style). The encoding is guessed first; if decoding or decryption
    fails with that guess, the other encoding is tried before giving up.

    Args:
        key: decryption key; only the first 16 characters are used.
        data: the JSON document as ``str``/``bytes``, or an already
            parsed dict.

    Returns:
        Whatever ``decrypt(key, ciphertext, tag, nonce)`` returns.

    Raises:
        DecryptionKeyError: if the payload cannot be decoded and
            decrypted under either encoding.
    """
    if isinstance(data, (str, bytes)):
        # json.loads accepts both str and bytes
        data = json.loads(data)
    if len(key) > 16:
        key = key[0:16]

    def decode(b64=False):
        decoder = pybase64.b64decode if b64 else unhexlify
        ciphertext = decoder(data["ciphertext"])
        if data.get("tag") is None:  # web crypto
            # no separate tag field: the tag is the last 16 bytes
            # of the decoded ciphertext
            ciphertext, tag = ciphertext[:-16], ciphertext[-16:]
        else:
            tag = decoder(data["tag"])
        nonce = decoder(data["nonce"])
        return ciphertext, tag, nonce

    # Hex output from hexlify is lowercase, so an uppercase letter suggests
    # b64. This is only a guess (hex may be upper case, b64 may lack upper
    # case letters), hence both encodings are attempted, best guess first.
    is_b64 = any(a.isupper() for a in str(data))
    first_error = None
    for use_b64 in (is_b64, not is_b64):
        try:
            # decoding sits inside the try: binascii.Error is a ValueError
            # and must trigger the fallback rather than escape to the caller
            ciphertext, tag, nonce = decode(use_b64)
            return decrypt(key, ciphertext, tag, nonce)
        except ValueError as e:
            if first_error is None:
                first_error = e
    raise DecryptionKeyError from first_error
131158

132159

133-
def encrypt_bin(key, data):
160+
def encrypt_bin(key, data: Union[str, bytes]):
134161
if len(key) > 16:
135162
key = key[0:16]
136163
try:
@@ -141,7 +168,7 @@ def encrypt_bin(key, data):
141168
return nonce + ciphertext + tag
142169

143170

144-
def decrypt_bin(key, ciphertext):
171+
def decrypt_bin(key, ciphertext: bytes):
145172
if len(key) > 16:
146173
key = key[0:16]
147174

@@ -156,7 +183,7 @@ def decrypt_bin(key, ciphertext):
156183
raise DecryptionKeyError
157184

158185

159-
def compress_payload(text):
186+
def compress_payload(text: Union[str, bytes]) -> bytes:
160187
# Compressing text
161188
if isinstance(text, str):
162189
decompressed = text.encode("utf-8")
@@ -165,15 +192,18 @@ def compress_payload(text):
165192
return zlib.compress(decompressed)
166193

167194

168-
def decompress_payload(compressed: Union[str, bytes]) -> bytes:
    """Decompress a zlib-compressed payload.

    Args:
        compressed: the zlib stream as ``bytes``, or a text encoding of it
            (hex or base64). Callers really should pass bytes; the ``str``
            form is accepted for compatibility.

    Returns:
        The decompressed bytes.

    Raises:
        zlib.error: if the decoded data is not a valid zlib stream.
        binascii.Error: if a ``str`` input is neither hex nor base64.
    """
    if isinstance(compressed, str):
        try:
            # Try hex first: unhexlify is strict and rejects any non-hex
            # character, and it accepts both lower and upper case digits.
            # Guessing from letter case would misread upper-case hex as b64.
            return zlib.decompress(unhexlify(compressed))
        except (ValueError, zlib.error):
            # not hex, or a b64 string that merely looks like hex
            compressed = pybase64.b64decode(compressed)
    return zlib.decompress(compressed)
174204

175205

176-
def cast2bytes(payload, compressed=False):
206+
def cast2bytes(payload: Union[Dict, str], compressed=False) -> bytes:
177207
if isinstance(payload, dict):
178208
payload = json.dumps(payload)
179209
if compressed:
@@ -184,8 +214,24 @@ def cast2bytes(payload, compressed=False):
184214
return payload
185215

186216

187-
def bytes2str(payload: bytes, compressed=False) -> str:
    """Decode ``payload`` to text as UTF-8.

    When ``compressed`` is true the payload is first passed through
    ``decompress_payload``.
    """
    raw = decompress_payload(payload) if compressed else payload
    return raw.decode("utf-8")
222+
223+
224+
if __name__ == "__main__":
    # Manual size comparison of the two JSON encodings, with a round-trip
    # check that each one decrypts back to the original text.
    k = "*" * 16
    test = "this is a test text for checking size of encryption and stuff"
    print(len(test))  # 61

    # Sample output recorded by the original author:
    # b64=True  -> 174 chars
    # {"ciphertext": "MkTc1LSK3jugt5SXapAeSrD6YWnYdSJ5oqF2bWYcnFpAYgjAgcTFXiKL3wBsqVKY52SkO5mjkqr7i/0A5A==", "tag": "37WNN8e23Mj0LlOxu9cjnQ==", "nonce": "inRwcb0H1Xu6pp80WFeJvg=="}
    # b64=False -> 228 chars
    # {"ciphertext": "64c65bad86a3582097aa4958b7c9555e8bf7deeac6bdf8b5f648cc360aaf50062ae9c635f602b3c66b2de1eece57666b3412a26f55bbd5ace2f601d8c2", "tag": "ce550c1e399c92bb26bf3c171c212e7d", "nonce": "84d045071b05bf005145ce071df0ed41"}
    for use_b64 in (True, False):
        encjson = encrypt_as_json(k, test, b64=use_b64)
        print(len(encjson))
        assert decrypt_from_json(k, encjson) == test

0 commit comments

Comments
 (0)