Skip to content

Commit c4f8256

Browse files
feat:more encodings (#55)
* feat:more encodings * test * test * rm b100p * Update .github/workflows/unit_tests.yml Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
1 parent 3767782 commit c4f8256

File tree

15 files changed

+623
-108
lines changed

15 files changed

+623
-108
lines changed

.github/workflows/build_tests.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
- name: Setup Python
1414
uses: actions/setup-python@v1
1515
with:
16-
python-version: 3.8
16+
python-version: "3.10"
1717
- name: Install Build Tools
1818
run: |
1919
python -m pip install build wheel

.github/workflows/unit_tests.yml

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Runs the unit-test suite on pull requests against dev, pushes to master,
# and on manual dispatch. Changes that only touch docs/metadata are ignored.
name: Run UnitTests
on:
  pull_request:
    branches:
      - dev
    paths-ignore:
      - 'hivemind_bus_client/version.py'
      - '.github/**'
      - '.gitignore'
      - 'LICENSE'
      - 'CHANGELOG.md'
      - 'MANIFEST.in'
      - 'README.md'
  push:
    branches:
      - master
    paths-ignore:
      - 'hivemind_bus_client/version.py'
      - '.github/**'
      - '.gitignore'
      - 'LICENSE'
      - 'CHANGELOG.md'
      - 'MANIFEST.in'
      - 'README.md'
  workflow_dispatch:

jobs:
  unit_tests:
    strategy:
      matrix:
        python-version: ["3.10", "3.11" ]
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v2
      - name: Set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install System Dependencies
        run: |
          sudo apt-get update
          # apt-get with -y: `apt` has no stable scripting interface and a
          # confirmation prompt would stall/abort a non-interactive job.
          sudo apt-get install -y python3-dev swig
          python -m pip install build wheel
      - name: Install repo
        run: |
          pip install -e .
      - name: Install test dependencies
        run: |
          pip install -r test/requirements.txt
      - name: Run unittests
        run: |
          pytest --cov=hivemind_bus_client --cov-report xml test
      - name: Upload coverage
        # Upload once per run (from a single matrix entry) to avoid duplicate reports.
        if: "${{ matrix.python-version == '3.11' }}"
        env:
          CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
        uses: codecov/codecov-action@v2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from hivemind_bus_client.encodings.z85b import Z85B
2+
from hivemind_bus_client.encodings.z85p import Z85P
3+
from hivemind_bus_client.encodings.b91 import B91

hivemind_bus_client/encodings/b91.py

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
from typing import Union
2+
3+
4+
class B91:
    """
    Base91 codec.

    Input bytes are accumulated into a little-endian bit queue and emitted in
    13- or 14-bit groups; every group is written as two symbols of a
    91-character alphabet (low digit first).
    """

    ALPHABET = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!', '#', '$',
        '%', '&', '(', ')', '*', '+', ',', '.', '/', ':', ';', '<', '=',
        '>', '?', '@', '[', ']', '^', '_', '`', '{', '|', '}', '~', '"'
    ]

    # Reverse lookup: alphabet character -> its index (0..90).
    DECODE_TABLE = {char: idx for idx, char in enumerate(ALPHABET)}

    @classmethod
    def decode(cls, encoded_data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
        """
        Decode Base91 text back into the original bytes.

        Args:
            encoded_data (Union[str, bytes]): Base91-encoded input. `bytes`
                (or `bytearray`) input is first converted to text using `encoding`.
            encoding (str): Text encoding used to interpret `encoded_data` when
                it is given as bytes. Default is 'utf-8'.

        Returns:
            bytes: The decoded binary data.

        Raises:
            ValueError: If the input contains a character outside the Base91 alphabet.
        """
        if isinstance(encoded_data, (bytes, bytearray)):
            encoded_data = encoded_data.decode(encoding)

        table = cls.DECODE_TABLE
        pending = -1  # first symbol of the current pair, or -1 when none is held
        acc = 0       # bit queue, least-significant bits are the oldest
        nbits = 0     # number of valid bits currently in `acc`
        out = bytearray()

        for char in encoded_data:
            try:
                digit = table[char]
            except KeyError:
                raise ValueError(f"Invalid Base91 character: {char}") from None

            if pending < 0:
                pending = digit
                continue

            # Second symbol of the pair: rebuild the 13/14-bit group.
            pending += digit * 91
            acc |= pending << nbits
            # Mirrors the encoder: groups whose low 13 bits exceed 88 carried 13 bits.
            nbits += 13 if (pending & 8191) > 88 else 14
            while nbits >= 8:
                out.append(acc & 255)
                acc >>= 8
                nbits -= 8
            pending = -1

        # A trailing unpaired symbol carries the final partial byte.
        if pending >= 0:
            out.append((acc | pending << nbits) & 255)

        return bytes(out)

    @classmethod
    def encode(cls, data: Union[bytes, str], encoding: str = "utf-8") -> bytes:
        """
        Encode binary data as Base91.

        Args:
            data (Union[bytes, str]): Data to encode. `str` input is first
                converted to bytes using `encoding`.
            encoding (str): Text encoding used to convert `str` input to bytes
                and to convert the Base91 text into the returned bytes.
                Default is 'utf-8'.

        Returns:
            bytes: The Base91 text, encoded with `encoding`.
        """
        if isinstance(data, str):
            data = data.encode(encoding)

        alphabet = cls.ALPHABET
        acc = 0    # bit queue, least-significant bits are the oldest
        nbits = 0  # number of valid bits currently in `acc`
        out = []

        for byte in data:
            acc |= byte << nbits
            nbits += 8
            if nbits > 13:
                group = acc & 8191
                if group > 88:
                    acc >>= 13
                    nbits -= 13
                else:
                    # Two base-91 digits cover 8281 values, so when the low
                    # 13 bits are <= 88 a 14th bit still fits in the pair.
                    group = acc & 16383
                    acc >>= 14
                    nbits -= 14
                high, low = divmod(group, 91)
                out.append(alphabet[low])
                out.append(alphabet[high])

        # Flush leftover bits: one symbol suffices unless the value needs two digits.
        if nbits:
            out.append(alphabet[acc % 91])
            if nbits > 7 or acc > 90:
                out.append(alphabet[acc // 91])

        return ''.join(out).encode(encoding)

hivemind_bus_client/encodings/z85b.py

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
2+
Python implementation of the Z85b base-85 encoding.
3+
4+
Z85b is a variation of the ZMQ RFC 32 Z85 base-85 encoding with the following differences:
5+
1. Little-endian encoding (to facilitate alignment with lower byte indices).
6+
2. No requirement for a multiple of 4/5 length.
7+
3. `Z85B.decode()` eliminates whitespace from the input.
8+
4. `Z85B.decode()` raises a clear exception if invalid characters are encountered.
9+
10+
This file is a derivative work of https://gist.github.com/minrk/6357188?permalink_comment_id=2366506#gistcomment-2366506
11+
12+
Copyright (c) 2013 Brian Granger, Min Ragan-Kelley
13+
Distributed under the terms of the New BSD License.
14+
"""
15+
import re
16+
import struct
17+
from typing import Union
18+
19+
from hivemind_bus_client.exceptions import Z85DecodeError
20+
21+
22+
class Z85B:
    """
    Z85b codec: little-endian base-85 encoding without length restrictions.

    Every 4 input bytes are read as one little-endian 32-bit integer and
    written as 5 base-85 symbols, least-significant digit first. Inputs whose
    length is not a multiple of 4 are zero-padded before encoding and the
    matching number of trailing symbols is dropped from the output.
    """

    # Z85CHARS is the base 85 symbol table
    Z85CHARS = bytearray(b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#")

    # Z85MAP maps each symbol's byte code to its index in [0, 84]
    Z85MAP = {char: idx for idx, char in enumerate(Z85CHARS)}

    # Powers of 85 for encoding/decoding (ascending: least-significant digit first)
    _85s = [85 ** i for i in range(5)]

    # Padding lengths for encoding and decoding, indexed by len % 4 and len % 5
    _E_PADDING = [0, 3, 2, 1]
    _D_PADDING = [0, 4, 3, 2, 1]

    @classmethod
    def encode(cls, data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
        """
        Encode raw bytes into Z85b format.

        Args:
            data (Union[str, bytes]): Input data to encode.
            encoding (str): The encoding used to convert `data` to bytes when
                it is provided as a string. Default is 'utf-8'.

        Returns:
            bytes: Z85b-encoded bytes.
        """
        if isinstance(data, str):
            data = data.encode(encoding)
        # Work on a copy so the caller's buffer is never modified by the padding.
        data = bytearray(data)
        padding = cls._E_PADDING[len(data) % 4]
        data += b'\x00' * padding
        nvalues = len(data) // 4

        # Pack the raw bytes into little-endian 32-bit integers
        values = struct.unpack(f'<{nvalues}I', data)

        chars = cls.Z85CHARS
        powers = cls._85s
        encoded = bytearray()
        for value in values:
            encoded.extend(chars[(value // power) % 85] for power in powers)

        # The zero padding only affects the most-significant digits, which are
        # written last, so the same number of trailing symbols can be dropped.
        if padding:
            encoded = encoded[:-padding]
        return bytes(encoded)

    @classmethod
    def decode(cls, encoded_data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
        """
        Decode Z85b-encoded bytes into raw bytes.

        Args:
            encoded_data (Union[str, bytes]): Z85b-encoded data. Whitespace is ignored.
            encoding (str): The encoding used to convert `encoded_data` to
                bytes when it is provided as a string. Default is 'utf-8'.

        Returns:
            bytes: Decoded raw bytes.

        Raises:
            Z85DecodeError: If an invalid character is encountered, or if a
                5-symbol block decodes to a value that does not fit in 32 bits.
        """
        if isinstance(encoded_data, str):
            encoded_data = encoded_data.encode(encoding)
        # Normalize input by removing whitespace
        encoded_data = re.sub(rb'\s+', b'', encoded_data)

        # NOTE: a lone trailing symbol (len % 5 == 1) maps to 4 padding bytes,
        # so the whole final word is stripped and that symbol contributes nothing.
        padding = cls._D_PADDING[len(encoded_data) % 5]

        table = cls.Z85MAP
        powers = cls._85s
        values = []
        for start in range(0, len(encoded_data), 5):
            block = encoded_data[start:start + 5]
            value = 0
            # zip() stops at the end of a short final block; the missing
            # most-significant digits are implicitly zero.
            for code, power in zip(block, powers):
                try:
                    value += table[code] * power
                except KeyError as e:
                    raise Z85DecodeError(f"Invalid byte code: {code!r}") from e
            if value > 0xFFFFFFFF:
                # Five symbols can express up to 85**5 - 1, which exceeds 32 bits;
                # report it as a decode error instead of leaking struct.error.
                raise Z85DecodeError(f"Invalid block (value exceeds 32 bits): {bytes(block)!r}")
            values.append(value)

        # Unpack the values back into raw bytes
        decoded = struct.pack(f'<{len(values)}I', *values)

        # Remove padding from the decoded output
        if padding:
            decoded = decoded[:-padding]
        return decoded

hivemind_bus_client/encodings/z85p.py

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
from typing import Union
2+
import struct
3+
4+
class Z85P:
    """
    Z85 codec with an explicit padding prefix.

    The input is zero-padded to a multiple of 4 bytes, each 4-byte group is
    read as a big-endian 32-bit integer and written as 5 base-85 symbols
    (most-significant digit first). The first byte of the encoded output holds
    the number of zero bytes that were added, so decoding can remove them.
    """
    Z85CHARS = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#"
    # Maps each symbol's byte code to its index in [0, 84]
    Z85MAP = {c: idx for idx, c in enumerate(Z85CHARS)}

    # Powers of 85 in descending order (most-significant digit first)
    _85s = [85 ** i for i in range(5)][::-1]

    @classmethod
    def encode(cls, rawbytes: Union[str, bytes], encoding: str = "utf-8") -> bytes:
        """
        Encode raw bytes into Z85 format, prefixed with the padding size.

        Args:
            rawbytes (Union[str, bytes]): The input data to be encoded.
            encoding (str): The encoding used to convert `rawbytes` to bytes
                when it is provided as a string. Default is 'utf-8'.

        Returns:
            bytes: One byte holding the number of zero bytes appended to the
            input (0-3), followed by 5 Z85 symbols per 4 input bytes.

        Notes:
            The input is padded with zero bytes so its length is a multiple
            of 4; the caller's object is never modified.
        """
        if isinstance(rawbytes, str):
            rawbytes = rawbytes.encode(encoding)

        padding = (4 - len(rawbytes) % 4) % 4  # Padding to make the length a multiple of 4
        # Build a new object: `+=` on a caller-supplied bytearray would mutate it.
        padded = bytes(rawbytes) + b'\x00' * padding

        nvalues = len(padded) // 4
        values = struct.unpack('>%dI' % nvalues, padded)

        chars = cls.Z85CHARS
        powers = cls._85s
        # The first byte indicates how many padding bytes were added
        encoded = bytearray([padding])
        for v in values:
            encoded.extend(chars[(v // power) % 85] for power in powers)

        return bytes(encoded)

    @classmethod
    def decode(cls, z85bytes: Union[str, bytes], encoding: str = "utf-8") -> bytes:
        """
        Decode a Z85 sequence produced by `encode` back into raw bytes.

        Args:
            z85bytes (Union[str, bytes]): The encoded sequence: a padding-size
                byte followed by Z85 symbols in groups of 5.
            encoding (str): The encoding used to convert `z85bytes` to bytes
                when it is provided as a string. Default is 'utf-8'.

        Returns:
            bytes: The decoded raw bytes with the padding removed. Empty input
            yields empty output.

        Raises:
            ValueError: If the length is not 1 plus a multiple of 5, the
                padding byte is out of range, a symbol is not part of the Z85
                alphabet, or a 5-symbol block does not fit in 32 bits.
        """
        if isinstance(z85bytes, str):
            z85bytes = z85bytes.encode(encoding)

        if len(z85bytes) == 0:
            return b""

        if len(z85bytes) % 5 != 1:
            raise ValueError('Invalid data length, should be divisible by 5 with 1 extra byte for padding indicator.')

        padding = z85bytes[0]  # Read the padding size from the first byte
        # A byte is never negative, so only the upper bound needs checking.
        # (`encode` itself only ever writes values 0-3.)
        if padding > 4:
            raise ValueError('Padding size must be between 0 and 4.')

        body = z85bytes[1:]  # Remove the first byte (padding size byte)

        table = cls.Z85MAP
        powers = cls._85s
        values = []
        for start in range(0, len(body), 5):
            block = body[start:start + 5]
            value = 0
            for code, power in zip(block, powers):
                try:
                    value += table[code] * power
                except KeyError:
                    # Surface the documented exception type instead of a bare KeyError.
                    raise ValueError(f"Invalid Z85 byte code: {code!r}") from None
            if value > 0xFFFFFFFF:
                # Five symbols can express up to 85**5 - 1, which exceeds 32 bits;
                # report it as ValueError instead of leaking struct.error.
                raise ValueError(f"Invalid Z85 block (value exceeds 32 bits): {bytes(block)!r}")
            values.append(value)

        decoded = struct.pack('>%dI' % len(values), *values)
        return decoded[:-padding] if padding else decoded  # Remove padding

0 commit comments

Comments
 (0)