Skip to content
This repository was archived by the owner on Apr 4, 2024. It is now read-only.

Commit 84aaf37

Browse files
authored
SnapshotValueReader completed (#44)
2 parents ba8329a + d04a06d commit 84aaf37

File tree

5 files changed

+296
-4
lines changed

5 files changed

+296
-4
lines changed

Diff for: python/selfie-lib/selfie_lib/LineReader.py

+6-4
Original file line number | Diff line number | Diff line change
@@ -23,12 +23,14 @@ def __detect_newline_type(self) -> bool:
2323
def unix_newlines(self) -> bool:
    """Return the newline-style flag stored on this reader.

    Returns:
        The value of the private ``__uses_unix_newlines`` attribute.
        Presumably ``True`` means the content uses ``\\n`` line endings
        (how the flag is computed is not visible here -- TODO confirm).
    """
    return self.__uses_unix_newlines
2525

26-
def read_line(self) -> str:
26+
def read_line(self) -> str | None:
2727
line_bytes = self.__buffer.readline()
28-
if line_bytes:
28+
if line_bytes == b"":
29+
return None
30+
else:
2931
self.__line_count += 1 # Increment line count for each line read
30-
line = line_bytes.decode("utf-8")
31-
return line.rstrip("\r\n" if not self.__uses_unix_newlines else "\n")
32+
line = line_bytes.decode("utf-8")
33+
return line.rstrip("\r\n" if not self.__uses_unix_newlines else "\n")
3234

3335
# Method to get the current line number
3436
def get_line_number(self) -> int:

Diff for: python/selfie-lib/selfie_lib/ParseException.py

+4
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
class ParseException(Exception):
    """Error raised when snapshot content cannot be parsed.

    The exception message has the form ``"Line <n>: <message>"`` where
    ``<n>`` is the line number reported by the reader at construction time.
    """

    def __init__(self, line_reader, message: str):
        """Build the exception from a reader position and a description.

        Args:
            line_reader: Any object exposing ``get_line_number()``; its
                result is stored on ``self.line``.
            message: Human-readable description of the problem.
        """
        # Line number captured once, so later reads do not change the report.
        self.line = line_reader.get_line_number()
        super().__init__(f"Line {self.line}: {message}")

Diff for: python/selfie-lib/selfie_lib/SnapshotValueReader.py

+158
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,158 @@
1+
import base64
2+
3+
from abc import ABC, abstractmethod
4+
from typing import Union
5+
from .PerCharacterEscaper import PerCharacterEscaper
6+
from .ParseException import ParseException
7+
from .LineReader import LineReader
8+
9+
10+
def unix_newlines(string: str) -> str:
    """Return *string* with every ``\\r\\n`` sequence replaced by ``\\n``.

    A lone ``\\r`` that is not followed by ``\\n`` is left untouched.
    """
    return string.replace("\r\n", "\n")
12+
13+
14+
class SnapshotValue(ABC):
    """Abstract holder for a snapshot payload that is either text or bytes.

    Use :meth:`of` to build the concrete variant for a given value.
    """

    @property
    def is_binary(self) -> bool:
        """``True`` when this value is a ``SnapshotValueBinary`` instance."""
        return isinstance(self, SnapshotValueBinary)

    @abstractmethod
    def value_binary(self) -> bytes:
        """Return the payload as bytes."""

    @abstractmethod
    def value_string(self) -> str:
        """Return the payload as text."""

    @staticmethod
    def of(value: Union[bytes, str]) -> "SnapshotValue":
        """Wrap *value* in the matching concrete ``SnapshotValue``.

        Args:
            value: ``bytes`` for a binary value, or ``str`` for a text
                value. Text has ``\\r\\n`` normalised to ``\\n`` first.

        Raises:
            TypeError: If *value* is neither ``bytes`` nor ``str``.
        """
        if isinstance(value, bytes):
            return SnapshotValueBinary(value)
        if isinstance(value, str):
            return SnapshotValueString(unix_newlines(value))
        raise TypeError("Value must be either bytes or str")
35+
36+
37+
class SnapshotValueBinary(SnapshotValue):
    """``SnapshotValue`` variant that carries raw bytes."""

    def __init__(self, value: bytes):
        """Store *value* as the binary payload."""
        self._value = value

    def value_binary(self) -> bytes:
        """Return the stored bytes."""
        return self._value

    def value_string(self) -> str:
        """Always raise: a binary value has no text form.

        Raises:
            NotImplementedError: On every call.
        """
        raise NotImplementedError("This is a binary value.")
46+
47+
48+
class SnapshotValueString(SnapshotValue):
    """``SnapshotValue`` variant that carries text."""

    def __init__(self, value: str):
        """Store *value* as the text payload, exactly as given."""
        self._value = value

    def value_binary(self) -> bytes:
        """Always raise: a string value has no binary form.

        Raises:
            NotImplementedError: On every call.
        """
        raise NotImplementedError("This is a string value.")

    def value_string(self) -> str:
        """Return the stored text."""
        return self._value
57+
58+
59+
class SnapshotValueReader:
    """Pull-style reader for snapshot content made of ``╔═ key ═╗`` entries.

    Each entry is a key line followed by zero or more body lines. A body
    ends at the next line that begins with ``╔`` or at end of input.
    ``peek_key`` looks at the upcoming key without consuming it, while
    ``next_value`` and ``skip_value`` consume the whole entry.
    """

    KEY_FIRST_CHAR = "╔"
    KEY_START = "╔═ "
    KEY_END = " ═╗"
    FLAG_BASE64 = " ═╗ base64"
    # In a Python str literal "\ud801\udf43" is two lone surrogate code
    # points, not one astral character, and text decoded from UTF-8 never
    # contains lone surrogates. The characters are therefore spelled with
    # \U escapes so they match what is actually read from a file.
    _BODY_ESCAPE_CHAR = "\U00010743"  # 𐝃
    _BODY_KEY_FIRST_CHAR = "\U00010741"  # 𐝁, stands for a leading "╔" in a body
    name_esc = PerCharacterEscaper.specified_escape("\\\\[(])\nn\tt╔┌╗┐═─")
    body_esc = PerCharacterEscaper.self_escape(
        _BODY_ESCAPE_CHAR + _BODY_KEY_FIRST_CHAR
    )

    def __init__(self, line_reader: LineReader):
        """Create a reader that pulls lines from *line_reader*."""
        self.line_reader = line_reader
        # One-line lookahead: holds a line that was read but not yet consumed.
        self.line: str | None = None
        self.unix_newlines = self.line_reader.unix_newlines()

    def peek_key(self) -> str | None:
        """Return the next key without consuming it, or ``None`` at end of input.

        Raises:
            ParseException: If the upcoming line is not a well-formed key line.
        """
        return self.__next_key()

    def next_value(self) -> SnapshotValue:
        """Consume the next entry and return its value.

        Returns:
            A binary ``SnapshotValue`` when the key line carries the
            ``" ═╗ base64"`` flag (the body is base64-decoded), otherwise a
            string ``SnapshotValue`` holding the unescaped body.

        Raises:
            ParseException: If there is no further entry or the key line is
                malformed.
        """
        # Validate key
        self.__next_key()
        key_line = self.__next_line()
        if key_line is None:
            raise ParseException(self.line_reader, "Expected to validate key")
        is_base64 = self.FLAG_BASE64 in key_line
        self.__reset_line()

        # Read value
        body_lines: list[str] = []
        marker = self._BODY_KEY_FIRST_CHAR

        def consumer(line: str) -> None:
            # A body line cannot start with "╔" (that would look like a key),
            # so such lines are stored with a leading 𐝁 instead; undo that here.
            if line.startswith(marker):
                line = self.KEY_FIRST_CHAR + line[len(marker):]
            body_lines.append(line)

        self.__scan_value(consumer)

        # Lines are separated by, not terminated with, "\n".
        raw_string = "\n".join(body_lines)

        # Decode or unescape value
        if is_base64:
            return SnapshotValue.of(base64.b64decode(raw_string))
        return SnapshotValue.of(self.body_esc.unescape(raw_string))

    def skip_value(self) -> None:
        """Consume the next entry and discard its body.

        Raises:
            ParseException: If the upcoming line is not a well-formed key line.
        """
        self.__next_key()
        self.__reset_line()
        self.__scan_value(lambda line: None)

    def __scan_value(self, consumer) -> None:
        """Feed body lines to *consumer* until a key line or end of input.

        The terminating key line stays in the lookahead slot so the next
        ``peek_key`` sees it.
        """
        line = self.__next_line()
        while line is not None and not line.startswith(self.KEY_FIRST_CHAR):
            self.__reset_line()
            consumer(line)
            line = self.__next_line()

    def __next_key(self) -> str | None:
        """Parse the key from the current line without consuming the line.

        Returns:
            The unescaped key, or ``None`` at end of input.

        Raises:
            ParseException: If the line does not start with ``"╔═ "``, does
                not contain ``" ═╗"``, or the key has leading or trailing
                spaces.
        """
        line = self.__next_line()
        if line is None:
            return None
        # The marker must be at the very start of the line; merely appearing
        # somewhere in the line is not enough.
        if not line.startswith(self.KEY_START):
            raise ParseException(
                self.line_reader, f"Expected to start with '{self.KEY_START}'"
            )
        # The first " ═╗" ends the key; anything after it is ignored here.
        end_index = line.find(self.KEY_END)
        if end_index == -1:
            raise ParseException(
                self.line_reader, f"Expected to contain '{self.KEY_END}'"
            )
        key = line[len(self.KEY_START) : end_index]
        if key.startswith(" ") or key.endswith(" "):
            space_type = "Leading" if key.startswith(" ") else "Trailing"
            raise ParseException(
                self.line_reader, f"{space_type} spaces are disallowed: '{key}'"
            )
        return self.name_esc.unescape(key)

    def __next_line(self) -> str | None:
        """Return the lookahead line, reading a new one only if the slot is empty."""
        if self.line is None:
            self.line = self.line_reader.read_line()
        return self.line

    def __reset_line(self) -> None:
        """Mark the lookahead line as consumed."""
        self.line = None

    @classmethod
    def of(cls, content) -> "SnapshotValueReader":
        """Create a reader over *content* via ``LineReader.for_string``."""
        return cls(LineReader.for_string(content))

    @classmethod
    def of_binary(cls, content) -> "SnapshotValueReader":
        """Create a reader over *content* via ``LineReader.for_binary``."""
        return cls(LineReader.for_binary(content))

Diff for: python/selfie-lib/selfie_lib/__init__.py

+2
Original file line number | Diff line number | Diff line change
@@ -2,3 +2,5 @@
22
from .Slice import Slice as Slice
33
from .SourceFile import SourceFile as SourceFile
44
from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper
5+
from .SnapshotValueReader import SnapshotValueReader as SnapshotValueReader
6+
from .ParseException import ParseException as ParseException

Diff for: python/selfie-lib/tests/SnapshotValueReader_test.py

+126
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,126 @@
1+
import pytest
2+
from selfie_lib import SnapshotValueReader, ParseException
3+
4+
5+
class TestSnapshotValueReader:
    """Behavioural tests for ``SnapshotValueReader``.

    Snapshot content is assembled from explicit line lists so that
    significant leading and trailing whitespace stays visible in the source.
    """

    def test_no_escaping_needed(self):
        """Plain bodies are returned verbatim, including edge whitespace."""
        reader = SnapshotValueReader.of(
            "\n".join(
                [
                    "╔═ 00_empty ═╗",
                    "╔═ 01_singleLineString ═╗",
                    "this is one line",
                    "╔═ 01a_singleLineLeadingSpace ═╗",
                    " the leading space is significant",
                    "╔═ 01b_singleLineTrailingSpace ═╗",
                    "the trailing space is significant ",
                    "╔═ 02_multiLineStringTrimmed ═╗",
                    "Line 1",
                    "Line 2",
                    "╔═ 03_multiLineStringTrailingNewline ═╗",
                    "Line 1",
                    "Line 2",
                    "",
                    "╔═ 04_multiLineStringLeadingNewline ═╗",
                    "",
                    "Line 1",
                    "Line 2",
                    "╔═ 05_notSureHowPythonMultilineWorks ═╗",
                    "",  # content ends with a newline after the last key
                ]
            )
        )
        # Peeking twice must not consume the key.
        assert reader.peek_key() == "00_empty"
        assert reader.peek_key() == "00_empty"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "01_singleLineString"
        assert reader.peek_key() == "01_singleLineString"
        assert reader.next_value().value_string() == "this is one line"
        assert reader.peek_key() == "01a_singleLineLeadingSpace"
        assert reader.next_value().value_string() == " the leading space is significant"
        assert reader.peek_key() == "01b_singleLineTrailingSpace"
        assert (
            reader.next_value().value_string() == "the trailing space is significant "
        )
        assert reader.peek_key() == "02_multiLineStringTrimmed"
        assert reader.next_value().value_string() == "Line 1\nLine 2"
        assert reader.peek_key() == "03_multiLineStringTrailingNewline"
        assert reader.next_value().value_string() == "Line 1\nLine 2\n"
        assert reader.peek_key() == "04_multiLineStringLeadingNewline"
        assert reader.next_value().value_string() == "\nLine 1\nLine 2"
        assert reader.peek_key() == "05_notSureHowPythonMultilineWorks"
        assert reader.next_value().value_string() == ""

    def test_invalid_names(self):
        """Malformed key lines raise ``ParseException`` with a precise message."""
        with pytest.raises(ParseException) as exc_info:
            SnapshotValueReader.of("╔═name ═╗").peek_key()
        assert "Expected to start with '╔═ '" in str(exc_info.value)

        with pytest.raises(ParseException) as exc_info:
            SnapshotValueReader.of("╔═ name═╗").peek_key()
        assert "Expected to contain ' ═╗'" in str(exc_info.value)

        # Two spaces after the opening marker: the second one is a leading
        # space of the key itself.
        with pytest.raises(ParseException) as exc_info:
            SnapshotValueReader.of("╔═  name ═╗").peek_key()
        assert "Leading spaces are disallowed: ' name'" in str(exc_info.value)

        # Two spaces before the closing marker: the first one is a trailing
        # space of the key itself.
        with pytest.raises(ParseException) as exc_info:
            SnapshotValueReader.of("╔═ name  ═╗").peek_key()
        assert "Trailing spaces are disallowed: 'name '" in str(exc_info.value)

        assert SnapshotValueReader.of("╔═ name ═╗ comment okay").peek_key() == "name"
        assert SnapshotValueReader.of("╔═ name ═╗okay here too").peek_key() == "name"
        assert (
            SnapshotValueReader.of(
                "╔═ name ═╗ okay ╔═ ═╗ (it's the first ' ═╗' that counts)"
            ).peek_key()
            == "name"
        )

    def test_escape_characters_in_name(self):
        """Backslash escapes in key names are decoded by ``peek_key``."""
        reader = SnapshotValueReader.of(
            "\n".join(
                [
                    "╔═ test with \\(square brackets\\) in name ═╗",
                    "╔═ test with \\\\backslash\\\\ in name ═╗",
                    "╔═ test with\\nnewline\\nin name ═╗",
                    "╔═ test with \\ttab\\t in name ═╗",
                    # Box-drawing characters are written as their escaped
                    # stand-ins so they cannot be mistaken for key markers.
                    "╔═ test with \\┌\\─ ascii art \\─\\┐ in name ═╗",
                ]
            )
        )
        assert reader.peek_key() == "test with [square brackets] in name"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "test with \\backslash\\ in name"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "test with\nnewline\nin name"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "test with \ttab\t in name"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "test with ╔═ ascii art ═╗ in name"
        assert reader.next_value().value_string() == ""

    def assert_key_value_with_skip(self, test_content, key, expected_value):
        """Skip to *key*, check its value, then skip through to end of input."""
        reader = SnapshotValueReader.of(test_content)
        while reader.peek_key() != key:
            reader.skip_value()
        assert reader.peek_key() == key
        assert reader.next_value().value_string() == expected_value
        while reader.peek_key() is not None:
            reader.skip_value()

    def test_skip_values(self):
        """``skip_value`` leaves the reader positioned on the following key."""
        test_content = "\n".join(
            [
                "╔═ 00_empty ═╗",
                "╔═ 01_singleLineString ═╗",
                "this is one line",
                "╔═ 02_multiLineStringTrimmed ═╗",
                "Line 1",
                "Line 2",
                "╔═ 05_notSureHowKotlinMultilineWorks ═╗",
            ]
        )
        self.assert_key_value_with_skip(test_content, "00_empty", "")
        self.assert_key_value_with_skip(
            test_content, "01_singleLineString", "this is one line"
        )
        self.assert_key_value_with_skip(
            test_content, "02_multiLineStringTrimmed", "Line 1\nLine 2"
        )

    def test_binary(self):
        """A key flagged ``base64`` yields the decoded bytes."""
        reader = SnapshotValueReader.of(
            "╔═ Apple ═╗ base64 length 3 bytes\nc2Fk"
        )
        assert reader.peek_key() == "Apple"
        assert reader.next_value().value_binary() == b"sad"

0 commit comments

Comments
 (0)