Skip to content
This repository was archived by the owner on Apr 4, 2024. It is now read-only.

SnapshotValueReader completed #44

Merged
merged 10 commits into from
Mar 28, 2024
10 changes: 6 additions & 4 deletions python/selfie-lib/selfie_lib/LineReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ def __detect_newline_type(self) -> bool:
def unix_newlines(self) -> bool:
    """Return the stored newline-style flag of this reader.

    read_line consults the same flag: when it is True only a trailing LF is
    stripped from each line, otherwise trailing CR and LF are stripped.
    NOTE(review): the flag is assigned outside this view (presumably by the
    newline-detection helper) -- confirm there.
    """
    return self.__uses_unix_newlines

def read_line(self) -> str:
def read_line(self) -> str | None:
line_bytes = self.__buffer.readline()
if line_bytes:
if line_bytes == b"":
return None
else:
self.__line_count += 1 # Increment line count for each line read
line = line_bytes.decode("utf-8")
return line.rstrip("\r\n" if not self.__uses_unix_newlines else "\n")
line = line_bytes.decode("utf-8")
return line.rstrip("\r\n" if not self.__uses_unix_newlines else "\n")

# Method to get the current line number
def get_line_number(self) -> int:
Expand Down
4 changes: 4 additions & 0 deletions python/selfie-lib/selfie_lib/ParseException.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class ParseException(Exception):
    """Parse error whose message is prefixed with the reader's line number."""

    def __init__(self, line_reader, message):
        """Record the reader's current line number and build the message.

        Args:
            line_reader: object exposing ``get_line_number()``.
            message: description of what went wrong.
        """
        # Capture the position first so the same value is stored and reported.
        line_number = line_reader.get_line_number()
        self.line = line_number
        description = f"Line {self.line}: {message}"
        super().__init__(description)
158 changes: 158 additions & 0 deletions python/selfie-lib/selfie_lib/SnapshotValueReader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
import base64

from abc import ABC, abstractmethod
from typing import Union
from .PerCharacterEscaper import PerCharacterEscaper
from .ParseException import ParseException
from .LineReader import LineReader


def unix_newlines(string: str) -> str:
    """Return *string* with every CRLF pair collapsed into a single LF."""
    crlf, lf = "\r\n", "\n"
    # Splitting on CRLF and re-joining with LF rewrites exactly the CRLF pairs.
    return lf.join(string.split(crlf))


class SnapshotValue(ABC):
    """Abstract snapshot payload; concrete subclasses hold bytes or text."""

    @property
    def is_binary(self) -> bool:
        """True when this instance is a SnapshotValueBinary."""
        binary = isinstance(self, SnapshotValueBinary)
        return binary

    @abstractmethod
    def value_binary(self) -> bytes:
        """Return the payload as bytes."""

    @abstractmethod
    def value_string(self) -> str:
        """Return the payload as a string."""

    @staticmethod
    def of(value: Union[bytes, str]) -> "SnapshotValue":
        """Wrap *value* in the matching concrete snapshot value.

        Strings have CRLF normalised through unix_newlines before being
        wrapped; bytes are wrapped untouched.

        Raises:
            TypeError: if *value* is neither bytes nor str.
        """
        if isinstance(value, str):
            return SnapshotValueString(unix_newlines(value))
        if isinstance(value, bytes):
            return SnapshotValueBinary(value)
        raise TypeError("Value must be either bytes or str")


class SnapshotValueBinary(SnapshotValue):
    """Snapshot value that wraps raw bytes."""

    def __init__(self, value: bytes):
        """Store *value* as the binary payload."""
        self._value = value

    def value_string(self) -> str:
        """Always raise: a binary value has no string form."""
        raise NotImplementedError("This is a binary value.")

    def value_binary(self) -> bytes:
        """Return the wrapped bytes unchanged."""
        return self._value


class SnapshotValueString(SnapshotValue):
    """Snapshot value that wraps text."""

    def __init__(self, value: str):
        """Store *value* as the string payload."""
        self._value = value

    def value_string(self) -> str:
        """Return the wrapped string unchanged."""
        return self._value

    def value_binary(self) -> bytes:
        """Always raise: a string value has no binary form."""
        raise NotImplementedError("This is a string value.")


class SnapshotValueReader:
    """Pull-style reader over snapshot text made of key headers and values.

    A key header is a line containing ``╔═ <key> ═╗``; every following line
    that does not begin with ``╔`` belongs to that key's value. The reader
    keeps at most one look-ahead line in ``self.line`` so a key can be
    peeked repeatedly before its value is consumed or skipped.
    """

    KEY_FIRST_CHAR = "╔"
    KEY_START = "╔═ "
    KEY_END = " ═╗"
    FLAG_BASE64 = " ═╗ base64"
    # A value line that starts with this code point stands for a line that
    # really starts with KEY_FIRST_CHAR (see next_value).
    _BODY_KEY_MARKER = "\U00010741"
    name_esc = PerCharacterEscaper.specified_escape("\\\\[(])\nn\tt╔┌╗┐═─")
    # Written with \U escapes on purpose: in Python "\ud801\udf43" is two
    # lone surrogates, not the single code point U+10743, and lone
    # surrogates never occur in text decoded from UTF-8.
    # NOTE(review): assumes PerCharacterEscaper.self_escape works per code
    # point -- confirm against its implementation.
    body_esc = PerCharacterEscaper.self_escape("\U00010743\U00010741")

    def __init__(self, line_reader: LineReader):
        """Wrap *line_reader*; no line is read until a key or value is requested."""
        self.line_reader = line_reader
        # One-line look-ahead buffer; None means "nothing peeked yet".
        self.line: str | None = None
        self.unix_newlines = self.line_reader.unix_newlines()

    def peek_key(self) -> str | None:
        """Return the next key without consuming it, or None at end of input.

        Raises:
            ParseException: if the pending line is not a well-formed key header.
        """
        return self.__next_key()

    def next_value(self) -> SnapshotValue:
        """Consume the pending key header and return the value that follows it.

        Returns:
            A binary SnapshotValue when the header carries the base64 flag,
            otherwise a string SnapshotValue with body escapes undone.

        Raises:
            ParseException: if there is no header line left, or the header
                is malformed.
        """
        # Validate key
        self.__next_key()
        header = self.__next_line()
        if header is None:
            raise ParseException(self.line_reader, "Expected to validate key")
        is_base64 = self.FLAG_BASE64 in header
        self.__reset_line()

        # Read value
        lines: list[str] = []
        marker = self._BODY_KEY_MARKER

        def consumer(line: str) -> None:
            # Restore a leading KEY_FIRST_CHAR that was swapped for the marker.
            if line.startswith(marker):
                line = self.KEY_FIRST_CHAR + line[len(marker):]
            lines.append(line)

        self.__scan_value(consumer)
        # The value's lines are separated, not terminated, by newlines.
        raw_string = "\n".join(lines)

        # Decode or unescape value
        if is_base64:
            return SnapshotValue.of(base64.b64decode(raw_string))
        return SnapshotValue.of(self.body_esc.unescape(raw_string))

    def skip_value(self):
        """Consume the pending key header and discard its value lines."""
        self.__next_key()
        self.__reset_line()
        self.__scan_value(lambda line: None)

    def __scan_value(self, consumer):
        """Feed *consumer* each line up to (not including) the next key header."""
        next_line = self.__next_line()
        while next_line is not None and not next_line.startswith(
            self.KEY_FIRST_CHAR
        ):
            self.__reset_line()
            consumer(next_line)
            next_line = self.__next_line()

    def __next_key(self) -> str | None:
        """Parse the pending line as a key header without consuming it.

        Returns:
            The unescaped key, or None when the input is exhausted.

        Raises:
            ParseException: if the start or end marker is missing, or the
                key has leading or trailing spaces.
        """
        line = self.__next_line()
        if line is None:
            return None
        start_index = line.find(self.KEY_START)
        end_index = line.find(self.KEY_END)
        if start_index == -1:
            raise ParseException(
                self.line_reader, f"Expected to start with '{self.KEY_START}'"
            )
        if end_index == -1:
            raise ParseException(
                self.line_reader, f"Expected to contain '{self.KEY_END}'"
            )
        # Only the first end marker counts; anything after it is ignored.
        key = line[start_index + len(self.KEY_START) : end_index]
        if key.startswith(" ") or key.endswith(" "):
            space_type = "Leading" if key.startswith(" ") else "Trailing"
            raise ParseException(
                self.line_reader, f"{space_type} spaces are disallowed: '{key}'"
            )
        return self.name_esc.unescape(key)

    def __next_line(self) -> str | None:
        """Return the look-ahead line, reading one from the reader if needed."""
        if self.line is None:
            self.line = self.line_reader.read_line()
        return self.line

    def __reset_line(self):
        """Mark the look-ahead line as consumed."""
        self.line = None

    @classmethod
    def of(cls, content):
        """Build a reader from *content* via LineReader.for_string."""
        return cls(LineReader.for_string(content))

    @classmethod
    def of_binary(cls, content):
        """Build a reader from *content* via LineReader.for_binary."""
        return cls(LineReader.for_binary(content))
2 changes: 2 additions & 0 deletions python/selfie-lib/selfie_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
from .Slice import Slice as Slice
from .SourceFile import SourceFile as SourceFile
from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper
from .SnapshotValueReader import SnapshotValueReader as SnapshotValueReader
from .ParseException import ParseException as ParseException
126 changes: 126 additions & 0 deletions python/selfie-lib/tests/SnapshotValueReader_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import pytest
from selfie_lib import SnapshotValueReader, ParseException


class TestSnapshotValueReader:
    """Behavioural tests for SnapshotValueReader.

    Fixtures are spelled as explicit single-line literals joined by
    newline escapes so that significant leading, trailing and doubled
    spaces cannot be lost to re-indentation or whitespace trimming.
    """

    def test_no_escaping_needed(self):
        """Plain values round-trip, including significant whitespace and blank lines."""
        reader = SnapshotValueReader.of(
            "╔═ 00_empty ═╗\n"
            "╔═ 01_singleLineString ═╗\n"
            "this is one line\n"
            "╔═ 01a_singleLineLeadingSpace ═╗\n"
            " the leading space is significant\n"
            "╔═ 01b_singleLineTrailingSpace ═╗\n"
            "the trailing space is significant \n"
            "╔═ 02_multiLineStringTrimmed ═╗\n"
            "Line 1\n"
            "Line 2\n"
            "╔═ 03_multiLineStringTrailingNewline ═╗\n"
            "Line 1\n"
            "Line 2\n"
            "\n"
            "╔═ 04_multiLineStringLeadingNewline ═╗\n"
            "\n"
            "Line 1\n"
            "Line 2\n"
            "╔═ 05_notSureHowPythonMultilineWorks ═╗\n"
        )
        # Peeking twice must not advance the reader.
        assert reader.peek_key() == "00_empty"
        assert reader.peek_key() == "00_empty"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "01_singleLineString"
        assert reader.peek_key() == "01_singleLineString"
        assert reader.next_value().value_string() == "this is one line"
        assert reader.peek_key() == "01a_singleLineLeadingSpace"
        assert reader.next_value().value_string() == " the leading space is significant"
        assert reader.peek_key() == "01b_singleLineTrailingSpace"
        assert reader.next_value().value_string() == "the trailing space is significant "
        assert reader.peek_key() == "02_multiLineStringTrimmed"
        assert reader.next_value().value_string() == "Line 1\nLine 2"
        assert reader.peek_key() == "03_multiLineStringTrailingNewline"
        assert reader.next_value().value_string() == "Line 1\nLine 2\n"
        assert reader.peek_key() == "04_multiLineStringLeadingNewline"
        assert reader.next_value().value_string() == "\nLine 1\nLine 2"
        assert reader.peek_key() == "05_notSureHowPythonMultilineWorks"
        assert reader.next_value().value_string() == ""

    def test_invalid_names(self):
        """Malformed key headers raise ParseException with a specific message."""
        with pytest.raises(ParseException) as exc_info:
            SnapshotValueReader.of("╔═name ═╗").peek_key()
        assert "Expected to start with '╔═ '" in str(exc_info.value)

        with pytest.raises(ParseException) as exc_info:
            SnapshotValueReader.of("╔═ name═╗").peek_key()
        assert "Expected to contain ' ═╗'" in str(exc_info.value)

        # Two spaces after the start marker: the key itself begins with a space.
        with pytest.raises(ParseException) as exc_info:
            SnapshotValueReader.of("╔═  name ═╗").peek_key()
        assert "Leading spaces are disallowed: ' name'" in str(exc_info.value)

        # Two spaces before the end marker: the key itself ends with a space.
        with pytest.raises(ParseException) as exc_info:
            SnapshotValueReader.of("╔═ name  ═╗").peek_key()
        assert "Trailing spaces are disallowed: 'name '" in str(exc_info.value)

        assert SnapshotValueReader.of("╔═ name ═╗ comment okay").peek_key() == "name"
        assert SnapshotValueReader.of("╔═ name ═╗okay here too").peek_key() == "name"
        assert (
            SnapshotValueReader.of(
                "╔═ name ═╗ okay ╔═ ═╗ (it's the first ' ═╗' that counts)"
            ).peek_key()
            == "name"
        )

    def test_escape_characters_in_name(self):
        """Backslash escapes inside key names are undone by peek_key."""
        reader = SnapshotValueReader.of(
            "╔═ test with \\(square brackets\\) in name ═╗\n"
            "╔═ test with \\\\backslash\\\\ in name ═╗\n"
            "╔═ test with\\nnewline\\nin name ═╗\n"
            "╔═ test with \\ttab\\t in name ═╗\n"
            "╔═ test with \\┌\\─ ascii art \\─\\┐ in name ═╗"
        )
        assert reader.peek_key() == "test with [square brackets] in name"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "test with \\backslash\\ in name"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "test with\nnewline\nin name"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "test with \ttab\t in name"
        assert reader.next_value().value_string() == ""
        assert reader.peek_key() == "test with ╔═ ascii art ═╗ in name"
        assert reader.next_value().value_string() == ""

    def assert_key_value_with_skip(self, test_content, key, expected_value):
        """Skip forward to *key*, check its value, then drain the reader."""
        reader = SnapshotValueReader.of(test_content)
        # Stop at end of input too, so a missing key fails the assertion
        # below instead of looping forever.
        while reader.peek_key() not in (None, key):
            reader.skip_value()
        assert reader.peek_key() == key
        assert reader.next_value().value_string() == expected_value
        while reader.peek_key() is not None:
            reader.skip_value()

    def test_skip_values(self):
        """Values can be skipped before and after the one being read."""
        test_content = (
            "╔═ 00_empty ═╗\n"
            "╔═ 01_singleLineString ═╗\n"
            "this is one line\n"
            "╔═ 02_multiLineStringTrimmed ═╗\n"
            "Line 1\n"
            "Line 2\n"
            "╔═ 05_notSureHowKotlinMultilineWorks ═╗"
        )
        self.assert_key_value_with_skip(test_content, "00_empty", "")
        self.assert_key_value_with_skip(
            test_content, "01_singleLineString", "this is one line"
        )
        self.assert_key_value_with_skip(
            test_content, "02_multiLineStringTrimmed", "Line 1\nLine 2"
        )

    def test_binary(self):
        """A header flagged base64 yields the decoded bytes."""
        reader = SnapshotValueReader.of(
            "╔═ Apple ═╗ base64 length 3 bytes\n"
            "c2Fk"
        )
        assert reader.peek_key() == "Apple"
        assert reader.next_value().value_binary() == b"sad"