import io

# Source: python/selfie-lib/selfie_lib/LineReader.py
#
# python/selfie-lib/selfie_lib/__init__.py re-exports this class with
#   from .LineReader import LineReader as LineReader
# alongside the existing
#   from .Slice import Slice as Slice


class LineReader:
    """Read UTF-8 content one line at a time and report its newline style.

    The newline style is decided once, from the first line only: if that line
    contains ``\\r\\n`` the content is treated as Windows-style, otherwise as
    Unix-style. Later lines never change that decision.
    """

    def __init__(self, content: bytes):
        """Wrap *content* (raw bytes, decoded as UTF-8 line by line)."""
        self.__buffer = io.BytesIO(content)
        self.__uses_unix_newlines = self.__detect_newline_type()
        self.__line_count = 0  # number of lines handed out by read_line()

    @classmethod
    def for_binary(cls, content: bytes) -> "LineReader":
        """Build a reader over raw bytes."""
        return cls(content)

    @classmethod
    def for_string(cls, content: str) -> "LineReader":
        """Build a reader over text by encoding it as UTF-8."""
        return cls(content.encode("utf-8"))

    def __detect_newline_type(self) -> bool:
        """Return True when the first line does not contain ``\\r\\n``."""
        first_line = self.__buffer.readline()
        self.__buffer.seek(0)  # rewind so read_line() starts from the top
        return b"\r\n" not in first_line

    def unix_newlines(self) -> bool:
        """Return True for Unix-style content, False for Windows-style."""
        return self.__uses_unix_newlines

    def read_line(self) -> str:
        """Return the next line without its line terminator.

        At end of input an empty string is returned and the line counter is
        left untouched. Because a blank line also yields ``""``, compare
        ``get_line_number()`` before and after the call to tell them apart.

        Raises:
            UnicodeDecodeError: if the line is not valid UTF-8.
        """
        line_bytes = self.__buffer.readline()
        if not line_bytes:
            return ""
        self.__line_count += 1
        line = line_bytes.decode("utf-8")
        # Remove exactly one terminator. str.rstrip("\r\n") treats its
        # argument as a character set and would also eat carriage returns
        # that are part of the line's content (e.g. "data\r\r\n").
        if line.endswith("\n"):
            line = line[:-1]
        if not self.__uses_unix_newlines and line.endswith("\r"):
            line = line[:-1]
        return line

    def get_line_number(self) -> int:
        """Return how many lines read_line() has returned so far."""
        return self.__line_count


# Source: python/selfie-lib/tests/LineReader_test.py
# (in that file LineReader is imported with `from selfie_lib import LineReader`)


def test_should_find_unix_separator_from_binary():
    reader = LineReader.for_binary(b"This is a new line\n")
    assert reader.unix_newlines() is True
    assert reader.read_line() == "This is a new line"


def test_should_find_windows_separator_from_binary():
    reader = LineReader.for_binary(b"This is a new line\r\n")
    assert reader.unix_newlines() is False
    assert reader.read_line() == "This is a new line"


def test_should_find_unix_separator_from_string():
    reader = LineReader.for_string("This is a new line\n")
    assert reader.unix_newlines() is True
    assert reader.read_line() == "This is a new line"


def test_should_find_windows_separator_from_string():
    reader = LineReader.for_string("This is a new line\r\n")
    assert reader.unix_newlines() is False
    assert reader.read_line() == "This is a new line"


def test_should_get_unix_line_separator_when_there_is_none():
    reader = LineReader.for_binary(b"This is a new line")
    assert reader.unix_newlines() is True
    assert reader.read_line() == "This is a new line"


def test_should_read_next_line_without_problem():
    reader = LineReader.for_binary(b"First\r\nSecond\r\n")
    assert reader.unix_newlines() is False
    assert reader.read_line() == "First"
    assert reader.unix_newlines() is False
    assert reader.read_line() == "Second"
    assert reader.unix_newlines() is False


def test_should_use_first_line_separator_and_ignore_next():
    reader = LineReader.for_binary(b"First\r\nAnother separator\n")
    assert reader.unix_newlines() is False
    assert reader.read_line() == "First"
    assert reader.unix_newlines() is False
    assert reader.read_line() == "Another separator"
    assert reader.unix_newlines() is False


def test_should_strip_only_one_line_terminator():
    # A carriage return that precedes the "\r\n" terminator is line content.
    reader = LineReader.for_binary(b"data\r\r\nnext\r\n")
    assert reader.unix_newlines() is False
    assert reader.read_line() == "data\r"
    assert reader.read_line() == "next"