Skip to content

Commit e7f3c2c

Browse files
mshannon-sil and Copilot committed
Add MemoryParatextProjectTextUpdater and move MemoryParatextProjectFileHandler to production code
- Create MemoryParatextProjectFileHandler in machine/corpora/ (promoted from testutils)
- Create MemoryParatextProjectTextUpdater in machine/corpora/
- Rename testutils file to default_paratext_project_settings.py
- Update all imports to use new locations
- Refactor test_update_usfm_parser_handler to use MemoryParatextProjectTextUpdater

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent cf0ea05 commit e7f3c2c

11 files changed

Lines changed: 61 additions & 40 deletions

machine/corpora/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from .file_paratext_project_versification_error_detector import FileParatextProjectVersificationErrorDetector
1515
from .flatten import flatten
1616
from .memory_alignment_collection import MemoryAlignmentCollection
17+
from .memory_paratext_project_file_handler import MemoryParatextProjectFileHandler
18+
from .memory_paratext_project_text_updater import MemoryParatextProjectTextUpdater
1719
from .memory_stream_container import MemoryStreamContainer
1820
from .memory_text import MemoryText
1921
from .multi_key_ref import MultiKeyRef
@@ -121,6 +123,8 @@
121123
"KeyTerm",
122124
"lowercase",
123125
"MemoryAlignmentCollection",
126+
"MemoryParatextProjectFileHandler",
127+
"MemoryParatextProjectTextUpdater",
124128
"MemoryStreamContainer",
125129
"MemoryText",
126130
"MultiKeyRef",
machine/corpora/memory_paratext_project_file_handler.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from io import BytesIO
2+
from typing import BinaryIO, Dict, Optional
3+
4+
from .paratext_project_file_handler import ParatextProjectFileHandler
5+
from .usfm_stylesheet import UsfmStylesheet
6+
7+
8+
class MemoryParatextProjectFileHandler(ParatextProjectFileHandler):
    """Paratext project file handler backed by an in-memory mapping.

    The mapping goes from file name to the file's text content. No copy of
    the mapping is taken, so later changes made by the caller are visible
    through this handler.
    """

    def __init__(self, files: Dict[str, str]) -> None:
        # Stored by reference, not copied.
        self.files = files

    def exists(self, file_name: str) -> bool:
        """Return True when ``file_name`` is a key of the in-memory mapping."""
        return file_name in self.files

    def open(self, file_name: str) -> BinaryIO:
        """Return a new binary stream over the UTF-8 encoding of the file's text.

        Looking up a name that is not in the mapping raises ``KeyError``.
        """
        contents = self.files[file_name]
        return BytesIO(contents.encode("utf-8"))

    def find(self, extension: str) -> Optional[str]:
        """Return the first file name ending with ``extension``, or None.

        Names are checked in the mapping's iteration order.
        """
        matches = (candidate for candidate in self.files if candidate.endswith(extension))
        return next(matches, None)

    def create_stylesheet(self, file_name: str) -> UsfmStylesheet:
        """Build a ``UsfmStylesheet`` from ``file_name``."""
        # NOTE(review): the in-memory mapping is not consulted here; how
        # UsfmStylesheet resolves ``file_name`` is not visible from this
        # module -- confirm it behaves as intended for in-memory projects.
        stylesheet = UsfmStylesheet(file_name)
        return stylesheet
machine/corpora/memory_paratext_project_text_updater.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from typing import Dict
2+
3+
from .memory_paratext_project_file_handler import MemoryParatextProjectFileHandler
4+
from .paratext_project_settings import ParatextProjectSettings
5+
from .paratext_project_text_updater_base import ParatextProjectTextUpdaterBase
6+
7+
8+
class MemoryParatextProjectTextUpdater(ParatextProjectTextUpdaterBase):
    """Text updater whose project files come from an in-memory mapping."""

    def __init__(self, files: Dict[str, str], settings: ParatextProjectSettings) -> None:
        """Wrap ``files`` in a memory file handler and pass it, with ``settings``, to the base class."""
        file_handler = MemoryParatextProjectFileHandler(files)
        super().__init__(file_handler, settings)

tests/corpora/test_paratext_project_terms_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Dict, List, Optional
22

3-
from testutils.memory_paratext_project_file_handler import DefaultParatextProjectSettings
3+
from testutils.default_paratext_project_settings import DefaultParatextProjectSettings
44
from testutils.memory_paratext_project_terms_parser import MemoryParatextProjectTermsParser
55

66
from machine.corpora import KeyTerm, ParatextProjectSettings, ParatextProjectTermsParserBase

tests/corpora/test_update_usfm_parser_handler.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,18 @@
11
from typing import Iterable, List, Optional, Sequence, Tuple, Union
22

33
from testutils.corpora_test_helpers import USFM_TEST_PROJECT_PATH, ignore_line_endings
4+
from testutils.default_paratext_project_settings import DefaultParatextProjectSettings
45

56
from machine.corpora import (
67
FileParatextProjectTextUpdater,
8+
MemoryParatextProjectTextUpdater,
79
ScriptureRef,
810
UpdateUsfmMarkerBehavior,
9-
UpdateUsfmParserHandler,
1011
UpdateUsfmRow,
1112
UpdateUsfmTextBehavior,
12-
UsfmTokenizer,
1313
UsfmUpdateBlock,
1414
UsfmUpdateBlockElementType,
1515
UsfmUpdateBlockHandler,
16-
filter_tokens_by_chapter,
17-
parse_usfm,
1816
)
1917

2018

@@ -1685,8 +1683,12 @@ def update_usfm(
16851683
)
16861684
else:
16871685
source = source.strip().replace("\r\n", "\n") + "\r\n"
1688-
updater = UpdateUsfmParserHandler(
1686+
settings = DefaultParatextProjectSettings(file_name_form="MAT", file_name_suffix="")
1687+
updater = MemoryParatextProjectTextUpdater({"MAT": source}, settings)
1688+
return updater.update_usfm(
1689+
"MAT",
16891690
rows,
1691+
chapters,
16901692
id_text,
16911693
text_behavior,
16921694
paragraph_behavior,
@@ -1698,11 +1700,6 @@ def update_usfm(
16981700
lambda _: False,
16991701
compare_segments,
17001702
)
1701-
tokenizer = UsfmTokenizer()
1702-
tokens = tokenizer.tokenize(source)
1703-
tokens = filter_tokens_by_chapter(tokens, chapters)
1704-
parse_usfm(tokens, updater)
1705-
return updater.get_usfm()
17061703

17071704

17081705
def assert_usfm_equals(target: Optional[str], truth: str) -> None:

tests/corpora/test_usfm_versification_error_detector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from io import StringIO
22
from typing import Dict, List, Optional
33

4-
from testutils.memory_paratext_project_file_handler import DefaultParatextProjectSettings
4+
from testutils.default_paratext_project_settings import DefaultParatextProjectSettings
55
from testutils.memory_paratext_project_versification_error_detector import (
66
MemoryParatextProjectVersificationErrorDetector,
77
)

tests/punctuation_analysis/test_paratext_project_quote_convention_detector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Dict, List, Optional
22

3-
from testutils.memory_paratext_project_file_handler import DefaultParatextProjectSettings
3+
from testutils.default_paratext_project_settings import DefaultParatextProjectSettings
44
from testutils.memory_paratext_project_quote_convention_detector import MemoryParatextProjectQuoteConventionDetector
55

66
from machine.corpora import ParatextProjectSettings

tests/testutils/memory_paratext_project_file_handler.py renamed to tests/testutils/default_paratext_project_settings.py

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,9 @@
1-
from io import BytesIO
2-
from typing import BinaryIO, Dict, Optional
1+
from typing import Optional
32

4-
from machine.corpora import ParatextProjectFileHandler, ParatextProjectSettings, UsfmStylesheet
3+
from machine.corpora import ParatextProjectSettings, UsfmStylesheet
54
from machine.scripture import ORIGINAL_VERSIFICATION, Versification
65

76

8-
class MemoryParatextProjectFileHandler(ParatextProjectFileHandler):
9-
def __init__(self, files: Dict[str, str]) -> None:
10-
11-
self.files = files
12-
13-
def exists(self, file_name: str) -> bool:
14-
return file_name in self.files
15-
16-
def open(self, file_name: str) -> BinaryIO:
17-
return BytesIO(self.files[file_name].encode("utf-8"))
18-
19-
def find(self, extension):
20-
raise NotImplementedError
21-
22-
def create_stylesheet(self, file_name):
23-
raise NotImplementedError
24-
25-
267
class DefaultParatextProjectSettings(ParatextProjectSettings):
278
def __init__(
289
self,

tests/testutils/memory_paratext_project_quote_convention_detector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from typing import Dict
22

3-
from machine.corpora import ParatextProjectSettings
3+
from machine.corpora import MemoryParatextProjectFileHandler, ParatextProjectSettings
44
from machine.punctuation_analysis import ParatextProjectQuoteConventionDetector
55

6-
from .memory_paratext_project_file_handler import DefaultParatextProjectSettings, MemoryParatextProjectFileHandler
6+
from .default_paratext_project_settings import DefaultParatextProjectSettings
77

88

99
class MemoryParatextProjectQuoteConventionDetector(ParatextProjectQuoteConventionDetector):

tests/testutils/memory_paratext_project_terms_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from typing import Dict, Optional
22

3-
from machine.corpora import ParatextProjectSettings, ParatextProjectTermsParserBase
3+
from machine.corpora import MemoryParatextProjectFileHandler, ParatextProjectSettings, ParatextProjectTermsParserBase
44

5-
from .memory_paratext_project_file_handler import DefaultParatextProjectSettings, MemoryParatextProjectFileHandler
5+
from .default_paratext_project_settings import DefaultParatextProjectSettings
66

77

88
class MemoryParatextProjectTermsParser(ParatextProjectTermsParserBase):

0 commit comments

Comments
 (0)