Skip to content

Commit f93a195

Browse files
author
Ben King
committed
Better guesses for ambiguous quotation marks + quote continuer edge case fix
1 parent 29be81e commit f93a195

File tree

3 files changed

+79
-5
lines changed

3 files changed

+79
-5
lines changed

machine/corpora/fallback_quotation_mark_resolver.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ def _resolve_quotation_mark(
4242
else:
4343
self._issues.add(QuotationMarkResolutionIssue.UNEXPECTED_QUOTATION_MARK)
4444
else:
45+
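# Ambiguous mark: guess its direction from self._last_quotation_mark — treat it as opening when there is no previous mark or the previous one was closing, otherwise as closing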
46+
if (
47+
self._last_quotation_mark is None
48+
or self._last_quotation_mark.direction is QuotationMarkDirection.CLOSING
49+
):
50+
quotation_mark: Optional[QuotationMarkMetadata] = self._resolve_opening_mark(quotation_mark_match)
51+
if quotation_mark is not None:
52+
yield quotation_mark
53+
else:
54+
quotation_mark: Optional[QuotationMarkMetadata] = self._resolve_closing_mark(quotation_mark_match)
55+
if quotation_mark is not None:
56+
yield quotation_mark
57+
4558
self._issues.add(QuotationMarkResolutionIssue.AMBIGUOUS_QUOTATION_MARK)
4659

4760
def _is_opening_quotation_mark(

machine/corpora/punctuation_analysis/depth_based_quotation_mark_resolver.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,9 @@ def _meets_quote_continuer_prerequisites(
176176
self,
177177
quotation_mark_match: QuotationMarkStringMatch,
178178
) -> bool:
179+
if self._quote_continuer_state.current_depth >= self._quotation_mark_resolver_state.current_depth:
180+
return False
181+
179182
if (
180183
self._settings.should_rely_on_paragraph_markers
181184
and not quotation_mark_match._text_segment.marker_is_in_preceding_context(UsfmMarkerType.PARAGRAPH)

tests/corpora/test_fallback_quotation_mark_resolver.py

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def test_reset():
2929
assert len(basic_quotation_mark_resolver._issues) == 0
3030

3131

32-
def test_simple_quotation_mark_resolution():
32+
def test_simple_quotation_mark_resolution_with_no_previous_mark():
3333
english_quote_convention = STANDARD_QUOTE_CONVENTIONS.get_quote_convention_by_name("standard_english")
3434
assert english_quote_convention is not None
3535

@@ -40,17 +40,75 @@ def test_simple_quotation_mark_resolution():
4040
actual_resolved_quotation_marks = list(
4141
basic_quotation_mark_resolver.resolve_quotation_marks(
4242
[
43-
QuotationMarkStringMatch(TextSegment.Builder().set_text('"test text"').build(), 0, 1),
44-
QuotationMarkStringMatch(TextSegment.Builder().set_text('"test text"').build(), 10, 11),
43+
QuotationMarkStringMatch(TextSegment.Builder().set_text('test " text').build(), 5, 6),
4544
]
4645
)
4746
)
4847
expected_resolved_quotation_marks = [
4948
QuotationMarkMetadata(
50-
'"', 1, QuotationMarkDirection.OPENING, TextSegment.Builder().set_text('"test text"').build(), 0, 1
49+
'"', 1, QuotationMarkDirection.OPENING, TextSegment.Builder().set_text('test " text').build(), 5, 6
50+
),
51+
]
52+
53+
assert_resolved_quotation_marks_equal(
54+
actual_resolved_quotation_marks,
55+
expected_resolved_quotation_marks,
56+
)
57+
58+
59+
def test_simple_quotation_mark_resolution_with_previous_opening_mark():
60+
english_quote_convention = STANDARD_QUOTE_CONVENTIONS.get_quote_convention_by_name("standard_english")
61+
assert english_quote_convention is not None
62+
63+
basic_quotation_mark_resolver = FallbackQuotationMarkResolver(
64+
QuotationMarkUpdateResolutionSettings(english_quote_convention.normalize())
65+
)
66+
67+
actual_resolved_quotation_marks = list(
68+
basic_quotation_mark_resolver.resolve_quotation_marks(
69+
[
70+
QuotationMarkStringMatch(TextSegment.Builder().set_text('"test " text').build(), 0, 1),
71+
QuotationMarkStringMatch(TextSegment.Builder().set_text('"test " text').build(), 6, 7),
72+
]
73+
)
74+
)
75+
expected_resolved_quotation_marks = [
76+
QuotationMarkMetadata(
77+
'"', 1, QuotationMarkDirection.OPENING, TextSegment.Builder().set_text('"test " text').build(), 0, 1
78+
),
79+
QuotationMarkMetadata(
80+
'"', 1, QuotationMarkDirection.CLOSING, TextSegment.Builder().set_text('"test " text').build(), 6, 7
81+
),
82+
]
83+
84+
assert_resolved_quotation_marks_equal(
85+
actual_resolved_quotation_marks,
86+
expected_resolved_quotation_marks,
87+
)
88+
89+
90+
def test_simple_quotation_mark_resolution_with_previous_closing_mark():
91+
english_quote_convention = STANDARD_QUOTE_CONVENTIONS.get_quote_convention_by_name("standard_english")
92+
assert english_quote_convention is not None
93+
94+
basic_quotation_mark_resolver = FallbackQuotationMarkResolver(
95+
QuotationMarkUpdateResolutionSettings(english_quote_convention.normalize())
96+
)
97+
98+
actual_resolved_quotation_marks = list(
99+
basic_quotation_mark_resolver.resolve_quotation_marks(
100+
[
101+
QuotationMarkStringMatch(TextSegment.Builder().set_text('test" " text').build(), 4, 5),
102+
QuotationMarkStringMatch(TextSegment.Builder().set_text('test" " text').build(), 6, 7),
103+
]
104+
)
105+
)
106+
expected_resolved_quotation_marks = [
107+
QuotationMarkMetadata(
108+
'"', 1, QuotationMarkDirection.CLOSING, TextSegment.Builder().set_text('test" " text').build(), 4, 5
51109
),
52110
QuotationMarkMetadata(
53-
'"', 1, QuotationMarkDirection.CLOSING, TextSegment.Builder().set_text('"test text"').build(), 10, 11
111+
'"', 1, QuotationMarkDirection.OPENING, TextSegment.Builder().set_text('test" " text').build(), 6, 7
54112
),
55113
]
56114

0 commit comments

Comments
 (0)