Skip to content

Commit c17c54f

Browse files
author
Ben King
committed
Code review changes requested for tests
1 parent f93a195 commit c17c54f

File tree

4 files changed

+37
-40
lines changed

4 files changed

+37
-40
lines changed

machine/corpora/quotation_mark_update_first_pass.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections import defaultdict
12
from typing import Dict, List, Set
23

34
from .punctuation_analysis.chapter import Chapter
@@ -33,15 +34,12 @@ def __init__(self, source_quote_convention: QuoteConvention, target_quote_conven
3334
def _check_whether_fallback_mode_will_work(
3435
self, source_quote_convention: QuoteConvention, target_quote_convention: QuoteConvention
3536
) -> bool:
36-
target_marks_by_source_marks: Dict[str, Set[str]] = {}
37-
for depth in range(1, source_quote_convention.num_levels + 1):
37+
target_marks_by_source_marks: Dict[str, Set[str]] = defaultdict(set)
38+
for depth in range(1, min(source_quote_convention.num_levels, target_quote_convention.num_levels) + 1):
3839
opening_quotation_mark = source_quote_convention.get_opening_quotation_mark_at_depth(depth)
39-
if opening_quotation_mark not in target_marks_by_source_marks:
40-
target_marks_by_source_marks[opening_quotation_mark] = set()
41-
if depth <= target_quote_convention.num_levels:
42-
target_marks_by_source_marks[opening_quotation_mark].add(
43-
target_quote_convention.get_closing_quotation_mark_at_depth(depth)
44-
)
40+
target_marks_by_source_marks[opening_quotation_mark].add(
41+
target_quote_convention.get_closing_quotation_mark_at_depth(depth)
42+
)
4543

4644
for source_mark in target_marks_by_source_marks:
4745
if len(target_marks_by_source_marks[source_mark]) > 1:

tests/corpora/punctuation_analysis/test_depth_based_quotation_mark_resolver.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -993,19 +993,19 @@ def test_is_opening_quote() -> None:
993993
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u201c").build(), 0, 1)
994994
)
995995
assert not standard_swedish_quotation_mark_categorizer.is_opening_quotation_mark(
996-
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u201d").build(), 1, 2)
996+
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u201d").build(), 0, 1)
997997
)
998998
assert standard_swedish_quotation_mark_categorizer.is_opening_quotation_mark(
999999
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u201c\u201d").build(), 1, 2)
10001000
)
10011001
assert not standard_swedish_quotation_mark_categorizer.is_opening_quotation_mark(
1002-
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u2019").build(), 1, 2)
1002+
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u2019").build(), 0, 1)
10031003
)
10041004
assert standard_swedish_quotation_mark_categorizer.is_opening_quotation_mark(
10051005
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u201c\u2019").build(), 1, 2)
10061006
)
10071007
assert not three_conventions_quotation_mark_categorizer.is_opening_quotation_mark(
1008-
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u201c").build(), 1, 2)
1008+
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u201c").build(), 0, 1)
10091009
)
10101010
assert three_conventions_quotation_mark_categorizer.is_opening_quotation_mark(
10111011
QuotationMarkStringMatch(TextSegment.Builder().set_text("\u201c\u201c").build(), 1, 2)
@@ -2290,7 +2290,6 @@ def test_too_deep_nesting_issue() -> None:
22902290
QuotationMarkMetadata("\u2018", 2, QuotationMarkDirection.OPENING, text_segment, 6, 7),
22912291
QuotationMarkMetadata("\u201c", 3, QuotationMarkDirection.OPENING, text_segment, 10, 11),
22922292
QuotationMarkMetadata("\u2018", 4, QuotationMarkDirection.OPENING, text_segment, 13, 14),
2293-
# QuotationMarkMetadata("\u201c", 5, QuotationMarkDirection.Opening, text_segment, 20, 21),
22942293
]
22952294
assert standard_english_quotation_mark_resolver.get_issues() == {
22962295
QuotationMarkResolutionIssue.TOO_DEEP_NESTING,

tests/corpora/punctuation_analysis/test_quotation_mark_string_match.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def test_does_trailing_substring_match() -> None:
154154
assert quotation_mark_string_match.trailing_substring_matches(regex.compile(r"^ text$"))
155155

156156
quotation_mark_string_match = QuotationMarkStringMatch(
157-
TextSegment.Builder().set_text("sample text").build(), 11, 12
157+
TextSegment.Builder().set_text("sample text").build(), 10, 11
158158
)
159159
assert not quotation_mark_string_match.trailing_substring_matches(regex.compile(r".+"))
160160

tests/corpora/punctuation_analysis/test_quote_convention.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,13 @@ def test_get_num_levels() -> None:
8989
assert empty_quote_convention.num_levels == 0
9090

9191
one_level_quote_convention = QuoteConvention(
92-
"one-level-quote-convention",
92+
"one_level_quote_convention",
9393
[SingleLevelQuoteConvention("\u201c", "\u201d")],
9494
)
9595
assert one_level_quote_convention.num_levels == 1
9696

9797
two_level_quote_convention = QuoteConvention(
98-
"two-level-quote-convention",
98+
"two_level_quote_convention",
9999
[
100100
SingleLevelQuoteConvention("\u201c", "\u201d"),
101101
SingleLevelQuoteConvention("\u2018", "\u2019"),
@@ -104,7 +104,7 @@ def test_get_num_levels() -> None:
104104
assert two_level_quote_convention.num_levels == 2
105105

106106
three_level_quote_convention = QuoteConvention(
107-
"three-level-quote-convention",
107+
"three_level_quote_convention",
108108
[
109109
SingleLevelQuoteConvention("\u201c", "\u201d"),
110110
SingleLevelQuoteConvention("\u2018", "\u2019"),
@@ -116,7 +116,7 @@ def test_get_num_levels() -> None:
116116

117117
def test_get_opening_quote_at_level() -> None:
118118
quote_convention = QuoteConvention(
119-
"test-quote-convention",
119+
"test_quote_convention",
120120
[
121121
SingleLevelQuoteConvention("\u201c", "\u201d"),
122122
SingleLevelQuoteConvention("\u2018", "\u2019"),
@@ -130,7 +130,7 @@ def test_get_opening_quote_at_level() -> None:
130130

131131
def test_get_closing_quote_at_level() -> None:
132132
quote_convention = QuoteConvention(
133-
"test-quote-convention",
133+
"test_quote_convention",
134134
[
135135
SingleLevelQuoteConvention("\u201c", "\u201d"),
136136
SingleLevelQuoteConvention("\u2018", "\u2019"),
@@ -144,7 +144,7 @@ def test_get_closing_quote_at_level() -> None:
144144

145145
def test_get_expected_quotation_mark() -> None:
146146
quote_convention = QuoteConvention(
147-
"test-quote-convention",
147+
"test_quote_convention",
148148
[
149149
SingleLevelQuoteConvention("\u201c", "\u201d"),
150150
SingleLevelQuoteConvention("\u2018", "\u2019"),
@@ -168,28 +168,28 @@ def test_includes_opening_quotation_mark() -> None:
168168
assert not empty_quote_convention._includes_opening_quotation_mark("\u201c")
169169

170170
positive_quote_convention1 = QuoteConvention(
171-
"positive quote convention 1", [SingleLevelQuoteConvention("\u201c", "\u201d")]
171+
"positive_quote_convention_1", [SingleLevelQuoteConvention("\u201c", "\u201d")]
172172
)
173173
assert positive_quote_convention1._includes_opening_quotation_mark("\u201c")
174174

175175
negative_quote_convention1 = QuoteConvention(
176-
"negative quote convention 1", [SingleLevelQuoteConvention("\u2018", "\u2019")]
176+
"negative_quote_convention_1", [SingleLevelQuoteConvention("\u2018", "\u2019")]
177177
)
178178
assert not negative_quote_convention1._includes_opening_quotation_mark("\u201c")
179179

180180
negative_quote_convention2 = QuoteConvention(
181-
"negative quote convention 2", [SingleLevelQuoteConvention("\u201d", "\u201c")]
181+
"negative_quote_convention_2", [SingleLevelQuoteConvention("\u201d", "\u201c")]
182182
)
183183
assert not negative_quote_convention2._includes_opening_quotation_mark("\u201c")
184184

185185
positive_quote_convention2 = QuoteConvention(
186-
"positive quote convention 2",
186+
"positive_quote_convention_2",
187187
[SingleLevelQuoteConvention("\u201c", "\u201d"), SingleLevelQuoteConvention("\u2018", "\u2019")],
188188
)
189189
assert positive_quote_convention2._includes_opening_quotation_mark("\u201c")
190190

191191
positive_quote_convention3 = QuoteConvention(
192-
"positive quote convention 3",
192+
"positive_quote_convention_3",
193193
[SingleLevelQuoteConvention("\u2018", "\u2019"), SingleLevelQuoteConvention("\u201c", "\u201d")],
194194
)
195195
assert positive_quote_convention3._includes_opening_quotation_mark("\u201c")
@@ -210,34 +210,34 @@ def test_includes_closing_quotation_mark() -> None:
210210
assert not empty_quote_convention._includes_closing_quotation_mark("\u201d")
211211

212212
positive_quote_convention1 = QuoteConvention(
213-
"positive quote convention 1", [SingleLevelQuoteConvention("\u201c", "\u201d")]
213+
"positive_quote_convention_1", [SingleLevelQuoteConvention("\u201c", "\u201d")]
214214
)
215215
assert positive_quote_convention1._includes_closing_quotation_mark("\u201d")
216216

217217
negative_quote_convention1 = QuoteConvention(
218-
"negative quote convention 1", [SingleLevelQuoteConvention("\u2018", "\u2019")]
218+
"negative_quote_convention_1", [SingleLevelQuoteConvention("\u2018", "\u2019")]
219219
)
220220
assert not negative_quote_convention1._includes_closing_quotation_mark("\u201d")
221221

222222
negative_quote_convention2 = QuoteConvention(
223-
"negative quote convention 2", [SingleLevelQuoteConvention("\u201d", "\u201c")]
223+
"negative_quote_convention_2", [SingleLevelQuoteConvention("\u201d", "\u201c")]
224224
)
225225
assert not negative_quote_convention2._includes_closing_quotation_mark("\u201d")
226226

227227
positive_quote_convention2 = QuoteConvention(
228-
"positive quote convention 2",
228+
"positive_quote_convention_2",
229229
[SingleLevelQuoteConvention("\u201c", "\u201d"), SingleLevelQuoteConvention("\u2018", "\u2019")],
230230
)
231231
assert positive_quote_convention2._includes_closing_quotation_mark("\u201d")
232232

233233
positive_quote_convention3 = QuoteConvention(
234-
"positive quote convention 3",
234+
"positive_quote_convention_3",
235235
[SingleLevelQuoteConvention("\u2018", "\u2019"), SingleLevelQuoteConvention("\u201c", "\u201d")],
236236
)
237237
assert positive_quote_convention3._includes_closing_quotation_mark("\u201d")
238238

239239
negative_quote_convention3 = QuoteConvention(
240-
"negative quote convention 3",
240+
"negative_quote_convention_3",
241241
[
242242
SingleLevelQuoteConvention("\u2018", "\u2019"),
243243
SingleLevelQuoteConvention("'", "'"),
@@ -249,7 +249,7 @@ def test_includes_closing_quotation_mark() -> None:
249249

250250
def test_get_possible_depths() -> None:
251251
quote_convention = QuoteConvention(
252-
"test-quote-convention",
252+
"test_quote_convention",
253253
[
254254
SingleLevelQuoteConvention("\u201c", "\u201d"),
255255
SingleLevelQuoteConvention("\u2018", "\u2019"),
@@ -271,7 +271,7 @@ def test_get_possible_depths() -> None:
271271

272272
def test_is_compatible_with_observed_quotation_marks() -> None:
273273
quote_convention = QuoteConvention(
274-
"test-quote-convention",
274+
"test_quote_convention",
275275
[
276276
SingleLevelQuoteConvention("\u201c", "\u201d"),
277277
SingleLevelQuoteConvention("\u2018", "\u2019"),
@@ -302,7 +302,7 @@ def test_normalize() -> None:
302302
assert normalized_empty_quote_convention.num_levels == 0
303303

304304
standard_english_quote_convention = QuoteConvention(
305-
"standard-english-quote-convention",
305+
"standard_english_quote_convention",
306306
[
307307
SingleLevelQuoteConvention("\u201c", "\u201d"),
308308
SingleLevelQuoteConvention("\u2018", "\u2019"),
@@ -311,7 +311,7 @@ def test_normalize() -> None:
311311
],
312312
)
313313
normalized_standard_english_quote_convention = standard_english_quote_convention.normalize()
314-
assert normalized_standard_english_quote_convention.name == "standard-english-quote-convention_normalized"
314+
assert normalized_standard_english_quote_convention.name == "standard_english_quote_convention_normalized"
315315
assert normalized_standard_english_quote_convention.num_levels == 4
316316
assert normalized_standard_english_quote_convention.get_opening_quotation_mark_at_depth(1) == '"'
317317
assert normalized_standard_english_quote_convention.get_closing_quotation_mark_at_depth(1) == '"'
@@ -323,15 +323,15 @@ def test_normalize() -> None:
323323
assert normalized_standard_english_quote_convention.get_closing_quotation_mark_at_depth(4) == "'"
324324

325325
western_european_quote_convention = QuoteConvention(
326-
"test-quote-convention",
326+
"test_quote_convention",
327327
[
328328
SingleLevelQuoteConvention("\u201c", "\u201d"),
329329
SingleLevelQuoteConvention("\u00ab", "\u00bb"),
330330
SingleLevelQuoteConvention("\u2018", "\u2019"),
331331
],
332332
)
333333
normalized_western_european_quote_convention = western_european_quote_convention.normalize()
334-
assert normalized_western_european_quote_convention.name == "test-quote-convention_normalized"
334+
assert normalized_western_european_quote_convention.name == "test_quote_convention_normalized"
335335
assert normalized_western_european_quote_convention.num_levels == 3
336336
assert normalized_western_european_quote_convention.get_opening_quotation_mark_at_depth(1) == '"'
337337
assert normalized_western_european_quote_convention.get_closing_quotation_mark_at_depth(1) == '"'
@@ -341,7 +341,7 @@ def test_normalize() -> None:
341341
assert normalized_western_european_quote_convention.get_closing_quotation_mark_at_depth(3) == "'"
342342

343343
hybrid_british_typewriter_english_quote_convention = QuoteConvention(
344-
"hybrid-british-typewriter-english-quote-convention",
344+
"hybrid_british_typewriter_english_quote_convention",
345345
[
346346
SingleLevelQuoteConvention("\u00ab", "\u00bb"),
347347
SingleLevelQuoteConvention("'", "'"),
@@ -354,7 +354,7 @@ def test_normalize() -> None:
354354
)
355355
assert (
356356
normalized_hybrid_british_typewriter_english_quote_convention.name
357-
== "hybrid-british-typewriter-english-quote-convention_normalized"
357+
== "hybrid_british_typewriter_english_quote_convention_normalized"
358358
)
359359
assert normalized_hybrid_british_typewriter_english_quote_convention.num_levels == 3
360360
assert normalized_hybrid_british_typewriter_english_quote_convention.get_opening_quotation_mark_at_depth(1) == '"'
@@ -367,15 +367,15 @@ def test_normalize() -> None:
367367

368368
def test_print_summary() -> None:
369369
quote_convention = QuoteConvention(
370-
"test-quote-convention",
370+
"test_quote_convention",
371371
[
372372
SingleLevelQuoteConvention("\u201c", "\u201D"),
373373
SingleLevelQuoteConvention("\u2018", "\u2019"),
374374
SingleLevelQuoteConvention("\u201D", "\u201D"),
375375
],
376376
)
377377
expected_summary_message = (
378-
"test-quote-convention\n"
378+
"test_quote_convention\n"
379379
+ "\u201CFirst-level quote\u201D\n"
380380
+ "\u2018Second-level quote\u2019\n"
381381
+ "\u201DThird-level quote\u201D\n"

0 commit comments

Comments
 (0)