@@ -27,6 +27,8 @@ def test_paragraph_markers() -> None:
27
27
alignment = to_word_alignment_matrix (
28
28
"0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 12-11 13-12 14-13 15-14 16-15 17-18 18-16 19-19"
29
29
),
30
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
31
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
30
32
)
31
33
rows = [UpdateUsfmRow (scr_ref ("MAT 1:1" ), str (pretranslation ), {"alignment_info" : align_info })]
32
34
usfm = r"""\id MAT
@@ -60,6 +62,8 @@ def test_style_markers() -> None:
60
62
alignment = to_word_alignment_matrix (
61
63
"0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 12-11 13-12 14-13 15-14 16-15 17-18 18-16 19-19"
62
64
),
65
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
66
+ style_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
63
67
)
64
68
rows = [UpdateUsfmRow (scr_ref ("MAT 1:1" ), str (pretranslation ), metadata = {"alignment_info" : align_info })]
65
69
usfm = r"""\id MAT
@@ -79,6 +83,16 @@ def test_style_markers() -> None:
79
83
"""
80
84
assess (target , result )
81
85
86
+ align_info = PlaceMarkersAlignmentInfo (
87
+ source_tokens = [t for t in TOKENIZER .tokenize (source )],
88
+ translation_tokens = [t for t in TOKENIZER .tokenize (pretranslation )],
89
+ alignment = to_word_alignment_matrix (
90
+ "0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 12-11 13-12 14-13 15-14 16-15 17-18 18-16 19-19"
91
+ ),
92
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
93
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
94
+ )
95
+ rows = [UpdateUsfmRow (scr_ref ("MAT 1:1" ), str (pretranslation ), metadata = {"alignment_info" : align_info })]
82
96
target = update_usfm (
83
97
rows ,
84
98
usfm ,
@@ -159,6 +173,8 @@ def test_trailing_empty_paragraphs() -> None:
159
173
source_tokens = ["Verse" , "1" ],
160
174
translation_tokens = ["New" , "verse" , "1" ],
161
175
alignment = to_word_alignment_matrix ("0-1 1-2" ),
176
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
177
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
162
178
)
163
179
},
164
180
)
@@ -197,6 +213,8 @@ def test_headers() -> None:
197
213
source_tokens = ["A" , "B" , "C" ],
198
214
translation_tokens = ["X" , "Y" , "Z" ],
199
215
alignment = to_word_alignment_matrix ("0-0 1-1 2-2" ),
216
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
217
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
200
218
)
201
219
},
202
220
),
@@ -208,6 +226,8 @@ def test_headers() -> None:
208
226
source_tokens = ["A" ],
209
227
translation_tokens = ["X" ],
210
228
alignment = to_word_alignment_matrix ("0-0" ),
229
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
230
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
211
231
)
212
232
},
213
233
),
@@ -276,6 +296,8 @@ def test_consecutive_markers() -> None:
276
296
source_tokens = ["Old" , "verse" , "1" , "word" ],
277
297
translation_tokens = ["New" , "verse" , "1" , "WORD" ],
278
298
alignment = to_word_alignment_matrix ("0-0 1-1 2-2 3-3" ),
299
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
300
+ style_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
279
301
)
280
302
},
281
303
)
@@ -311,6 +333,8 @@ def test_verse_ranges() -> None:
311
333
source_tokens = ["Verse" , "range" , "old" , "paragraph" , "2" ],
312
334
translation_tokens = ["New" , "verse" , "range" , "text" , "new" , "paragraph" , "2" ],
313
335
alignment = to_word_alignment_matrix ("0-1 1-2 2-4 3-5 4-6" ),
336
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
337
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
314
338
)
315
339
},
316
340
)
@@ -346,6 +370,8 @@ def test_no_update() -> None:
346
370
source_tokens = ["Old" , "paragraph" , "1" , "Old" , "paragraph" , "2" ],
347
371
translation_tokens = ["New" , "paragraph" , "1" , "New" , "paragraph" , "2" ],
348
372
alignment = to_word_alignment_matrix ("0-0 1-1 2-2 3-3 4-4 5-5" ),
373
+ paragraph_behavior = UpdateUsfmMarkerBehavior .STRIP ,
374
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
349
375
)
350
376
},
351
377
)
@@ -378,6 +404,8 @@ def test_no_update() -> None:
378
404
source_tokens = [],
379
405
translation_tokens = [],
380
406
alignment = to_word_alignment_matrix ("" ),
407
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
408
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
381
409
)
382
410
},
383
411
)
@@ -422,6 +450,8 @@ def test_split_tokens() -> None:
422
450
source_tokens = ["words" , "split" , "words" , "split" , "words" , "split" ],
423
451
translation_tokens = ["words" , "split" , "words" , "split" , "words" , "split" ],
424
452
alignment = to_word_alignment_matrix ("0-0 1-1 2-2 3-3 4-4 5-5" ),
453
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
454
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
425
455
)
426
456
},
427
457
)
@@ -458,6 +488,8 @@ def test_no_text() -> None:
458
488
source_tokens = [],
459
489
translation_tokens = [],
460
490
alignment = to_word_alignment_matrix ("" ),
491
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
492
+ style_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
461
493
)
462
494
},
463
495
)
@@ -491,6 +523,8 @@ def test_consecutive_substring() -> None:
491
523
source_tokens = ["string" , "ring" ],
492
524
translation_tokens = ["string" , "ring" ],
493
525
alignment = to_word_alignment_matrix ("0-0 1-1" ),
526
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
527
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
494
528
)
495
529
},
496
530
)
@@ -525,6 +559,8 @@ def test_verses_out_of_order() -> None:
525
559
source_tokens = ["verse" , "1" , "paragraph" , "2" ],
526
560
translation_tokens = ["new" , "verse" , "1" , "new" , "paragraph" , "2" ],
527
561
alignment = to_word_alignment_matrix ("0-1 1-2 2-4 3-5" ),
562
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
563
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
528
564
)
529
565
},
530
566
),
@@ -536,6 +572,8 @@ def test_verses_out_of_order() -> None:
536
572
source_tokens = ["verse" , "2" ],
537
573
translation_tokens = ["new" , "verse" , "2" ],
538
574
alignment = to_word_alignment_matrix ("0-1 1-2" ),
575
+ paragraph_behavior = UpdateUsfmMarkerBehavior .PRESERVE ,
576
+ style_behavior = UpdateUsfmMarkerBehavior .STRIP ,
539
577
)
540
578
},
541
579
),
@@ -562,6 +600,47 @@ def test_verses_out_of_order() -> None:
562
600
assess (target , result )
563
601
564
602
603
def test_strip_paragraphs_with_header() -> None:
    """Check the handler's output when paragraphs are stripped around a header.

    The source verse has a ``\\s`` header and a ``\\p`` paragraph between the
    verse 1 text and verse 2. With ``paragraph_behavior`` set to ``STRIP`` and
    ``style_behavior`` set to ``PRESERVE``, the expected output puts the whole
    translation on the ``\\v 1`` line, keeps ``\\s header``, leaves ``\\p`` with
    no text, and leaves verse 2 (which has no update row) unchanged.
    """
    rows = [
        UpdateUsfmRow(
            scr_ref("MAT 1:1"),
            "new verse 1 new paragraph 2",
            metadata={
                "alignment_info": PlaceMarkersAlignmentInfo(
                    source_tokens=["verse", "1", "paragraph", "2"],
                    translation_tokens=["new", "verse", "1", "new", "paragraph", "2"],
                    alignment=to_word_alignment_matrix("0-1 1-2 2-4 3-5"),
                    # Same behaviors as the ones passed to update_usfm below.
                    paragraph_behavior=UpdateUsfmMarkerBehavior.STRIP,
                    style_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
                )
            },
        )
    ]
    usfm = r"""\id MAT
\c 1
\v 1 verse 1
\s header
\p paragraph 2
\v 2 verse 2
"""

    target = update_usfm(
        rows,
        usfm,
        paragraph_behavior=UpdateUsfmMarkerBehavior.STRIP,
        style_behavior=UpdateUsfmMarkerBehavior.PRESERVE,
        update_block_handlers=[PlaceMarkersUsfmUpdateBlockHandler()],
    )
    result = r"""\id MAT
\c 1
\v 1 new verse 1 new paragraph 2
\s header
\p
\v 2 verse 2
"""
    assess(target, result)
642
+
643
+
565
644
def scr_ref(*refs: str) -> List[ScriptureRef]:
    """Parse each given reference string with ``ScriptureRef.parse``.

    Args:
        *refs: Reference strings; the tests in this module pass values such
            as ``"MAT 1:1"``.

    Returns:
        A list with one parsed ``ScriptureRef`` per argument, in argument
        order. The list is empty when no arguments are given.
    """
    return [ScriptureRef.parse(ref) for ref in refs]
567
646
0 commit comments