Skip to content

Commit 8c11992

Browse files
authored
test: update test_difflib from CPython3.11.2 (RustPython#5063)
1 parent b864e5d commit 8c11992

File tree

2 files changed

+66
-51
lines changed

2 files changed

+66
-51
lines changed

Lib/difflib.py

+10 -43
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ class SequenceMatcher:
6262
notion, pairing up elements that appear uniquely in each sequence.
6363
That, and the method here, appear to yield more intuitive difference
6464
reports than does diff. This method appears to be the least vulnerable
65-
to synching up on blocks of "junk lines", though (like blank lines in
65+
to syncing up on blocks of "junk lines", though (like blank lines in
6666
ordinary text files, or maybe "<P>" lines in HTML files). That may be
6767
because this is the only method of the 3 that has a *concept* of
6868
"junk" <wink>.
@@ -115,38 +115,6 @@ class SequenceMatcher:
115115
case. SequenceMatcher is quadratic time for the worst case and has
116116
expected-case behavior dependent in a complicated way on how many
117117
elements the sequences have in common; best case time is linear.
118-
119-
Methods:
120-
121-
__init__(isjunk=None, a='', b='')
122-
Construct a SequenceMatcher.
123-
124-
set_seqs(a, b)
125-
Set the two sequences to be compared.
126-
127-
set_seq1(a)
128-
Set the first sequence to be compared.
129-
130-
set_seq2(b)
131-
Set the second sequence to be compared.
132-
133-
find_longest_match(alo, ahi, blo, bhi)
134-
Find longest matching block in a[alo:ahi] and b[blo:bhi].
135-
136-
get_matching_blocks()
137-
Return list of triples describing matching subsequences.
138-
139-
get_opcodes()
140-
Return list of 5-tuples describing how to turn a into b.
141-
142-
ratio()
143-
Return a measure of the sequences' similarity (float in [0,1]).
144-
145-
quick_ratio()
146-
Return an upper bound on .ratio() relatively quickly.
147-
148-
real_quick_ratio()
149-
Return an upper bound on ratio() very quickly.
150118
"""
151119

152120
def __init__(self, isjunk=None, a='', b='', autojunk=True):
@@ -334,9 +302,11 @@ def __chain_b(self):
334302
for elt in popular: # ditto; as fast for 1% deletion
335303
del b2j[elt]
336304

337-
def find_longest_match(self, alo, ahi, blo, bhi):
305+
def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None):
338306
"""Find longest matching block in a[alo:ahi] and b[blo:bhi].
339307
308+
By default it will find the longest match in the entirety of a and b.
309+
340310
If isjunk is not defined:
341311
342312
Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
@@ -391,6 +361,10 @@ def find_longest_match(self, alo, ahi, blo, bhi):
391361
# the unique 'b's and then matching the first two 'a's.
392362

393363
a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__
364+
if ahi is None:
365+
ahi = len(a)
366+
if bhi is None:
367+
bhi = len(b)
394368
besti, bestj, bestsize = alo, blo, 0
395369
# find longest junk-free match
396370
# during an iteration of the loop, j2len[j] = length of longest
@@ -688,6 +662,7 @@ def real_quick_ratio(self):
688662

689663
__class_getitem__ = classmethod(GenericAlias)
690664

665+
691666
def get_close_matches(word, possibilities, n=3, cutoff=0.6):
692667
"""Use SequenceMatcher to return list of the best "good enough" matches.
693668
@@ -830,14 +805,6 @@ class Differ:
830805
+ 4. Complicated is better than complex.
831806
? ++++ ^ ^
832807
+ 5. Flat is better than nested.
833-
834-
Methods:
835-
836-
__init__(linejunk=None, charjunk=None)
837-
Construct a text differencer, with optional filters.
838-
839-
compare(a, b)
840-
Compare two sequences of lines; generate the resulting delta.
841808
"""
842809

843810
def __init__(self, linejunk=None, charjunk=None):
@@ -870,7 +837,7 @@ def compare(self, a, b):
870837
Each sequence must contain individual single-line strings ending with
871838
newlines. Such sequences can be obtained from the `readlines()` method
872839
of file-like objects. The delta generated also consists of newline-
873-
terminated strings, ready to be printed as-is via the writeline()
840+
terminated strings, ready to be printed as-is via the writelines()
874841
method of a file-like object.
875842
876843
Example:

Lib/test/test_difflib.py

+56 -8
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import difflib
2-
from test.support import run_unittest, findfile
2+
from test.support import findfile
33
import unittest
44
import doctest
55
import sys
@@ -241,7 +241,7 @@ def test_html_diff(self):
241241
#with open('test_difflib_expect.html','w') as fp:
242242
# fp.write(actual)
243243

244-
with open(findfile('test_difflib_expect.html')) as fp:
244+
with open(findfile('test_difflib_expect.html'), encoding="utf-8") as fp:
245245
self.assertEqual(actual, fp.read())
246246

247247
def test_recursion_limit(self):
@@ -503,12 +503,60 @@ def test_is_character_junk_false(self):
503503
for char in ['a', '#', '\n', '\f', '\r', '\v']:
504504
self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))
505505

506-
def test_main():
506+
class TestFindLongest(unittest.TestCase):
507+
def longer_match_exists(self, a, b, n):
508+
return any(b_part in a for b_part in
509+
[b[i:i + n + 1] for i in range(0, len(b) - n - 1)])
510+
511+
def test_default_args(self):
512+
a = 'foo bar'
513+
b = 'foo baz bar'
514+
sm = difflib.SequenceMatcher(a=a, b=b)
515+
match = sm.find_longest_match()
516+
self.assertEqual(match.a, 0)
517+
self.assertEqual(match.b, 0)
518+
self.assertEqual(match.size, 6)
519+
self.assertEqual(a[match.a: match.a + match.size],
520+
b[match.b: match.b + match.size])
521+
self.assertFalse(self.longer_match_exists(a, b, match.size))
522+
523+
match = sm.find_longest_match(alo=2, blo=4)
524+
self.assertEqual(match.a, 3)
525+
self.assertEqual(match.b, 7)
526+
self.assertEqual(match.size, 4)
527+
self.assertEqual(a[match.a: match.a + match.size],
528+
b[match.b: match.b + match.size])
529+
self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))
530+
531+
match = sm.find_longest_match(bhi=5, blo=1)
532+
self.assertEqual(match.a, 1)
533+
self.assertEqual(match.b, 1)
534+
self.assertEqual(match.size, 4)
535+
self.assertEqual(a[match.a: match.a + match.size],
536+
b[match.b: match.b + match.size])
537+
self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))
538+
539+
def test_longest_match_with_popular_chars(self):
540+
a = 'dabcd'
541+
b = 'd'*100 + 'abc' + 'd'*100 # length over 200 so popular used
542+
sm = difflib.SequenceMatcher(a=a, b=b)
543+
match = sm.find_longest_match(0, len(a), 0, len(b))
544+
self.assertEqual(match.a, 0)
545+
self.assertEqual(match.b, 99)
546+
self.assertEqual(match.size, 5)
547+
self.assertEqual(a[match.a: match.a + match.size],
548+
b[match.b: match.b + match.size])
549+
self.assertFalse(self.longer_match_exists(a, b, match.size))
550+
551+
552+
def setUpModule():
507553
difflib.HtmlDiff._default_prefix = 0
508-
Doctests = doctest.DocTestSuite(difflib)
509-
run_unittest(
510-
TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
511-
TestOutputFormat, TestBytes, TestJunkAPIs, Doctests)
554+
555+
556+
def load_tests(loader, tests, pattern):
557+
tests.addTest(doctest.DocTestSuite(difflib))
558+
return tests
559+
512560

513561
if __name__ == '__main__':
514-
test_main()
562+
unittest.main()

0 commit comments

Comments
 (0)