-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcontrole-hyphenations.py
executable file
·69 lines (64 loc) · 2.23 KB
/
controle-hyphenations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
from difflib import SequenceMatcher
from html import escape
from re import sub
# Fixed parts of the HTML report; one table row per mismatch goes in between.
HTML_HEADER = """<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8'>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
<title>Rapportage afbreekpatronen met fouten</title>
</head>
<body style="font-family:courier;">
<h1>Rapportage afbreekpatronen met fouten</h1>
<table border="1">
"""
HTML_FOOTER = """</table>
</body>
</html>
"""

# Template for a highlighted fragment and the colour used per diff operation.
_SPAN = '<span style="background-color:%s;">%s</span>'
_COLOR_DELETED = '#ff8888'       # present in the word only
_COLOR_REPLACED_WORD = '#ffcc88'  # differing part, word side
_COLOR_REPLACED_HYPH = '#88ffcc'  # differing part, stripped-pattern side
_COLOR_INSERTED = '#88ff88'      # present in the stripped pattern only


def _sub_until_stable(pattern, repl, text):
    """Apply ``sub(pattern, repl, text)`` repeatedly until nothing changes.

    Every successful substitution removes the delimiter characters of one
    marker group, so the text gets strictly shorter.  Stopping as soon as a
    pass leaves the text untouched guarantees termination even when the
    delimiter characters are present but cannot form a match (for example
    ``a>b<c``), where a loop that only tests for the characters never ends.
    """
    while True:
        reduced = sub(pattern, repl, text)
        if reduced == text:
            return text
        text = reduced


def _strip_markup(hyph):
    """Return hyphenation pattern *hyph* with all hyphenation markup removed."""
    # '{///}' stands for a literal slash in the word.
    stripped = hyph.replace('{///}', '/')
    # <...>: drop the angle brackets, keep what is between them.
    stripped = _sub_until_stable(r'(.*)<(.*)>(.*)', r'\1\2\3', stripped)
    # {a/b} and [a/b]: keep only the part before the slash.
    stripped = _sub_until_stable(r'(.*)\{(.*)/(.*)\}(.*)', r'\1\2\4', stripped)
    stripped = _sub_until_stable(r'(.*)\[(.*)/(.*)\](.*)', r'\1\2\4', stripped)
    # Remaining single-character break markers.
    for marker in '~=|':
        stripped = stripped.replace(marker, '')
    return stripped


def _highlight(word, stripped):
    """Return HTML for *word* and *stripped* with their differences coloured.

    The two strings are compared with ``SequenceMatcher``; equal parts are
    emitted as plain (escaped) text and differing parts are wrapped in a
    coloured ``<span>``.  Returns a ``(word_html, stripped_html)`` tuple.
    """
    word_parts = []
    stripped_parts = []
    matcher = SequenceMatcher(None, word, stripped)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        # Escape the fragments so stray '<', '>' or '&' (typically leftovers
        # of malformed markup) are shown literally instead of breaking HTML.
        left = escape(word[i1:i2], quote=False)
        right = escape(stripped[j1:j2], quote=False)
        if tag == 'equal':
            word_parts.append(left)
            stripped_parts.append(right)
        elif tag == 'replace':
            word_parts.append(_SPAN % (_COLOR_REPLACED_WORD, left))
            stripped_parts.append(_SPAN % (_COLOR_REPLACED_HYPH, right))
        elif tag == 'delete':
            word_parts.append(_SPAN % (_COLOR_DELETED, left))
        elif tag == 'insert':
            stripped_parts.append(_SPAN % (_COLOR_INSERTED, right))
    return ''.join(word_parts), ''.join(stripped_parts)


def main(tsv_path='controle-hyphenations.tsv',
         html_path='controle-hyphenations.html'):
    """Write an HTML report of hyphenation patterns that do not match their word.

    *tsv_path* is read line by line; every non-empty line must consist of a
    word and its hyphenation pattern separated by one tab.  For each line
    whose pattern, after removing the markup, differs from the word, a table
    row showing both with the differences highlighted is written to
    *html_path*.

    Raises:
        ValueError: if a non-empty line does not have exactly two
            tab-separated fields.
    """
    # The report declares charset utf-8, so write it as UTF-8 regardless of
    # the locale.  NOTE(review): the input is assumed to be UTF-8 as well.
    with open(html_path, 'w', encoding='utf-8') as output, \
            open(tsv_path, 'r', encoding='utf-8') as source:
        output.write(HTML_HEADER)
        for number, line in enumerate(source, 1):
            # Only strip the newline: slicing off the last character would
            # eat a real character when the final line has no newline.
            line = line.rstrip('\n')
            if not line:
                continue
            fields = line.split('\t')
            if len(fields) != 2:
                raise ValueError('%s:%d: expected 2 tab-separated fields, got %d'
                                 % (tsv_path, number, len(fields)))
            word, hyph = fields
            stripped = _strip_markup(hyph)
            if word != stripped:
                output.write('<tr><td>{}</td><td>{}</td></tr>\n'.format(
                    *_highlight(word, stripped)))
        output.write(HTML_FOOTER)


if __name__ == '__main__':
    main()