forked from bmschmidt/Novel-Devotions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSchinke_Latin_Stemming.py
67 lines (55 loc) · 2.41 KB
/
Schinke_Latin_Stemming.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# http://snowball.tartarus.org/otherapps/schinke/intro.html
# /Users/dt/Documents/UChicago/Literature/5/Schinke_Latin_Stemming.py
"""
Created on Sun Aug 13 22:23:33 2017
@author: dt
"""
# Letter normalisation applied to every input word before stemming.
# Each entry is [letter to replace, replacement letter].
convert_1 = [
    ['j', 'i'],
    ['v', 'u'],
]

# Words ending in "que" that the stemmer returns unchanged (as both the noun
# and the verb stem) instead of removing the trailing "que".
stop_case = """
    atque quoque neque itaque absque apsque abusque adaeque adusque denique
    deque susque oblique peraeque plenisque quandoque quisque quaeque
    cuiusque cuique quemque quamque quaque quique quorumque quarumque
    quibusque quosque quasque quotusquisque quousque ubique undique usque
    uterque utique utroque utribique torque coque concoque contorque
    detorque decoque excoque extorque obtorque optorque retorque recoque
    attorque incoque intorque praetorque
""".split()

# Suffixes removed to form the noun stem, listed longest first so that the
# first match found while scanning is also the longest one.
figure_6a = """
    ibus ius ae am as em es ia is nt
    os ud um us a e i o u
""".split()

# Suffixes removed to form the verb stem, likewise listed longest first.
figure_6b = """
    iuntur beris erunt untur iunt mini ntur
    stis bor ero mur mus ris sti tis
    tur unt bo ns nt ri m r s t
""".split()

# Verb suffixes that are rewritten rather than removed.
# Each entry is [list of suffixes, replacement ending].
convert_2 = [
    [['iuntur', 'erunt', 'untur', 'iunt', 'unt'], 'i'],
    [['beris', 'bor', 'bo'], 'bi'],
    [['ero'], 'eri'],
]
def schinke_latin_stemming(string):
    """Return the noun-based and verb-based stems of a Latin word.

    Steps performed:
      1. Apply the letter substitutions in ``convert_1`` ('j' -> 'i',
         'v' -> 'u') to the whole word.
      2. If the word ends in "que": when it is listed in ``stop_case`` it is
         returned unchanged as both stems; otherwise the "que" is removed.
      3. Noun stem: remove the first matching suffix from ``figure_6a``.
      4. Verb stem: rewrite a suffix listed in ``convert_2``; only if none
         applies, remove the first matching suffix from ``figure_6b``.

    Args:
        string: The word to stem. No case folding is done here, so the
            lower-case suffix tables only match lower-case input.

    Returns:
        A dict with up to two keys, ``'noun'`` and ``'verb'``. A key is
        omitted when the corresponding stem is shorter than two characters,
        so the dict may be empty (e.g. for an empty input string).
    """
    res = {}
    for old, new in convert_1:
        string = string.replace(old, new)

    if string.endswith('que'):
        if string in stop_case:
            res['noun'] = string
            res['verb'] = string
            return res
        string = string[:-3]

    noun = string
    for suffix in figure_6a:
        if string.endswith(suffix):
            # Slice the suffix off. str.rstrip(suffix) would instead strip
            # every trailing character contained in `suffix`
            # ("missus".rstrip("us") == "mi").
            noun = string[:-len(suffix)]
            break
    if len(noun) >= 2:
        res['noun'] = noun

    verb = _schinke_verb_stem(string)
    if len(verb) >= 2:
        res['verb'] = verb
    return res


def _schinke_verb_stem(word):
    """Return the verb stem of *word*: convert a suffix, else remove one."""
    for suffixes, replacement in convert_2:
        for suffix in suffixes:
            if word.endswith(suffix):
                # A converted suffix is final; it must not be passed on to
                # the removal step below (otherwise "currunt" -> "curri"
                # would lose its new ending again).
                return word[:-len(suffix)] + replacement
    for suffix in figure_6b:
        if word.endswith(suffix):
            return word[:-len(suffix)]
    return word
# Demo call: runs whenever this file is executed or imported and prints the
# stem dictionary computed for the word "ducibus".
print(schinke_latin_stemming("ducibus"))