-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_top_level_synsets.py
86 lines (56 loc) · 2.1 KB
/
get_top_level_synsets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import json
import typing
from web.services import directory_service
from web.services import condep_service
# Verb directory fetched once from the directory service. The loops further
# down iterate it as a mapping whose values expose ``database_ids``.
directory = directory_service.get_verb_details()

# hypernym synset -> set of synsets that name it as a hypernym.
children_of_synsets = {}
# Not populated anywhere in the visible part of this script.
parent_lookup = {}
# Synsets that at least one verb sense is filed under directly.
direct_synsets = set()
# Synsets that have at least one hypernym, i.e. are not top level.
synsets_with_hypernyms = set()
def _encode_for_json(obj):
if type(obj) == set:
return list(obj)
output = obj.__dict__
for key, value in output.items():
if type(value) == set:
output[key] = list(value)
return output
# First pass over the directory: remember every synset that a verb sense is
# filed under. Senses with an empty/falsy ``synset`` are ignored.
direct_synsets.update(
    sense.synset
    for verb_data in directory.values()
    for sense in verb_data.database_ids
    if sense.synset
)
# Second pass over the directory: for every sense that has a synset, file
# that synset under each of its hypernyms and note that it has a parent.
for verb_data in directory.values():
    for sense in verb_data.database_ids:
        if not sense.synset:
            continue
        for hypernym in sense.hypernyms:
            synsets_with_hypernyms.add(sense.synset)
            # setdefault creates the child set the first time a hypernym is
            # seen, replacing the manual "key in dict" if/else.
            children_of_synsets.setdefault(hypernym, set()).add(sense.synset)

# Every synset that appears as a hypernym of something.
synsets_with_hyponyms = set(children_of_synsets)
# All known synsets: those holding a verb sense directly plus those only
# reached as somebody's hypernym.
all_synsets = direct_synsets | synsets_with_hyponyms
# Top-level synsets are the known synsets that have no hypernym themselves.
top_level_synsets = all_synsets - synsets_with_hypernyms
# Collect the top-level synsets for which no condep is found, neither for
# the synset itself nor for any verb the directory lists in it.
synsets_needing_cd = []
for synset in top_level_synsets:
    if condep_service.get_condep_for_synset(synset):
        # The synset itself is already covered.
        continue

    verbs = directory_service.get_verbs_in_synset(synset)
    # any() stops at the first verb with a condep, like the original
    # break; a synset with no verbs counts as uncovered.
    covered = bool(verbs) and any(
        condep_service.get_condep_for_verb(verb) for verb in verbs
    )
    if not covered:
        synsets_needing_cd.append(synset)
# Serialize first, then open the file: opening with 'w' truncates
# immediately, so encoding inside the ``with`` block would leave a truncated
# or partial JSON file behind if serialization failed.
serialized_children = json.dumps(children_of_synsets, default=_encode_for_json)
with open('web/static/children_of_synsets.json', 'w', encoding='utf-8') as hypo_file:
    hypo_file.write(serialized_children)

# One synset per line, no trailing newline. The encoding is explicit so the
# output does not depend on the platform's default locale.
with open('top_level_synsets.txt', 'w', encoding='utf-8') as syn_file:
    syn_file.write('\n'.join(synsets_needing_cd))