Skip to content

Commit 44a109f

Browse files
committed
fix bug of three or more entities per sentence; add io and path exception handling
Signed-off-by: Gang Ling <[email protected]>
1 parent c57621c commit 44a109f

File tree

8 files changed

+105
-97
lines changed

8 files changed

+105
-97
lines changed

CONFIG.cfg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[LTH]
2-
Path: <absolute-path-to-LTH>/lth_srl
2+
Path: /home/gangling/PycharmProjects/text2drs/lth_srl
33

44
[CoreNLP]
5-
Path: <absolute-path-to-CoreNLP>/stanford-corenlp-full-2016-10-31
5+
Path: /home/gangling/PycharmProjects/text2drs/stanford-corenlp-full-2016-10-31

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ From Narrative Text to Formal Action Language System Descriptions
1414
* Download or git clone (https://github.com/gling07/Text2DRS) Text2DRS repository
1515
* If you already have LTH or Stanford core-NLP 3.7.0, you can omit related steps and edit CONFIG file directly
1616
* Download LTH (http://nlp.cs.lth.se/software/semantic-parsing-propbank-nombank-frames/)
17-
* Unzip LTH package and move the package dictionary into Text2DRS repository folder
17+
* Unzip LTH package
1818
* Download Stanford core-NLP **3.7.0** package (https://stanfordnlp.github.io/CoreNLP/history.html)
19-
* Unzip core-NLP package and move the package dictionary into Text2DRS repository folder
19+
* Unzip core-NLP package
2020
* Edit the CONFIG.cfg file to include the system paths of the LTH and core-NLP packages as follows:
2121
```
2222
[LTH]

corenlp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424

2525
def coreference(xml):
26-
coref_dictionary = {}
26+
coref_dictionary = dict()
2727
root = xml.getroot()
2828
for elem in root.findall('./document/coreference/coreference/'):
2929
is_mention = elem.attrib.get('representative')

drs.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ def main_process(data_dct_lst):
4343
return drs_dict
4444

4545
def retrieve_entity(data_dct_lst):
46-
entities = []
46+
entities = list()
4747
for sentences in data_dct_lst:
48-
temp = []
48+
temp = list()
4949
for sen in sentences:
5050
if sen.get('PPOS') == 'NNP' or sen.get('PPOS') == 'NN':
5151
temp.append(sen.get('Form'))
@@ -57,7 +57,7 @@ def retrieve_entity(data_dct_lst):
5757

5858

5959
def mapping_entity(entities):
60-
entities_dictionary = {}
60+
entities_dictionary = dict()
6161
count = 1;
6262
for entity in entities:
6363
entities_dictionary['r'+ str(count)] = entity
@@ -67,7 +67,7 @@ def mapping_entity(entities):
6767

6868

6969
def retrieve_property(entities_map):
70-
properties = []
70+
properties = list()
7171
for key, entity in entities_map.items():
7272
temp = (key, entity)
7373
properties.append(temp)
@@ -76,7 +76,7 @@ def retrieve_property(entities_map):
7676

7777

7878
def retrieve_event(data_dct_lst):
79-
events_dictionary = {}
79+
events_dictionary = dict()
8080
count = 1;
8181
for sentences in data_dct_lst:
8282
for sen in sentences:

drs2.py

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def drs_generator(data_dct_lst, coref_dictionary):
4444

4545
return drs_dict
4646

47+
4748
def get_omit_entities(coref_dictionary):
4849

4950
omit_list = list()
@@ -57,6 +58,7 @@ def get_omit_entities(coref_dictionary):
5758
omit_list.append((key, v))
5859
return omit_list
5960

61+
6062
def get_all_entities(data_dct_lst, omit_list):
6163
entities = list()
6264
num = 0
@@ -72,7 +74,7 @@ def get_all_entities(data_dct_lst, omit_list):
7274

7375

7476
def mapping_entity(entities):
75-
entities_dictionary = {}
77+
entities_dictionary = dict()
7678
count = 1;
7779
for entity in entities:
7880
entities_dictionary['r'+ str(count)] = entity
@@ -82,7 +84,7 @@ def mapping_entity(entities):
8284

8385

8486
def retrieve_property(entities_map):
85-
properties = []
87+
properties = list()
8688
for key, entity in entities_map.items():
8789
temp = (key, entity)
8890
properties.append(temp)
@@ -91,7 +93,7 @@ def retrieve_property(entities_map):
9193

9294

9395
def retrieve_event(data_dct_lst):
94-
events_dictionary = {}
96+
events_dictionary = dict()
9597
count = 1;
9698
for sentences in data_dct_lst:
9799
for sen in sentences:
@@ -109,7 +111,7 @@ def retrieve_event_type(data_dct_lst):
109111
for sentence in data_dct_lst:
110112
for item in sentence:
111113
if item.get('PPOS') == 'VBD':
112-
event_type_dictionary['e' + str(count)] = item.get('vn-pb')[0]['vn']
114+
event_type_dictionary['e' + str(count)] = item.get('vn-pb')[0][1]
113115
count += 1
114116

115117
event_type_list = [(k, v) for k, v in event_type_dictionary.items()]
@@ -127,34 +129,40 @@ def retrieve_event_time(events_map):
127129
return event_time_list
128130

129131

130-
def retrieve_event_argument(data_dct_lst, property, eventType):
132+
def retrieve_event_argument(data_dct_lst, property, event_type):
133+
131134
event_argument_list = list()
132-
sentence_property = list()
133-
sentence_rolesets = list()
134-
for et, sentence in zip(eventType, data_dct_lst):
135-
vn = et[1]
135+
event_argument_dict = dict()
136+
index = 1
137+
for event, sentence in zip(event_type, data_dct_lst):
138+
arguments_list = list()
139+
args_to_vn = list()
140+
event_ref = event[0]
141+
for sent in sentence:
142+
if sent.get('Args') != '_' and sent.get('vn-pb')[0] != '_':
143+
# use first verb class as vn class
144+
vn_role = sent.get('vn-pb')[0][1]
145+
if sent.get('PPOS') == 'NNP' or sent.get('PPOS') == 'NN' or sent.get('PPOS') == 'TO':
146+
args_to_vn.append(vn_role)
147+
148+
sub_index = 0
136149
for item in sentence:
137150
tmp = list()
138151
if item.get('PPOS') == 'NNP' or item.get('PPOS') == 'NN':
139-
sentence_property.append(item.get('Form'))
140-
tmp += item.get('vn-pb')
141-
for i in tmp:
142-
k_list = [k for k in i.keys()]
143-
for k in k_list:
144-
if k == vn:
145-
sentence_rolesets.append(i[k])
146-
147-
index = 0
148-
count = 0
149-
for p, r in zip(sentence_property, sentence_rolesets):
150-
entity = ''
151-
for i in property:
152-
if i[1] == p:
153-
entity = i[0]
154-
event_argument_list.append((eventType[index][0], r, entity))
155-
count += 1
156-
if count == 2:
157-
index += 1
158-
count = 0
159-
160-
return event_argument_list
152+
tmp.append(event_ref)
153+
tmp.append(args_to_vn[sub_index])
154+
sub_index += 1
155+
entity = item.get('Form')
156+
for (ref, ent) in property:
157+
if entity == ent:
158+
tmp.append(ref)
159+
break
160+
arguments_list.append(tmp)
161+
event_argument_dict[index] = arguments_list
162+
index += 1
163+
164+
for value in event_argument_dict.values():
165+
for v in value:
166+
event_argument_list.append(v)
167+
168+
return event_argument_list

fileGenerator.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323

2424

2525
# print drs in asp format
26-
27-
2826
def drs_to_asp(drs_dict):
2927
print('%', end=' ')
3028
print(', '.join(drs_dict['entity']), end=', ')
@@ -88,6 +86,7 @@ def drs_to_asp(drs_dict):
8886
print()
8987
count = 0
9088

89+
9190
# print verbnet srl table
9291
def print_table(m_lst):
9392
dct_keys = m_lst[0][0].keys()
@@ -100,13 +99,18 @@ def print_table(m_lst):
10099
for key in dct_keys:
101100
if key == 'vn-pb':
102101
for item in sub_dct3.get(key):
103-
if 'vn' in item.keys():
104-
print('{};'.format(item.get('vn')), end="")
105-
elif '_' not in item.keys():
106-
for k,v in item.items():
107-
print('{}:{};'.format(k,v), end="")
102+
if item[0] == 'vn':
103+
print('{};'.format(item[1]), end='')
104+
elif item[0] != '_':
105+
count = 0
106+
for k in item:
107+
if count == 0:
108+
print('{}'.format(k), end=":")
109+
count += 1
110+
else:
111+
print('{}'.format(k), end="; ")
112+
count = 0
108113
else:
109-
print('{:5s}'.format(item.get('_')), end="")
114+
print('{:5s}'.format(item[0]), end="")
110115
else:
111-
print("{:10s}\t".format(sub_dct3.get(key)), end="")
112-
116+
print("{:10s}\t".format(sub_dct3.get(key)), end="")

text2drs.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import sys
2626
import subprocess
2727
import argparse
28+
from typing import Any, Union
29+
2830
import verbnetsrl
2931
import drs
3032
import xml.etree.ElementTree as ET
@@ -94,6 +96,7 @@ def process_lth(file, lth_path):
9496
# switch back to the text2drs directory
9597
os.chdir(text2_drs_path)
9698

99+
97100
# process input file by running corenlp through command line
98101
# output file format can be chosen from text, xml, json
99102
def process_corenlp(file, corenlp_path):
@@ -121,13 +124,24 @@ def main():
121124
parser.add_argument("input", help='given full path of input file', type=str)
122125
args = parser.parse_args()
123126

124-
config.read(args.config)
125-
input_file = args.input
127+
try:
128+
config.read(args.config)
129+
except IOError:
130+
print('Could not find CONFIG file')
131+
132+
try:
133+
input_file = args.input
134+
except IOError:
135+
print('Could not find the txt file')
126136

137+
input_file = args.input
127138
lth_path = config.get('LTH', 'Path')
128-
corenlp_path = config.get('CoreNLP', 'Path')
129139

130-
process_lth(input_file, lth_path)
140+
if os.path.exists(lth_path):
141+
process_lth(input_file, lth_path)
142+
else:
143+
print('LTH path is invalid')
144+
sys.exit()
131145

132146
# read lth output file and store in lth_output
133147
lth_output = None
@@ -152,7 +166,13 @@ def main():
152166
sys.stdout = orig_stdout
153167
f.close()
154168

155-
corenlp_output_path = process_corenlp(input_file, corenlp_path)
169+
corenlp_path = config.get('CoreNLP', 'Path')
170+
if os.path.exists(corenlp_path):
171+
corenlp_output_path = process_corenlp(input_file, corenlp_path)
172+
else:
173+
print('Core-NLP path invalid')
174+
sys.exit()
175+
156176
corenlp_output = None
157177
try:
158178
corenlp_output = ET.parse(corenlp_output_path)

0 commit comments

Comments
 (0)