-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexical_baseline.py
64 lines (59 loc) · 1.86 KB
/
lexical_baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pandas as pd
def parse_lexicon(path_to_lexicon):
    """
    Read a lexicon CSV and return the individual tokens it lists.

    The CSV must contain the columns 'tokens', 'label' and 'relationtype'.
    A cell of the 'tokens' column holds either a single token or several
    tokens separated by semicolons, with or without whitespace after the
    semicolon. Any number of tokens per cell is supported.

    Args:
        path_to_lexicon: location of the CSV, passed straight to
            pandas.read_csv.

    Returns:
        A dict keys view containing every distinct token, in order of first
        appearance. Surrounding whitespace is stripped from each token;
        empty pieces (e.g. from a trailing semicolon) and missing cells
        are skipped.

    Raises:
        KeyError: if one of the three expected columns is missing.
    """
    df = pd.read_csv(path_to_lexicon)
    rows = zip(
        df['tokens'].tolist(),
        df['label'].tolist(),
        df['relationtype'].tolist(),
    )

    # Map each single token to [relationtype, label]; a token that occurs in
    # several rows keeps the values of the last row it appears in.
    lexicon = {}
    for cell, label, relationtype in rows:
        # An empty CSV cell is read as NaN, which cannot be split.
        if pd.isna(cell):
            continue
        # Split on the bare ';' and strip afterwards so that both "a;b" and
        # "a; b" yield the same tokens.
        for piece in str(cell).split(';'):
            token = piece.strip()
            if token:
                lexicon[token] = [relationtype, label]
    return lexicon.keys()
def label_with_lexicon(lex_tokens, tokens):
    """
    Tag each token according to whether it occurs in the lexicon.

    Args:
        lex_tokens: container of lexicon tokens; only membership tests
            (``in``) are performed on it.
        tokens: iterable of tokens to label.

    Returns:
        A list with one entry per input token: "I-event" when the token is
        found in lex_tokens, "O" otherwise.
    """
    return ["I-event" if token in lex_tokens else "O" for token in tokens]
def evaluate_baseline(labels, gold):
    """
    Score predicted labels against gold labels for the 'I-event' class.

    Predictions and gold labels are compared pairwise (zip stops at the
    shorter of the two sequences):
      * true positive  - prediction equals gold and is 'I-event'
      * false negative - prediction differs from gold and is 'O'
      * false positive - prediction differs from gold and is 'I-event'

    Args:
        labels: predicted labels.
        gold: reference labels.

    Returns:
        A (precision, recall, f1) tuple. When there are no true positives
        the result is (0, 0, 0).
    """
    true_pos = false_pos = false_neg = 0
    for predicted, expected in zip(labels, gold):
        if predicted == expected:
            if predicted == 'I-event':
                true_pos += 1
        elif predicted == 'O':
            false_neg += 1
        elif predicted == 'I-event':
            false_pos += 1

    # Without any true positive, at least one of the three divisions below
    # would divide by zero, so every metric is reported as 0.
    if true_pos == 0:
        return (0, 0, 0)

    precision = true_pos / (true_pos + false_pos)
    recall = true_pos / (true_pos + false_neg)
    f1 = 2 * (precision * recall) / (precision + recall)
    return (precision, recall, f1)