@@ -1,11 +1,9 @@
 import argparse
-import collections
 from collections import defaultdict, Counter
 import pathlib
-import goldretriever
 import pandas as pd
 import json
-import mmif
+from clams_utils.aapb import goldretriever

 # constant:
 GOLD_URL = "https://github.com/clamsproject/aapb-annotations/tree/bebd93af0882b8cf942ba827917938b49570d6d9/scene-recognition/golds"
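Note on the import change: the gold annotations pointed to by GOLD_URL are now retrieved through the shared clams_utils helper rather than a local goldretriever module. A minimal sketch of how it is used (mirroring the __main__ block later in this diff, and assuming download_golds returns the path to a local directory holding the downloaded gold files):

from clams_utils.aapb import goldretriever

# fetch the gold annotation files once; the returned local path is then used as gold_dir
gold_dir = goldretriever.download_golds(GOLD_URL)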
@@ -110,8 +108,13 @@ def document_evaluation(combined_dict):
     average_p = 0
     average_r = 0
     average_f1 = 0
+    # counter to account for unseen labels
+    unseen = 0
     for label in total_counts:
         tp, fp, fn = total_counts[label]["tp"], total_counts[label]["fp"], total_counts[label]["fn"]
+        # if no instances are present/predicted, account for this when taking average of scores
+        if tp + fp + fn == 0:
+            unseen += 1
         precision = float(tp / (tp + fp)) if (tp + fp) > 0 else 0
         recall = float(tp / (tp + fn)) if (tp + fn) > 0 else 0
         f1 = float(2 * (precision * recall) / (precision + recall)) if (precision + recall) > 0 else 0
@@ -123,9 +126,11 @@ def document_evaluation(combined_dict):
         average_r += recall
         average_f1 += f1
     # calculate macro averages for document and add to scores_by_label
-    scores_by_label["average"]["precision"] = float(average_p / len(scores_by_label))
-    scores_by_label["average"]["recall"] = float(average_r / len(scores_by_label))
-    scores_by_label["average"]["f1"] = float(average_f1 / len(scores_by_label))
+    # make sure to account for unseen, unpredicted labels
+    denominator = len(scores_by_label) - unseen
+    scores_by_label["average"]["precision"] = float(average_p / denominator)
+    scores_by_label["average"]["recall"] = float(average_r / denominator)
+    scores_by_label["average"]["f1"] = float(average_f1 / denominator)
     # return both scores_by_label and total_counts (to calculate micro avg later)
     return scores_by_label, total_counts

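To see what the unseen counter buys (a made-up illustration, not part of the diff): a label that never shows up in either the gold or the predictions for a document has tp + fp + fn == 0, so its precision, recall, and f1 all fall back to 0. Dividing by len(scores_by_label) would let those zeros drag the macro averages down; dividing by the number of labels actually seen or predicted does not.

# hypothetical per-label f1 values for one document; "chyron" never occurs and is never predicted
per_label_f1 = {"slate": 0.8, "credits": 0.6, "chyron": 0.0}
unseen = 1  # chyron had tp + fp + fn == 0
naive_macro = sum(per_label_f1.values()) / len(per_label_f1)                # ~0.47
adjusted_macro = sum(per_label_f1.values()) / (len(per_label_f1) - unseen)  # 0.7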
@@ -190,6 +195,24 @@ def run_dataset_eval(mmif_dir, gold_dir, count_subtypes):
     data_scores = total_evaluation(document_counts)
     return doc_scores, data_scores

+def separate_score_outputs(doc_scores, dataset_scores, mmif_dir):
+    # get name for new directory
+    # with our standard, this results in "scores@" appended to the batch name
+    batch_score_name = "scores@" + mmif_dir.split('@')[-1].strip('/')
+    # create new dir for scores based on batch name
+    new_dir = pathlib.Path.cwd() / batch_score_name
+    new_dir.mkdir(parents=True, exist_ok=True)
+    # iterate through nested dict, output separate scores for each guid
+    for guid in doc_scores:
+        doc_df = pd.DataFrame(doc_scores[guid])
+        doc_df = doc_df.transpose()
+        out_path = new_dir / f"{guid}.csv"
+        doc_df.to_csv(out_path)
+    # output total dataset scores
+    dataset_df = pd.DataFrame(dataset_scores)
+    dataset_df = dataset_df.transpose()
+    dataset_df.to_csv(new_dir / "dataset_scores.csv")
+

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
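A usage sketch for the new helper (batch name and GUID are hypothetical; the output name simply follows the split on '@' above): for a batch directory such as preds@sr-eval@batch-2024/, the scores are written to a new scores@batch-2024/ directory under the current working directory, one CSV per document plus one dataset-level CSV.

# hypothetical call; doc_scores and dataset_scores come from run_dataset_eval()
separate_score_outputs(doc_scores, dataset_scores, "preds@sr-eval@batch-2024/")
# resulting layout:
#   scores@batch-2024/
#       cpb-aacip-00000000000.csv   per-label precision/recall/f1 for one document
#       ...
#       dataset_scores.csv          overall (micro-averaged) scores for the batch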
@@ -201,13 +224,10 @@ def run_dataset_eval(mmif_dir, gold_dir, count_subtypes):
                         help='bool flag whether to consider subtypes for evaluation')
     args = parser.parse_args()
     mmif_dir = args.mmif_dir
-    GOLD_URL = "https://github.com/clamsproject/aapb-annotations/tree/bebd93af0882b8cf942ba827917938b49570d6d9/scene-recognition/golds"
     gold_dir = goldretriever.download_golds(GOLD_URL) if args.gold_dir is None else args.gold_dir
     count_subtypes = args.count_subtypes
     document_scores, dataset_scores = run_dataset_eval(mmif_dir, gold_dir, count_subtypes)
     # document scores are for each doc, dataset scores are for overall (micro avg)
-    doc_df = pd.DataFrame(document_scores)
-    dataset_df = pd.DataFrame(dataset_scores)
-    doc_df.to_csv('document_scores.csv')
-    dataset_df.to_csv('dataset_scores.csv')
+    # call method to output scores for each doc and then for total scores
+    separate_score_outputs(document_scores, dataset_scores, mmif_dir)

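Since the per-document and dataset scores now land in the scores@ directory rather than two flat files next to the script, they can be read back per document, e.g. (hypothetical file names; the row/column layout follows from the transposed DataFrames written above):

import pandas as pd

# rows are labels plus the "average" (macro) row; columns are precision, recall, f1
doc_df = pd.read_csv("scores@batch-2024/cpb-aacip-00000000000.csv", index_col=0)
# overall (micro-averaged) scores for the whole dataset
dataset_df = pd.read_csv("scores@batch-2024/dataset_scores.csv", index_col=0)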