calculate_bleu.py (forked from sangjoon-park/Medical_X-VL)
import json

import nltk.translate.bleu_score as bleu
from nltk.translate.bleu_score import SmoothingFunction

# Smoothing avoids zero scores when a higher-order n-gram has no overlap.
smooth = SmoothingFunction()

# JSON file with one record per study id:
# {id: {'caption': [reference_report], 'predicted': generated_report}, ...}
gen_file = './generated_20220713_ALBEF_seed42_mimic.json'
with open(gen_file) as f:
    gen_dic = [json.loads(l) for l in f][0]
num_ids = len(gen_dic)

bleu4 = 0.
for id in gen_dic.keys():
    references = gen_dic[id]['caption'][0]
    hypotheses = gen_dic[id]['predicted']
    # Strip the special tokens left over from the decoder output.
    hypotheses = hypotheses.replace('[CLS] ', '').replace(' [SEP]', '')
    references = references.split()
    hypotheses = hypotheses.split()
    # Sentence-level BLEU-4: equal weights on 1- to 4-grams, smoothing method 1.
    score = bleu.sentence_bleu([references], hypotheses,
                               weights=(0.25, 0.25, 0.25, 0.25),
                               smoothing_function=smooth.method1)
    bleu4 += score

# Average sentence-level BLEU-4 over all generated reports.
bleu4 = bleu4 / num_ids
print(bleu4)
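
# ---------------------------------------------------------------------------
# Corpus-level alternative (a minimal sketch, not part of the original flow).
# The commented-out lines in the original script hint at scoring with nltk's
# corpus_bleu instead of averaging sentence_bleu per report. The sketch below
# pools all (reference, hypothesis) pairs into a single corpus_bleu call and
# reuses the same gen_dic and smoothing function loaded above; the JSON layout
# is the same assumption as in the main loop.
# ---------------------------------------------------------------------------
from nltk.translate.bleu_score import corpus_bleu

list_of_references = []   # one list of reference token lists per report
corpus_hypotheses = []    # one hypothesis token list per report
for id in gen_dic.keys():
    reference = gen_dic[id]['caption'][0]
    predicted = gen_dic[id]['predicted'].replace('[CLS] ', '').replace(' [SEP]', '')
    list_of_references.append([reference.split()])
    corpus_hypotheses.append(predicted.split())

# corpus_bleu pools n-gram counts over all reports instead of averaging
# per-report scores, so it usually gives a slightly different number.
corpus_score = corpus_bleu(list_of_references, corpus_hypotheses,
                           weights=(0.25, 0.25, 0.25, 0.25),
                           smoothing_function=smooth.method1)
print(corpus_score)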