-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathdemo-triple-scoring.py
111 lines (62 loc) · 2.75 KB
/
demo-triple-scoring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os

from c_clause import PredictionHandler, Loader
from clause import Options
from clause.util.utils import get_base_dir
import numpy as np
## *** Example for triple scoring on Yago3-10 ***

## ** Preparation **
# The Yago3-10 files have to be downloaded first (see the data downloads
# on Read the Docs); the paths are built relative to get_base_dir().
data = f"{get_base_dir()}/data/yago3-10/train.txt"
rules = f"{get_base_dir()}/data/yago3-10/anyburl-rules-small.txt"

opts = Options()

# The loader receives the training triples and the rule file.
loader_options = opts.get("loader")
loader = Loader(options=loader_options)
loader.load_data(data=data)
loader.load_rules(rules=rules)

# With these settings the prediction score of a triple is the confidence
# of the highest predicting rule.
opts.set("prediction_handler.aggregation_function", "maxplus")
opts.set("prediction_handler.num_top_rules", 1)
scorer_options = opts.get("prediction_handler")
scorer = PredictionHandler(options=scorer_options)
## ** 1) Scoring from strings **
# A few (head, relation, tail) triples taken from test.txt
triples = [
    ["Radolfzell", "isLocatedIn", "Baden-Württemberg"],
    ["Paul_McCartney", "hasMusicalRole", "wordnet_bass_guitar_102804123"],
    ["Agatha_Christie", "influences", "Ibn-e-Safi"],
]
scorer.calculate_scores(triples=triples, loader=loader)

# The scores can be retrieved as strings or as idx's
str_scores = scorer.get_scores(as_string=True)
arr_scores = np.array(scorer.get_scores(as_string=False))

# list of lists with 4 elements: head, relation, tail, score
print(str_scores)
# 2d np.array
print(arr_scores)

# Write the scores to a file. The output directory is created up front
# (a no-op when it already exists) so the demo does not depend on a
# pre-existing "local" folder.
out_dir = f"{get_base_dir()}/local"
os.makedirs(out_dir, exist_ok=True)
scorer.write_scores(path=f"{out_dir}/triples-scores.txt", as_string=True)
## ** 2) Scoring from idx's **
# The entity/relation idx's used below are arbitrary picks for illustration.
# An index may be set before the data is loaded, or it can be retrieved
# from the loader after loading, as shown here.
entity_index = loader.get_entity_index()
relation_index = loader.get_relation_index()

idx_triples = [[10, 5, 1050], [3, 0, 34]]
triples = np.array(idx_triples)
scorer.calculate_scores(triples=triples, loader=loader)

# Output is independent of the input type: the scores can be written to
# file, returned as idx's, or returned as strings (done here).
# If a target triple is not predicted by any rule, its score is 0.
scores_as_str = scorer.get_scores(as_string=True)
print(scores_as_str)
## ** 3) Scoring from file **
# Score every triple in the Yago3-10 test set
targets = f"{get_base_dir()}/data/yago3-10/test.txt"

# The test set contains entities that do not occur in train.txt and are
# therefore not known in the index, but they have to be known for scoring.
# A new loader is used that additionally loads the test set as filter;
# here this has no effect other than filling the index.
loader2 = Loader(options=opts.get("loader"))
loader2.load_data(data=data, filter=targets)
loader2.load_rules(rules=rules)

scorer.calculate_scores(triples=targets, loader=loader2)

# Writing and returning the scores works as in the sections above.
# The output directory is created up front (a no-op when it already
# exists) so the demo does not depend on a pre-existing "local" folder.
out_dir = f"{get_base_dir()}/local"
os.makedirs(out_dir, exist_ok=True)
out_path = f"{out_dir}/yago-test-scores.txt"
scorer.write_scores(path=out_path, as_string=True)

# Show the first few scores
print(scorer.get_scores(as_string=True)[0:5])