-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsmm_eval.py
125 lines (99 loc) · 3.98 KB
/
smm_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import sys
from pathlib import Path
sys.path.insert(0, "../")
import scipy.io
from leitmotifs.lama import *
from leitmotifs.competitors import *
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
import warnings
warnings.simplefilter("ignore")
# Human-readable names of the benchmark datasets.  The evaluation below looks
# a name up as ``dataset_names[i - 1]`` for the 1-based dataset number ``i``,
# so the order of this list must not change.
dataset_names = [
    "physio",
    "Boxing",
    "Swordplay",
    "Basketball",
    "Charleston - Side By Side Female",
    "crypto",
    "birds",
    "What I've Done - Linkin Park",
    "Numb - Linkin Park",
    "Vanilla Ice - Ice Ice Baby",
    "Queen David Bowie - Under Pressure",
    "The Rolling Stones - Paint It, Black",
    "Star Wars - The Imperial March",
    "Lord of the Rings Symphony - The Shire",
]
# Per-dataset results of the most recently evaluated result directory:
# {dataset number: [dataset name, precision, recall]}.
scores = {}

# Root of the SMM benchmark output.  Every sub-directory found below it is
# evaluated as one run.
root_directory = "results/smm_benchmark/results/"


def best_precision_recall(motif_bags, ground_truth, score_fn=None):
    """Return ``(precision, recall)`` of the motif bag with the highest F-score.

    Args:
        motif_bags: Iterable of motif records loaded from a ``MotifBag``
            entry.  Each usable record exposes ``startIdx`` (motif start
            positions) and ``Tscope`` (first element is used as the motif
            length).
        ground_truth: Ground-truth object, handed to ``score_fn`` unchanged.
        score_fn: Callable ``(sorted_starts, ground_truth, length)`` returning
            ``(precision, recall)``.  Defaults to ``compute_precision_recall``.

    Returns:
        The ``(precision, recall)`` pair of the bag whose F-score is highest,
        or ``(0.0, 0.0)`` when no bag reaches an F-score above zero.  That is
        the same value the caller records when the result file is missing;
        an empty motif set is never handed to the scorer.
    """
    if score_fn is None:
        score_fn = compute_precision_recall

    best_f_score = 0.0
    best_precision, best_recall = 0.0, 0.0

    for motif_bag in motif_bags:
        # Presumably empty MATLAB cells arrive as zero-size arrays (TODO
        # confirm against the data).  Skip them explicitly: relying on the
        # truth value of an empty array is NumPy-version dependent.
        if isinstance(motif_bag, np.ndarray) and motif_bag.size == 0:
            continue
        if not motif_bag:
            continue

        # ``squeeze_me=True`` collapses a single start index to a scalar;
        # np.sort needs at least one dimension.
        motif_set = np.sort(np.atleast_1d(motif_bag.startIdx))

        length = motif_bag.Tscope[0]
        if length == 0:
            length = 1

        precision, recall = score_fn(motif_set, ground_truth, length)
        # The small epsilon avoids a division by zero when both are 0.
        f_score = 2 * (precision * recall) / (precision + recall + 1e-8)

        if f_score > best_f_score:
            best_f_score = f_score
            best_precision, best_recall = precision, recall

    return best_precision, best_recall


def evaluate_directory(path):
    """Score every benchmark dataset against the SMM results stored in *path*.

    For dataset number ``i`` (1-based position in ``dataset_names``) the
    result file ``Motif_<i>_DepO_2_DepT_2.mat`` is looked up inside *path*.

    Args:
        path: Directory containing the ``.mat`` result files of one run.

    Returns:
        Dict mapping the dataset number to ``[name, precision, recall]``.
        Datasets without a result file are recorded with ``0.0, 0.0``.
    """
    results = {}
    for i, dataset_name in enumerate(dataset_names, start=1):
        mat_path = os.path.join(path, "Motif_" + str(i) + "_DepO_2_DepT_2.mat")

        # some datasets found no motifs
        if not os.path.exists(mat_path):
            results[i] = [dataset_name, 0.0, 0.0]
            continue

        # The ground truth is only needed once there is something to score.
        series_path = "../datasets/benchmark/" + str(i) + ".csv"
        df_gt = read_ground_truth(series_path)

        # Load the .mat file
        mat_file = scipy.io.loadmat(
            mat_path, struct_as_record=False, squeeze_me=True)
        motif_bags = mat_file["MotifBag"]
        # A single bag is not wrapped in an array because of ``squeeze_me``.
        if not isinstance(motif_bags, np.ndarray):
            motif_bags = [motif_bags]

        precision, recall = best_precision_recall(
            motif_bags, df_gt.values[0, 0])
        results[i] = [dataset_name, precision, recall]

    return results


for dirpath, dirnames, _ in os.walk(root_directory):
    for dirname in dirnames:
        path = os.path.join(dirpath, dirname)
        print("-------------------------------")
        print(path)

        scores = evaluate_directory(path)

        df = pd.DataFrame(scores).T
        df.columns = "Dataset Precision Recall".split()
        df["Precision"] = df["Precision"].astype(float)
        df["Recall"] = df["Recall"].astype(float)
        print(df.set_index("Dataset").describe().loc["mean"])

        # Make sure the output directory exists before writing the summary.
        os.makedirs("csv", exist_ok=True)
        df.to_csv('csv/smm_' + dirname + '_results.csv', index=False)