-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathresults_equations.py
97 lines (68 loc) · 3 KB
/
results_equations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np
import pandas as pd
import glob
import os
import json
from collections import defaultdict
def main(path, output_path):
    """Aggregate the results found under *path* and write them to disk.

    The DataFrame returned by ``load_results`` is written to *output_path*
    as CSV (without the index) and printed. The assignments returned
    alongside it are dumped as indented JSON to a sibling file named
    ``<output_path minus a trailing '.csv'>.assignments.json``.

    Args:
        path: Directory handed to ``load_results``.
        output_path: Destination of the combined CSV.
    """
    df, all_assignments = load_results(path)
    df.to_csv(output_path, index=False)
    print(df)

    # Only a trailing '.csv' is swapped out. A blanket str.replace would
    # also rewrite '.csv' inside directory names and, when the path has no
    # '.csv' at all, would leave it unchanged so that the JSON dump below
    # overwrote the CSV that was just written.
    if output_path.endswith('.csv'):
        stem = output_path[:-len('.csv')]
    else:
        stem = output_path
    assignments_path = stem + '.assignments.json'

    with open(assignments_path, 'w') as f:
        json.dump(all_assignments, f, indent=4)
def load_results(path):
    """Load every ``*.csv`` result file in *path* into one DataFrame.

    Each CSV is expected to be named ``<model>_<dataset>.csv`` (the model is
    the text before the first underscore, the dataset is everything after
    it) and to have a sibling ``.json`` config file with the same stem. The
    config must contain a ``'script'`` key; for runs produced by
    ``model_sd_old.py`` its ``'initial_kcs'`` value is stored in an
    ``n_kcs`` column and the learned assignments are extracted.

    Args:
        path: Directory containing the result CSVs and their JSON configs.

    Returns:
        A ``(df, all_assignments)`` tuple. ``df`` is the row-wise
        concatenation of all CSVs with added ``model`` and ``dataset``
        columns and the ``'Unnamed: 0'`` column renamed to ``split``.
        ``all_assignments`` maps a model name to the value returned by
        ``extract_assignments`` for that model's result file.

    Raises:
        ValueError: If *path* contains no ``*.csv`` files.
    """
    with open('data/datasets/equations.problem_text_to_id.json', 'r') as f:
        problem_text_to_id = json.load(f)

    with open('data/datasets/equations.problem_text_to_orig.json', 'r') as f:
        problem_text_to_orig = json.load(f)

    # glob returns files in arbitrary order; sorting keeps the row order of
    # the combined frame reproducible across runs and machines.
    files = sorted(glob.glob(os.path.join(path, '*.csv')))
    if not files:
        # Same exception type pandas would raise from concat([]), but with
        # a message that points at the actual problem.
        raise ValueError('no result CSV files found in {!r}'.format(path))

    dfs = []
    all_assignments = {}
    for file in files:
        # Strip only the real extension; a substring replace would also hit
        # '.csv' occurring in a directory or in the middle of the name.
        stem = os.path.splitext(file)[0]
        parts = os.path.basename(stem).split('_')
        model, dataset = parts[0], '_'.join(parts[1:])

        df = pd.read_csv(file)
        df['model'] = model
        df['dataset'] = dataset

        with open(stem + '.json', 'r') as f:
            cfg = json.load(f)

        # NOTE(review): the file reached review with its indentation lost.
        # Both statements are placed under this check on the assumption that
        # only model_sd_old.py runs write the params archive that
        # extract_assignments reads - confirm against the original layout.
        if cfg['script'] == 'model_sd_old.py':
            df['n_kcs'] = cfg['initial_kcs']
            # NOTE(review): keyed by model only, so a second dataset for the
            # same model replaces the first entry.
            all_assignments[model] = extract_assignments(
                file, problem_text_to_id, problem_text_to_orig)

        dfs.append(df)

    df = pd.concat(dfs, axis=0, ignore_index=True)
    # A CSV written with an unnamed index column is read back by pandas as
    # 'Unnamed: 0'; give that column its real meaning.
    df = df.rename(columns={'Unnamed: 0': 'split'})

    return df, all_assignments
def extract_assignments(path, problem_text_to_id, problem_text_to_orig, thres=50):
    """Summarise the learned problem-to-KC assignments of one run.

    Reads the ``.params.npy.npz`` archive that sits next to the result CSV
    at *path* and, for every split, lists the highest-scoring problems of
    each KC together with how many problems pick that KC as their argmax.

    Args:
        path: Path of the result CSV; a trailing ``.csv`` is replaced by
            ``.params.npy.npz`` to locate the archive.
        problem_text_to_id: Mapping from problem text to the integer problem
            index used along the problem axis of ``Aprior``.
        problem_text_to_orig: Not used by this function; kept so existing
            callers keep working.
        thres: Maximum number of top problems reported per KC.

    Returns:
        A list with one entry per split. Each entry is a list of dicts, one
        per KC, ordered from most to least frequently assigned, with keys
        ``top_problems`` (problem texts, best first), ``freq``,
        ``logit_pL`` and ``logit_pF``.
    """
    # Swap only a trailing '.csv'; a blanket replace would also rewrite
    # '.csv' occurring earlier in the path (e.g. in a directory name).
    stem = path[:-len('.csv')] if path.endswith('.csv') else path
    params_file = stem + '.params.npy.npz'

    # The archive object holds an open file handle; the context manager
    # releases it once the two arrays have been read into memory.
    with np.load(params_file) as d:
        Aprior = d['Aprior']  # Splits x Problems x KCs
        # Splits x Latent KCs x Target x Source (stored under the key
        # 'alpha' although these are the transition logits).
        trans_logits = d['alpha']

    id_to_problem_text = {v: k for k, v in problem_text_to_id.items()}

    assignments_by_split = []
    for split_idx in range(Aprior.shape[0]):
        Q = Aprior[split_idx, :, :]  # Problems x KCs
        split_trans_logits = trans_logits[split_idx, :, :, :]  # Latent KCs x Target x Source
        logit_pL = split_trans_logits[:, 1, 0]
        logit_pF = split_trans_logits[:, 0, 1]

        # Frequency of each KC: number of problems whose best KC it is.
        problem_assignment = np.argmax(Q, axis=1)  # one KC index per problem
        skills, counts = np.unique(problem_assignment, return_counts=True)
        skill_freq = np.zeros(Q.shape[1])
        skill_freq[skills] = counts

        sorted_skills = np.argsort(-skill_freq)

        # Per KC, problem indices ordered by descending score, cut to the
        # first `thres` rows (fewer if there are fewer problems).
        top_problem_ids = np.argsort(-Q, axis=0)[:thres, :]  # <=thres x KCs

        readable_assignments = [
            {
                "top_problems": [
                    id_to_problem_text[int(pid)] for pid in top_problem_ids[:, k]
                ],
                "freq": int(skill_freq[k]),
                "logit_pL": float(logit_pL[k]),
                "logit_pF": float(logit_pF[k]),
            }
            for k in sorted_skills
        ]

        assignments_by_split.append(readable_assignments)

    return assignments_by_split
if __name__ == "__main__":
    import sys

    # Script entry point: expects the results directory and the output CSV
    # path as the first two command-line arguments. Exit with a usage line
    # rather than an IndexError traceback when either is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: {} RESULTS_DIR OUTPUT_CSV".format(sys.argv[0]))

    main(sys.argv[1], sys.argv[2])