-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFig_AUC_over_time.py
102 lines (83 loc) · 3.65 KB
/
Fig_AUC_over_time.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
def read_elo_results(file_path):
    """Load pairwise match outcomes from a comma-separated text file.

    Every non-blank line must hold exactly three integer fields in the
    form ``id1,id2,winner``. Lines that are empty or contain only
    whitespace are skipped.

    Args:
        file_path: Path of the results file to read.

    Returns:
        A list of ``(id1, id2, winner)`` tuples of ints, in file order.

    Raises:
        ValueError: If a non-blank line does not consist of exactly three
            comma-separated integer fields.
    """
    elo_results = []
    with open(file_path, 'r') as file:
        # Iterate the file object directly rather than loading every line
        # into memory first.
        for line in file:
            record = line.strip()
            if not record:
                # Whitespace-only lines (stray spaces/tabs, trailing blank
                # lines) carry no match and must not reach the unpacking.
                continue
            id1, id2, winner = record.split(',')
            elo_results.append((int(id1), int(id2), int(winner)))
    return elo_results
def update_elo(elo_scores, id1, id2, winner, K=32):
    """Apply the outcome of one match to the two players' ELO ratings.

    The ratings container is modified in place and also returned.

    Args:
        elo_scores: Indexable, mutable collection of ratings.
        id1: Index of the first player in ``elo_scores``.
        id2: Index of the second player in ``elo_scores``.
        winner: ``1`` if the first player won; any other value is treated
            as a win for the second player.
        K: Step size that scales how far a single result moves a rating.

    Returns:
        The same ``elo_scores`` object, after the update.
    """
    # Expected results follow from the rating-derived strengths of both
    # players, taken before either rating is touched.
    strength_first = 10 ** (elo_scores[id1] / 400)
    strength_second = 10 ** (elo_scores[id2] / 400)
    combined = strength_first + strength_second
    expected_first = strength_first / combined
    expected_second = strength_second / combined

    # Actual results: the winner scores 1, the loser 0.
    actual_first = 1 if winner == 1 else 0
    actual_second = 1 - actual_first

    # Move each rating by K times (actual - expected).
    elo_scores[id1] += K * (actual_first - expected_first)
    elo_scores[id2] += K * (actual_second - expected_second)
    return elo_scores
if __name__ == "__main__":
    # Script entry point: for each model's ELO result file, replay the
    # matches in file order, record the AUC of the ELO scores against the
    # binarised interest labels after every match, then plot all curves.
    data_dir = "data"
    os.makedirs(data_dir, exist_ok=True)
    file_path = os.path.join(data_dir, 'all_evaluation_data.pkl')
    if os.path.exists(file_path):
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only load pickles that come from a trusted source.
        with open(file_path, 'rb') as file:
            all_data = pickle.load(file)
        print("'all_data' has been loaded from the pickle file.")
    else:
        print(f"{file_path} doesnt exist.")
        # Signal the failure with a non-zero status. SystemExit is used
        # instead of exit(), which is a helper injected by the site module
        # and is not guaranteed to exist in every interpreter setup.
        raise SystemExit(1)

    num_of_samples = len(all_data['interest'])

    # The ground-truth labels do not depend on the result file or on the
    # match being replayed, so they are built once: 1 where the interest
    # value is at least 4, otherwise 0.
    interest_data = np.array(all_data['interest'])
    interest_binary = (interest_data >= 4).astype(int)

    all_auc_labels = ['GPT4o mini', 'GPT4o', 'GPT 3.5']
    result_files = [
        'combined_ELO_results_4omini.txt',
        'combined_ELO_results_4o.txt',
        'combined_ELO_results_35.txt',
    ]

    all_auc_evolutions = []
    for result_file in result_files:
        # Every sample starts each tournament from the same rating.
        elo_scores = [1400] * num_of_samples
        elo_results = read_elo_results(os.path.join(data_dir, result_file))
        total_matches = len(elo_results)

        # AUC value after each match of this tournament.
        auc_values = []
        for match_number, (id1, id2, winner) in enumerate(elo_results, start=1):
            elo_scores = update_elo(elo_scores, id1, id2, winner)

            # AUC is computed after every match. It depends only on which
            # label is paired with which score, not on the order of the
            # pairs, so the scores are passed unsorted, index-aligned with
            # the labels.
            auc = roc_auc_score(interest_binary, elo_scores)
            auc_values.append(auc)
            print(f'{match_number}/{total_matches}: {auc}')

        all_auc_evolutions.append(auc_values)

    # Plot AUC values over the course of each tournament.
    plt.figure()
    for auc_label, auc_values in zip(all_auc_labels, all_auc_evolutions):
        plt.plot(auc_values, label=f'AUC over time ({auc_label})')
    plt.xlabel('Match Number')
    plt.ylabel('AUC')
    plt.title('AUC over the course of the ELO tournament')
    plt.legend(loc="lower right")
    plt.grid(True)

    save_dir = 'figures'
    os.makedirs(save_dir, exist_ok=True)
    auc_plot_file = os.path.join(save_dir, "auc_over_time_final.png")
    # Save before show(): the figure may be cleared once the window closes.
    plt.savefig(auc_plot_file, dpi=300, format='png')
    plt.show()
    plt.close()
    print(f"AUC over time plot saved to {auc_plot_file}")