-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
81 lines (72 loc) · 3.21 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import random
import time
import sys
import numpy as np
FILLING_VALUE = 0 # change to np.nan to ignore predictions for missing values
def load_matrix(folder):
matrix = np.genfromtxt(f"./data/{folder}/participants-votes.csv", filling_values=FILLING_VALUE,
delimiter=',', skip_header=1, dtype=float, invalid_raise=False)[:, 7:]
people, notes = matrix.shape
print("Loaded {} people, {} notes".format(people, notes))
print(matrix)
return matrix
def cost(matrix, intercept, user_friendliness, note_quality, user_alignment, note_alignment):
expected_values = (
intercept +
user_friendliness[:, np.newaxis] +
note_quality[np.newaxis, :] +
np.outer(user_alignment, note_alignment)
)
error = matrix - expected_values
total = np.nansum(error ** 2)
# Add regularization params
people, notes = matrix.shape
total += 0.15 * ((intercept ** 2) * people * notes +
np.sum(user_friendliness ** 2) +
np.sum(note_quality ** 2) +
np.sum(user_alignment ** 2) +
np.sum(note_alignment ** 2))
return total
def naive_descent(matrix, folder):
people, notes = matrix.shape
intercept = np.array([random.random()])
user_friendliness = np.random.random(people)
note_quality = np.random.random(notes)
user_alignment = np.random.random(people)
note_alignment = np.random.random(notes)
score = cost(matrix, intercept[0], user_friendliness,
note_quality, user_alignment, note_alignment)
improvements = 0
timestart = time.time()
for round in range(9**99):
round_delta = max(0.001, 0.1 / (round+1)**0.5)
start_round_score = score
# Try tweaking each value up and down, and see if that reduces the cost.
# If it does, keep the change.
for var in (intercept, user_friendliness, note_quality, user_alignment, note_alignment):
for i in range(len(var)):
for delta in (round_delta, -round_delta):
var[i] += delta
new_score = cost(
matrix, intercept[0], user_friendliness, note_quality, user_alignment, note_alignment)
if score < new_score:
var[i] -= delta
else:
improvements += 1
score = new_score
if improvements % 50 == 0:
elapsed = time.time() - timestart
print("Round {}, improvements {}, cost: {}, elapsed: {}".format(
round, improvements, score, elapsed))
np.savetxt(
f"./data/{folder}/note_alignment.txt", note_alignment, delimiter=',')
np.savetxt(
f"./data/{folder}/note_quality.txt", note_quality, delimiter=',')
# If we can't descend any further, exit
if score == start_round_score:
print("Finished descending")
break
if __name__ == '__main__':
folder = sys.argv[1]
matrix = load_matrix(folder)
naive_descent(matrix, folder)