forked from lancedb/vectordb-recipes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
69 lines (49 loc) · 2.14 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import lancedb
import numpy as np
import pandas as pd
# Module-level state read by get_recommendations(). A `global` statement is
# only meaningful inside a function body, so plain assignments are all that
# is needed here.

# Movie records (dicts with "id", "title", "vector", "genre"); filled in by
# the script entry point.
data = []

# LanceDB table handle; assigned by the script entry point once the
# database has been opened.
table = None
def get_recommendations(title):
    """Return up to five search hits for *title*, each paired with an IMDB URL.

    The vector stored for ``title`` in the module-level ``data`` records is
    used as the query for a search on the module-level LanceDB ``table``
    (limited to 5 results). Each hit's ``id`` is then looked up in
    ``./ml-latest-small/links.csv`` to build its IMDB link.

    Args:
        title: Movie title, matched exactly against the ``title`` field of
            the records in ``data``.

    Returns:
        A list of ``[title, imdb_url]`` pairs, in the order the search
        returned them.

    Raises:
        IndexError: If ``title`` is not present in ``data``, or if a hit has
            no matching row in ``links.csv``.
    """
    pd_data = pd.DataFrame(data)

    # Table Search
    query_vectors = pd_data[pd_data['title'] == title]["vector"].values
    if len(query_vectors) == 0:
        # Same exception type as the bare `.values[0]` would raise, but with
        # a message that says what actually went wrong.
        raise IndexError(f"no movie titled {title!r} in the loaded data")
    result = table.search(query_vectors[0]).limit(5).to_df()

    # Get IMDB links. The imdb id is read as text so it is concatenated into
    # the URL verbatim (an integer parse would drop any leading zeros).
    links = pd.read_csv('./ml-latest-small/links.csv', header=0, names=["movie id", "imdb id", "tmdb id"], converters={'imdb id': str})

    # Pair each hit's title with its link
    ret = []
    for hit_title, movie_id in zip(result['title'].values.tolist(), result['id'].values):
        imdb_ids = links[links['movie id'] == movie_id]["imdb id"].values
        if len(imdb_ids) == 0:
            raise IndexError(f"no IMDB link found for movie id {movie_id!r}")
        ret.append([hit_title, "https://www.imdb.com/title/tt" + imdb_ids[0]])
    return ret
if __name__ == "__main__":
    # Load and prepare data.
    # header=0 together with `names` replaces the CSV's own header row, so the
    # columns are parsed as numbers directly (no need to read the header as
    # data and drop it afterwards).
    ratings = pd.read_csv('./ml-latest-small/ratings.csv', header=0, names=["user id", "movie id", "rating", "timestamp"])
    ratings = ratings.drop(columns=['timestamp'])
    ratings["rating"] = ratings["rating"].values.astype(np.float32)
    ratings["user id"] = ratings["user id"].values.astype(np.int32)
    ratings["movie id"] = ratings["movie id"].values.astype(np.int32)

    # One row per user, one column per movie; unrated cells become 0.
    reviewmatrix = ratings.pivot(index="user id", columns="movie id", values="rating").fillna(0)

    # SVD
    matrix = reviewmatrix.values
    _, _, vh = np.linalg.svd(matrix, full_matrices=False)
    # vh has one column per movie; transpose so that each row is one movie's
    # vector (identical to the former np.rot90(np.fliplr(vh))).
    vectors = vh.T
    print(vectors.shape)

    # Metadata
    movies = pd.read_csv('./ml-latest-small/movies.csv', header=0, names=["movie id", "title", "genres"])
    movies = movies[movies['movie id'].isin(reviewmatrix.columns)]

    # Look up each movie's column position in the review matrix explicitly,
    # so every record gets its own vector even if movies.csv is not sorted by
    # id or does not list every rated movie.
    positions = reviewmatrix.columns.get_indexer(movies['movie id'])
    data = [
        {"id": movie_id, "title": movie_title, "vector": vector, "genre": genres}
        for movie_id, movie_title, genres, vector in zip(
            movies['movie id'].tolist(),
            movies['title'].tolist(),
            movies['genres'].tolist(),
            vectors[positions],
        )
    ]
    print(pd.DataFrame(data))

    # Connect to LanceDB
    db = lancedb.connect("./data/test-db")
    try:
        table = db.create_table("movie_set", data=data)
    except Exception:
        # Creation failed (presumably because the table already exists from
        # an earlier run), so fall back to opening it. Catching Exception
        # rather than using a bare `except:` lets KeyboardInterrupt and
        # SystemExit propagate.
        table = db.open_table("movie_set")

    print(get_recommendations("Moana (2016)"))
    print(get_recommendations("Rogue One: A Star Wars Story (2016)"))