Skip to content

Commit baef687

Browse files
committed
feat: add recommending movies by plot
1 parent 2076db0 commit baef687

File tree

3 files changed

+109
-17
lines changed

3 files changed

+109
-17
lines changed

05-movie-recommendation-system/movie-recommendation.ipynb

+97-17
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": 6,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -11,7 +11,7 @@
1111
},
1212
{
1313
"cell_type": "code",
14-
"execution_count": 2,
14+
"execution_count": 7,
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
@@ -21,7 +21,7 @@
2121
},
2222
{
2323
"cell_type": "code",
24-
"execution_count": 3,
24+
"execution_count": 8,
2525
"metadata": {},
2626
"outputs": [],
2727
"source": [
@@ -38,7 +38,7 @@
3838
},
3939
{
4040
"cell_type": "code",
41-
"execution_count": 4,
41+
"execution_count": 9,
4242
"metadata": {},
4343
"outputs": [],
4444
"source": [
@@ -48,7 +48,7 @@
4848
},
4949
{
5050
"cell_type": "code",
51-
"execution_count": 5,
51+
"execution_count": 10,
5252
"metadata": {},
5353
"outputs": [],
5454
"source": [
@@ -58,7 +58,7 @@
5858
},
5959
{
6060
"cell_type": "code",
61-
"execution_count": 6,
61+
"execution_count": 11,
6262
"metadata": {},
6363
"outputs": [],
6464
"source": [
@@ -67,7 +67,7 @@
6767
},
6868
{
6969
"cell_type": "code",
70-
"execution_count": 7,
70+
"execution_count": 12,
7171
"metadata": {},
7272
"outputs": [],
7373
"source": [
@@ -84,7 +84,7 @@
8484
},
8585
{
8686
"cell_type": "code",
87-
"execution_count": 8,
87+
"execution_count": 14,
8888
"metadata": {},
8989
"outputs": [],
9090
"source": [
@@ -95,7 +95,7 @@
9595
},
9696
{
9797
"cell_type": "code",
98-
"execution_count": 9,
98+
"execution_count": 16,
9999
"metadata": {},
100100
"outputs": [],
101101
"source": [
@@ -110,7 +110,7 @@
110110
},
111111
{
112112
"cell_type": "code",
113-
"execution_count": 10,
113+
"execution_count": 17,
114114
"metadata": {},
115115
"outputs": [],
116116
"source": [
@@ -119,7 +119,7 @@
119119
},
120120
{
121121
"cell_type": "code",
122-
"execution_count": 11,
122+
"execution_count": 18,
123123
"metadata": {},
124124
"outputs": [],
125125
"source": [
@@ -128,7 +128,7 @@
128128
},
129129
{
130130
"cell_type": "code",
131-
"execution_count": 12,
131+
"execution_count": 19,
132132
"metadata": {},
133133
"outputs": [
134134
{
@@ -137,7 +137,7 @@
137137
"16751"
138138
]
139139
},
140-
"execution_count": 12,
140+
"execution_count": 19,
141141
"metadata": {},
142142
"output_type": "execute_result"
143143
}
@@ -148,7 +148,7 @@
148148
},
149149
{
150150
"cell_type": "code",
151-
"execution_count": 13,
151+
"execution_count": 20,
152152
"metadata": {},
153153
"outputs": [
154154
{
@@ -166,15 +166,15 @@
166166
},
167167
{
168168
"cell_type": "code",
169-
"execution_count": 16,
169+
"execution_count": 27,
170170
"metadata": {},
171171
"outputs": [],
172172
"source": [
173173
"# establish a cache of embeddings to avoid recomputing\n",
174174
"# cache is a dict of tuples (text, model) -> embedding, saved as a pickle file\n",
175175
"\n",
176176
"# set path to embedding cache\n",
177-
"embedding_cache_path = \"./embeddings/movie_embeddings_cache.pkl\"\n",
177+
"embedding_cache_path = \"movie_embeddings_cache.pkl\"\n",
178178
"\n",
179179
"# load the cache if it exists, and save a copy to disk\n",
180180
"try:\n",
@@ -201,7 +201,7 @@
201201
},
202202
{
203203
"cell_type": "code",
204-
"execution_count": 15,
204+
"execution_count": 28,
205205
"metadata": {},
206206
"outputs": [
207207
{
@@ -300,6 +300,86 @@
300300
"source": [
301301
"atlas.map_embeddings(embeddings=np.array(plot_embeddings), data=data)"
302302
]
303+
},
304+
{
305+
"attachments": {},
306+
"cell_type": "markdown",
307+
"metadata": {},
308+
"source": [
309+
"## Recommending movies by plot"
310+
]
311+
},
312+
{
313+
"cell_type": "code",
314+
"execution_count": 30,
315+
"metadata": {},
316+
"outputs": [],
317+
"source": [
318+
"from openai.embeddings_utils import distances_from_embeddings, indices_of_nearest_neighbors_from_distances"
319+
]
320+
},
321+
{
322+
"cell_type": "code",
323+
"execution_count": 35,
324+
"metadata": {},
325+
"outputs": [],
326+
"source": [
def print_recommendations_from_strings(
    strings,
    index_of_source_string,
    k_nearest_neighbors=3,
    model="text-embedding-ada-002"
):
    """Print the texts whose embeddings are closest to a chosen source text.

    Args:
        strings: Sequence of texts to compare (the notebook passes movie plots).
        index_of_source_string: Position in ``strings`` of the query text.
        k_nearest_neighbors: Maximum number of matches to print.
        model: Embedding model name, forwarded to ``embedding_from_string``.

    Returns:
        None. Each match is written to stdout as a rank line, a distance
        line, and the matched text.
    """
    # Forward `model` so the argument actually selects the embedding model;
    # previously it was accepted but silently ignored.
    # NOTE(review): assumes embedding_from_string accepts a `model` keyword
    # (its definition is outside this block) - confirm before merging.
    embeddings = [embedding_from_string(string, model=model) for string in strings]
    # embedding of the text we are searching from
    query_embedding = embeddings[index_of_source_string]
    # distance from the query to every embedding, the query itself included
    distances = distances_from_embeddings(query_embedding, embeddings)
    # indices of all texts, ordered from nearest to farthest
    indices_of_nearest_neighbors = indices_of_nearest_neighbors_from_distances(distances)
    query_string = strings[index_of_source_string]
    match_count = 0
    for i in indices_of_nearest_neighbors:
        # Skip the searched text. The comparison is on text equality, so exact
        # duplicates of the query elsewhere in `strings` are skipped as well.
        if query_string == strings[i]:
            continue
        # stop once the requested number of matches has been printed
        if match_count >= k_nearest_neighbors:
            break
        match_count += 1
        print(f"Found {match_count} closest match: ")
        print(f"Distance of: {distances[i]}")
        print(strings[i])
353+
]
354+
},
355+
{
356+
"cell_type": "code",
357+
"execution_count": 36,
358+
"metadata": {},
359+
"outputs": [
360+
{
361+
"name": "stdout",
362+
"output_type": "stream",
363+
"text": [
364+
"Found 1 closest match: \n",
365+
"Distance of: 0.15240804182456968\n",
366+
"Belle McGill is unaware of husband Jimmy's gambling problem. First he loses $100 at the racetrack and vows never to place another wager. Then he persuades future son-in-law Ben to bet on a sure thing, Leadpipe, but gets a tip on another horse just before the race, bets Ben's money on that instead, then watches Leadpipe win.\n",
367+
"In danger of losing his business, if not his family, Jimmy delays paying off Ben, who excitedly believes his horse was the winner. Unbenknowst to all, Belle has been making bets of her own. When a horse called Honey Girl comes along, Belle and Jimmy risk everything they have, and they come out winners.\n",
368+
"Found 2 closest match: \n",
369+
"Distance of: 0.16723019461008426\n",
370+
"A racketeer known as \"Sunshine Joe\" specializes in ticket scalping. His gang of colorfully nicknamed thugs includes Liverlips, Sam the Gonoph and Bennie South Street, as well as \"Georgie the Chaser,\" who was dubbed that way because of his penchant for chasing after women.\n",
371+
"On a train, Georgie happens upon Clarice Van Cleve, an heiress who loves to fall in love, particularly with men in uniform. This has created many a headache for her father, who already has seen Clarice elope three times with military types, each tryst ending badly.\n",
372+
"Mr. Van Cleve diverts his daughter to a New Jersey health resort, where he introduces her to his friend Mr. Wilmot and handsome son Hector, in the hope that Clarice and Hector will hit it off. Georgie the gigolo still has Clarice's eye, however, pretending to be a combat pilot. But when Clarice turns up and begins acting like a homemaker, driving him crazy, Georgie, learning she's been disinherited by her dad, leaves by claiming he's needed by \"the King\" to fly a mission.\n",
373+
"Sunshine Joe runs off with money earned from scalped tickets to the Harvard-Yale college football game. It so happens Hector is a member of the Yale team, so all of Joe's goons go to New Haven, Connecticut for the game and place bets. Shocked to find Hector is a benchwarmer, they pull a gun on the coach, demanding Hector be permitted to play. He kicks a field goal to win the game, then ends up, a man in another kind of uniform, in the arms of Clarice.\n",
374+
"Found 3 closest match: \n",
375+
"Distance of: 0.17311384937127183\n",
376+
"A card sharp steps in when a Mexican family's ranch is threatened by swindlers and cheats.\n"
377+
]
378+
}
379+
],
380+
"source": [
381+
"print_recommendations_from_strings(movie_plots, 0)"
382+
]
303383
}
304384
],
305385
"metadata": {

requirements.txt

+12
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,16 @@ charset-normalizer==3.1.0
1919
click==8.1.3
2020
cohere==4.3.0
2121
comm==0.1.3
22+
contourpy==1.0.7
23+
cycler==0.11.0
2224
debugpy==1.6.7
2325
decorator==5.1.1
2426
defusedxml==0.7.1
2527
et-xmlfile==1.1.0
2628
executing==1.2.0
2729
fastjsonschema==2.16.3
2830
Flask==2.3.2
31+
fonttools==4.39.3
2932
fqdn==1.5.1
3033
frozenlist==1.3.3
3134
idna==3.4
@@ -38,6 +41,7 @@ isoduration==20.11.0
3841
itsdangerous==2.1.2
3942
jedi==0.18.2
4043
Jinja2==3.1.2
44+
joblib==1.2.0
4145
jsonlines==3.1.0
4246
jsonpointer==2.3
4347
jsonschema==4.17.3
@@ -50,9 +54,11 @@ jupyter_server==2.5.0
5054
jupyter_server_terminals==0.4.4
5155
jupyterlab-pygments==0.2.2
5256
jupyterlab-widgets==3.0.7
57+
kiwisolver==1.4.4
5358
loguru==0.7.0
5459
markdown-it-py==2.2.0
5560
MarkupSafe==2.1.2
61+
matplotlib==3.7.1
5662
matplotlib-inline==0.1.6
5763
mdurl==0.1.2
5864
mistune==2.0.5
@@ -75,7 +81,9 @@ pandocfilters==1.5.0
7581
parso==0.8.3
7682
pexpect==4.8.0
7783
pickleshare==0.7.5
84+
Pillow==9.5.0
7885
platformdirs==3.2.0
86+
plotly==5.14.1
7987
prometheus-client==0.16.0
8088
prompt-toolkit==3.0.38
8189
psutil==5.9.5
@@ -85,6 +93,7 @@ pyarrow==12.0.0
8593
pycparser==2.21
8694
pydantic==1.10.7
8795
Pygments==2.15.1
96+
pyparsing==3.0.9
8897
pyrsistent==0.19.3
8998
python-dateutil==2.8.2
9099
python-dotenv==1.0.0
@@ -100,6 +109,8 @@ requests==2.28.2
100109
rfc3339-validator==0.1.4
101110
rfc3986-validator==0.1.1
102111
rich==13.3.5
112+
scikit-learn==1.2.2
113+
scipy==1.10.1
103114
Send2Trash==1.8.0
104115
six==1.16.0
105116
sniffio==1.3.0
@@ -108,6 +119,7 @@ spotipy==2.23.0
108119
stack-data==0.6.2
109120
tenacity==8.2.2
110121
terminado==0.17.1
122+
threadpoolctl==3.1.0
111123
tiktoken==0.3.3
112124
tinycss2==1.2.1
113125
tornado==6.3.1

0 commit comments

Comments
 (0)