Skip to content

Commit fe87297

Browse files
committed
Added get_random_best code. Added the AB test
1 parent 4ed59e7 commit fe87297

File tree

4 files changed

+175
-7
lines changed

4 files changed

+175
-7
lines changed

pytest.ini

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
[pytest]
22
env =
3-
DATABASE_URL=postgresql+asyncpg://app:app@app_db/postgres
43
SITE_URL=
4+
DATABASE_URL=postgresql+asyncpg://app:app@app_db:5432/app
55
REDIS_URL=redis://:myStrongPassword@redis:6379
6+
# no docker version, useful for debugging
7+
# DATABASE_URL=postgresql+asyncpg://app:[email protected]:65432/app
8+
# REDIS_URL=redis://:[email protected]:36379
69

710
SITE_DOMAIN=127.0.0.1
811
SECURE_COOKIES=false

src/recommendations/candidates.py

+46
Original file line numberDiff line numberDiff line change
@@ -399,3 +399,49 @@ async def get_best_memes_from_each_source(
399399
"""
400400
res = await fetch_all(text(query))
401401
return res
402+
403+
404+
async def get_random_best(
    user_id: int,
    limit: int = 10,
    # Quoted so the annotation is never evaluated at import time.
    exclude_meme_ids: "list[int] | None" = None,
):
    """Pick up to ``limit`` random memes from a hard-coded list of top memes.

    Per the original author's notes, the ID list embedded in the query was
    obtained from the "cleared" meme statistics, which aggregate reactions
    only from users with fewer than 200 previous reactions. Its aim is to
    remove the bias introduced by old users.

    The query keeps a meme only when all of the following hold:

    * it has a ``meme_stats`` row;
    * its ``language_code`` matches one of the user's ``user_language`` rows;
    * its ``status`` is ``'ok'``;
    * the user has no ``user_meme_reaction`` row for it;
    * its id is in the hard-coded list and passes the filter produced by
      ``exclude_meme_ids_sql_filter``.

    The surviving rows are shuffled with ``random()`` and cut to ``limit``.

    Args:
        user_id: id of the user the recommendations are generated for.
        limit: maximum number of memes to return.
        exclude_meme_ids: meme ids to leave out of the result. ``None``
            (the default) means "exclude nothing".

    Returns:
        Whatever ``fetch_all`` returns for the query; every row carries
        ``id``, ``type``, ``telegram_file_id``, ``caption`` and
        ``recommended_by`` (always ``'random_best_ab_240422'``).
    """
    # A ``[]`` default would be one list object shared by every call;
    # build a fresh list per call instead.
    excluded = [] if exclude_meme_ids is None else exclude_meme_ids

    # These values are spliced straight into the SQL text below, so make
    # sure they really are integers before formatting them in.
    user_id = int(user_id)
    limit = int(limit)

    query = f"""
        SELECT
            M.id
            , M.type, M.telegram_file_id, M.caption, M.recommended_by
        FROM (
            SELECT
                M.id
                , M.type, M.telegram_file_id, M.caption
                , 'random_best_ab_240422' AS recommended_by
                , random() rand
            FROM meme M
            INNER JOIN meme_stats MS
                ON MS.meme_id = M.id

            INNER JOIN user_language L
                ON L.user_id = {user_id}
                AND L.language_code = M.language_code

            LEFT JOIN user_meme_reaction R
                ON R.meme_id = M.id
                AND R.user_id = {user_id}

            WHERE 1=1
                AND M.status = 'ok'
                AND R.meme_id IS NULL
                -- 100 ru, 50 en, 50 all lang
                AND M.id IN (4101086, 4442353, 3755262, 4524041, 914304, 1213657, 3477742, 3850309, 4106545, 3918656, 1976055, 3729527, 4370768, 4031941, 3902467, 3940729, 3966109, 4144377, 4131644, 4720051, 4438220, 943398, 3486879, 3958437, 3193252, 4011185, 3855063, 4261258, 4368086, 4255270, 1194244, 10222, 4818828, 3820043, 758408, 3188657, 4451345, 2050874, 4665040, 4106819, 3798967, 1825631, 3140601, 4840661, 4250457, 10202, 4363045, 3823857, 3755199, 4214428, 3604880, 3759401, 3928967, 3859587, 1240438, 4634391, 4002944, 2914449, 1955395, 1902244, 4256739, 1721327, 1285555, 1901653, 1584871, 3517077, 4493086, 4128512, 3570595, 3975285, 1484762, 1811655, 1071204, 4033401, 2294710, 4236782, 881987, 4180263, 1100991, 3867070, 1859048, 4285721, 1466518, 2262302, 4478289, 1859157, 4232654, 1202886, 978202, 2279188, 1892350, 961273, 4033397, 3513207, 3635346, 4320621, 4558947, 4252321, 1084225, 2350587, 4339982, 3724969, 3613758, 1768655, 4148626, 1285566, 2181541, 1103300, 3516406, 1197518, 4036174, 3537906, 2953444, 13636, 3724910, 3911502, 1988648, 3587199, 1398183, 4166913, 3911320, 1311422, 2153377, 3604881, 3596142, 1006843, 4473556, 4231678, 4856209, 10114, 3520485, 4232460, 1721545, 3747694, 3914292, 4119263, 4033399, 1482707, 4243473, 4336344, 1678337, 3516170, 2279191, 3724979, 3772372, 4763033, 4128276, 463991, 1006837, 1202853, 4101086, 1103300, 4119263, 4357615, 1194244, 3859587, 3630862, 4478289, 4665040, 3798967, 3940785, 10222, 4255187, 1304918, 3823857, 1398183, 16818, 881987, 2005796, 3639651, 4231648, 3902342, 4031503, 4231678, 4166913, 4720051, 3855063, 4370768, 2350587, 758408, 4818828, 4261258, 3587199, 648225, 4716664, 3918656, 4183519, 3600534, 4473556, 3772372, 4243473, 4524041, 943398, 4840661, 4250457, 1825631, 4363045, 4232460, 4148761, 3513207)
                {exclude_meme_ids_sql_filter(excluded)}
            ORDER BY rand
            LIMIT {limit}
        ) M
    """
    res = await fetch_all(text(query))
    return res

src/recommendations/meme_queue.py

+25-6
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from src.recommendations.candidates import (
55
classic,
66
get_best_memes_from_each_source,
7+
get_random_best,
78
less_seen_meme_and_source,
89
like_spread_and_recent_memes,
910
)
@@ -45,9 +46,19 @@ async def generate_cold_start_recommendations(user_id, limit=10):
4546
memes_in_queue = await redis.get_all_memes_in_queue_by_key(queue_key)
4647
meme_ids_in_queue = [meme["id"] for meme in memes_in_queue]
4748

48-
candidates = await get_best_memes_from_each_source(
49-
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
50-
)
49+
candidates = []
50+
51+
# AB test
52+
if user_id % 100 < 50:
53+
candidates = await get_random_best(
54+
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
55+
)
56+
57+
if len(candidates) == 0:
58+
candidates = await get_best_memes_from_each_source(
59+
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
60+
)
61+
5162
if len(candidates) == 0:
5263
return
5364

@@ -63,10 +74,18 @@ async def generate_recommendations(user_id, limit):
6374

6475
r = random.random()
6576

77+
candidates = []
78+
6679
if user_info["nmemes_sent"] < 30:
67-
candidates = await get_best_memes_from_each_source(
68-
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
69-
)
80+
# AB test
81+
if user_id % 100 < 50:
82+
candidates = await get_random_best(
83+
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
84+
)
85+
if len(candidates) == 0:
86+
candidates = await get_best_memes_from_each_source(
87+
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
88+
)
7089

7190
elif user_info["nmemes_sent"] < 100:
7291
if r < 0.5:
+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
from datetime import datetime
2+
3+
import pytest
4+
import pytest_asyncio
5+
from sqlalchemy import delete, insert, select
6+
from sqlalchemy.ext.asyncio import AsyncConnection
7+
8+
from src import redis
9+
from src.database import (engine, meme, meme_source, meme_stats, user,
10+
user_language, user_meme_reaction)
11+
from src.recommendations.candidates import get_random_best
12+
from src.recommendations.meme_queue import generate_cold_start_recommendations
13+
14+
15+
@pytest_asyncio.fixture()
async def conn():
    """Seed the database for the random-best tests and clean up afterwards.

    Setup inserts, in foreign-key-friendly order: two users (ids 1 and 51),
    one meme source, four 'ru' memes with matching ``meme_stats`` rows, a
    'ru' ``user_language`` row per user, and one reaction per user on the
    "seen" meme. The work is committed and the open connection is yielded.

    Teardown deletes *every* row from the touched tables (the deletes are
    unfiltered), commits, and drops both users' redis meme queues.
    """
    user_ids = (1, 51)
    stamp = datetime(2024, 1, 1)

    # Three of these ids appear in the hard-coded list inside
    # get_random_best; id 1 does not.
    good_meme_1 = 4101086
    good_meme_2 = 4442353
    seen_meme = 3755262
    bad_meme = 1
    meme_ids = [good_meme_1, good_meme_2, seen_meme, bad_meme]

    # NOTE(review): get_random_best selects M.telegram_file_id while this
    # template sets 'telegram_image_id' - confirm against the meme schema.
    meme_template = {
        'raw_meme_id': 1, 'type': 'image', 'telegram_image_id': '111', 'caption': '111', 'meme_source_id': 1,
        'published_at': stamp, 'status': 'ok', 'language_code': 'ru',
    }

    user_rows = [{'id': uid, 'type': "user"} for uid in user_ids]
    source_row = {
        'id': 1, 'type': 'telegram', 'url': '111', 'status': 'parsing_enabled', 'created_at': stamp,
    }
    meme_rows = [
        {**meme_template, 'id': meme_id, 'raw_meme_id': meme_id}
        for meme_id in meme_ids
    ]
    stats_rows = [{'meme_id': meme_id} for meme_id in meme_ids]
    language_rows = [
        {'user_id': uid, 'language_code': 'ru', 'created_at': stamp}
        for uid in user_ids
    ]
    reaction_rows = [
        {'user_id': uid, 'meme_id': seen_meme, 'reaction_id': 1, 'recommended_by': '111', 'sent_at': stamp}
        for uid in user_ids
    ]

    async with engine.connect() as connection:
        # Parents first so that dependent rows can reference them.
        await connection.execute(insert(user), user_rows)
        await connection.execute(insert(meme_source), source_row)
        await connection.execute(insert(meme), meme_rows)
        await connection.execute(insert(meme_stats), stats_rows)
        await connection.execute(insert(user_language), language_rows)
        await connection.execute(insert(user_meme_reaction), reaction_rows)

        await connection.commit()
        yield connection

        # Teardown: children first, parents last.
        for table in (
            user_meme_reaction,
            user_language,
            meme_stats,
            meme,
            meme_source,
            user,
        ):
            await connection.execute(delete(table))
        await connection.commit()

        # Drop whatever the tests pushed into the users' redis queues.
        for uid in user_ids:
            queue_key = redis.get_meme_queue_key(uid)
            await redis.delete_by_key(queue_key)
75+
76+
# TODO: redis raises a runtime error after the test succeeds:
77+
# RuntimeError: Event loop is closed
78+
79+
80+
@pytest.mark.asyncio
async def test_random_best(conn: AsyncConnection):
    """get_random_best returns only the unseen memes from its curated list.

    The fixture seeds four memes for user 1: one already has a reaction
    from that user and one has an id outside the hard-coded list, which
    leaves exactly two candidates.
    """
    recommendations = await get_random_best(1, 10)
    assert len(recommendations) == 2
84+
85+
86+
@pytest.mark.asyncio
async def test_random_best_meme_queue(conn: AsyncConnection):
    """Cold-start queue generation tags memes according to the user's AB group.

    User 1 is expected to get two memes recommended by
    'random_best_ab_240422'; user 51 is expected to get one meme recommended
    by 'best_meme_from_each_source'.
    """
    # (user id, expected queue length, expected recommended_by of first meme)
    scenarios = (
        (1, 2, 'random_best_ab_240422'),
        (51, 1, 'best_meme_from_each_source'),
    )

    for user_id, expected_count, expected_engine in scenarios:
        await generate_cold_start_recommendations(user_id)
        queued = await redis.get_all_memes_in_queue_by_key(
            redis.get_meme_queue_key(user_id)
        )
        assert len(queued) == expected_count
        assert queued[0]['recommended_by'] == expected_engine

0 commit comments

Comments
 (0)