Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added get_random_best code. Added the AB test #66

Merged
merged 3 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
[pytest]
env =
DATABASE_URL=postgresql+asyncpg://app:app@app_db/postgres
SITE_URL=
DATABASE_URL=postgresql+asyncpg://app:app@app_db:5432/app
REDIS_URL=redis://:myStrongPassword@redis:6379
# no docker version, useful for debugging
# DATABASE_URL=postgresql+asyncpg://app:app@127.0.0.1:65432/app
# REDIS_URL=redis://:myStrongPassword@127.0.0.1:36379

SITE_DOMAIN=127.0.0.1
SECURE_COOKIES=false
Expand Down
81 changes: 81 additions & 0 deletions src/recommendations/candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,3 +399,84 @@ async def get_best_memes_from_each_source(
"""
res = await fetch_all(text(query))
return res


async def get_random_best(
    user_id: int,
    limit: int = 10,
    # Quoted so the union syntax is not evaluated on interpreters older than 3.10.
    exclude_meme_ids: "list[int] | None" = None,
):
    """Select up to ``limit`` random memes from a hard-coded list of best memes.

    The docstring of the original implementation describes the list as the top
    memes obtained using the "cleared" meme statistics, i.e. statistics that
    aggregate reactions only from users with fewer than 200 previous
    reactions. Its aim is to remove the bias introduced by old users.

    The query only returns memes that:
      * appear in the hard-coded ID whitelist below,
      * have a row in ``meme_stats`` and ``status = 'ok'``,
      * match one of the user's languages in ``user_language``,
      * have no ``user_meme_reaction`` row for this user, and
      * pass the filter produced by ``exclude_meme_ids_sql_filter``.

    Every returned row is tagged with
    ``recommended_by = 'random_best_ab_240422'``.

    Args:
        user_id: ID of the user the candidates are generated for.
        limit: Maximum number of memes to return.
        exclude_meme_ids: Meme IDs to leave out (for example memes already
            queued for the user). ``None`` means "exclude nothing".

    Returns:
        Whatever ``fetch_all`` returns for the query; presumably a list of
        rows with ``id``, ``type``, ``telegram_file_id``, ``caption`` and
        ``recommended_by`` -- confirm against ``fetch_all``.

    Raises:
        TypeError, ValueError: If ``user_id`` or ``limit`` cannot be
            converted to ``int``.
    """
    # These values are interpolated straight into the SQL text, so make sure
    # they really are integers before building the query.
    user_id = int(user_id)
    limit = int(limit)
    # Avoid a shared mutable default; an omitted argument behaves like [].
    excluded_ids = exclude_meme_ids or []

    query = f"""
        SELECT
            M.id
            , M.type, M.telegram_file_id, M.caption, M.recommended_by
        FROM (
            SELECT
                M.id
                , M.type, M.telegram_file_id, M.caption
                , 'random_best_ab_240422' AS recommended_by
                , random() rand
            FROM meme M
            INNER JOIN meme_stats MS
                ON MS.meme_id = M.id

            INNER JOIN user_language L
                ON L.user_id = {user_id}
                AND L.language_code = M.language_code

            LEFT JOIN user_meme_reaction R
                ON R.meme_id = M.id
                AND R.user_id = {user_id}

            WHERE 1=1
                AND M.status = 'ok'
                AND R.meme_id IS NULL
                -- 100 ru, 50 en, 50 all lang
                AND M.id IN (
                    4101086, 4442353, 3755262, 4524041, 914304, 1213657,
                    3477742, 3850309, 4106545, 3918656, 1976055, 3729527,
                    4370768, 4031941, 3902467, 3940729, 3966109, 4144377,
                    4131644, 4720051, 4438220, 943398, 3486879, 3958437,
                    3193252, 4011185, 3855063, 4261258, 4368086, 4255270,
                    1194244, 10222, 4818828, 3820043, 758408, 3188657,
                    4451345, 2050874, 4665040, 4106819, 3798967, 1825631,
                    3140601, 4840661, 4250457, 10202, 4363045, 3823857,
                    3755199, 4214428, 3604880, 3759401, 3928967, 3859587,
                    1240438, 4634391, 4002944, 2914449, 1955395, 1902244,
                    4256739, 1721327, 1285555, 1901653, 1584871, 3517077,
                    4493086, 4128512, 3570595, 3975285, 1484762, 1811655,
                    1071204, 4033401, 2294710, 4236782, 881987, 4180263,
                    1100991, 3867070, 1859048, 4285721, 1466518, 2262302,
                    4478289, 1859157, 4232654, 1202886, 978202, 2279188,
                    1892350, 961273, 4033397, 3513207, 3635346, 4320621,
                    4558947, 4252321, 1084225, 2350587, 4339982, 3724969,
                    3613758, 1768655, 4148626, 1285566, 2181541, 1103300,
                    3516406, 1197518, 4036174, 3537906, 2953444, 13636,
                    3724910, 3911502, 1988648, 3587199, 1398183, 4166913,
                    3911320, 1311422, 2153377, 3604881, 3596142, 1006843,
                    4473556, 4231678, 4856209, 10114, 3520485, 4232460,
                    1721545, 3747694, 3914292, 4119263, 4033399, 1482707,
                    4243473, 4336344, 1678337, 3516170, 2279191, 3724979,
                    3772372, 4763033, 4128276, 463991, 1006837, 1202853,
                    4101086, 1103300, 4119263, 4357615, 1194244, 3859587,
                    3630862, 4478289, 4665040, 3798967, 3940785, 10222,
                    4255187, 1304918, 3823857, 1398183, 16818, 881987,
                    2005796, 3639651, 4231648, 3902342, 4031503, 4231678,
                    4166913, 4720051, 3855063, 4370768, 2350587, 758408,
                    4818828, 4261258, 3587199, 648225, 4716664, 3918656,
                    4183519, 3600534, 4473556, 3772372, 4243473, 4524041,
                    943398, 4840661, 4250457, 1825631, 4363045, 4232460,
                    4148761, 3513207
                )
                {exclude_meme_ids_sql_filter(excluded_ids)}
            ORDER BY rand
            LIMIT {limit}
        ) M
    """
    res = await fetch_all(text(query))
    return res
31 changes: 25 additions & 6 deletions src/recommendations/meme_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from src.recommendations.candidates import (
classic,
get_best_memes_from_each_source,
get_random_best,
less_seen_meme_and_source,
like_spread_and_recent_memes,
)
Expand Down Expand Up @@ -45,9 +46,19 @@ async def generate_cold_start_recommendations(user_id, limit=10):
memes_in_queue = await redis.get_all_memes_in_queue_by_key(queue_key)
meme_ids_in_queue = [meme["id"] for meme in memes_in_queue]

candidates = await get_best_memes_from_each_source(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)
candidates = []

# AB test
if user_id % 100 < 50:
candidates = await get_random_best(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)

if len(candidates) == 0:
candidates = await get_best_memes_from_each_source(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)

if len(candidates) == 0:
return

Expand All @@ -63,10 +74,18 @@ async def generate_recommendations(user_id, limit):

r = random.random()

candidates = []

if user_info["nmemes_sent"] < 30:
candidates = await get_best_memes_from_each_source(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)
# AB test
if user_id % 100 < 50:
candidates = await get_random_best(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)
if len(candidates) == 0:
candidates = await get_best_memes_from_each_source(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)

elif user_info["nmemes_sent"] < 100:
if r < 0.5:
Expand Down
1 change: 0 additions & 1 deletion src/tgbot/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
from src.tgbot.handlers.moderator import get_meme, meme_source
from src.tgbot.handlers.stats.stats import handle_stats
from src.tgbot.handlers.stats.wrapped import handle_wrapped, handle_wrapped_button

from src.tgbot.handlers.upload import upload_meme

application: Application = None # type: ignore
Expand Down
100 changes: 100 additions & 0 deletions tests/recommendations/test_random_best.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from datetime import datetime

import pytest
import pytest_asyncio
from sqlalchemy import delete, insert, select
from sqlalchemy.ext.asyncio import AsyncConnection

from src import redis
from src.database import (engine, meme, meme_source, meme_stats, user,
user_language, user_meme_reaction)
from src.recommendations.candidates import get_random_best
from src.recommendations.meme_queue import generate_cold_start_recommendations


@pytest_asyncio.fixture()
async def conn():
    """Seed the database for the random-best tests and clean up afterwards.

    Inserts two users (ids 1 and 51, both with language ``ru``), one meme
    source and four memes: two unseen memes whose ids are on the
    ``get_random_best`` whitelist, one whitelisted meme both users have
    already reacted to, and one meme (id 1) that is not on the whitelist.
    Yields the open connection; on teardown deletes every seeded table and
    the Redis meme queues of both users.
    """
    seeded_at = datetime(2024, 1, 1)
    user_ids = (1, 51)

    good_meme_1 = 4101086
    good_meme_2 = 4442353
    seen_meme = 3755262
    bad_meme = 1
    meme_ids = [good_meme_1, good_meme_2, seen_meme, bad_meme]

    # Column values shared by every seeded meme; id and raw_meme_id are
    # overridden per row below.
    meme_basic = {
        'raw_meme_id': 1,
        'type': 'image',
        'telegram_image_id': '111',
        'caption': '111',
        'meme_source_id': 1,
        'published_at': seeded_at,
        'status': 'ok',
        'language_code': 'ru',
    }

    user_rows = [{'id': uid, 'type': "user"} for uid in user_ids]
    source_row = {
        'id': 1,
        'type': 'telegram',
        'url': '111',
        'status': 'parsing_enabled',
        'created_at': seeded_at,
    }
    meme_rows = [
        {**meme_basic, 'id': meme_id, 'raw_meme_id': meme_id}
        for meme_id in meme_ids
    ]
    stats_rows = [{'meme_id': meme_id} for meme_id in meme_ids]
    language_rows = [
        {'user_id': uid, 'language_code': 'ru', 'created_at': seeded_at}
        for uid in user_ids
    ]
    reaction_rows = [
        {
            'user_id': uid,
            'meme_id': seen_meme,
            'reaction_id': 1,
            'recommended_by': '111',
            'sent_at': seeded_at,
        }
        for uid in user_ids
    ]

    async with engine.connect() as connection:
        # Insert order matters: parents (user, meme_source, meme) go in
        # before the rows that reference them.
        await connection.execute(insert(user), user_rows)
        await connection.execute(insert(meme_source), source_row)
        await connection.execute(insert(meme), meme_rows)
        await connection.execute(insert(meme_stats), stats_rows)
        await connection.execute(insert(user_language), language_rows)
        await connection.execute(insert(user_meme_reaction), reaction_rows)
        await connection.commit()

        yield connection

        # Teardown: delete in the reverse dependency order of the inserts.
        tables_to_clear = (
            user_meme_reaction,
            user_language,
            meme_stats,
            meme,
            meme_source,
            user,
        )
        for table in tables_to_clear:
            await connection.execute(delete(table))
        await connection.commit()

        # Drop the meme queues the tests may have filled in Redis.
        for uid in user_ids:
            await redis.delete_by_key(redis.get_meme_queue_key(uid))

        # TODO: redis sends the runtime error after the test succeeds
        # RuntimeError: Event loop is closed


@pytest.mark.asyncio
async def test_random_best(conn: AsyncConnection):
    """get_random_best returns only the two unseen, whitelisted memes for user 1."""
    candidates = await get_random_best(user_id=1, limit=10)
    assert len(candidates) == 2


@pytest.mark.asyncio
async def test_random_best_meme_queue(conn: AsyncConnection):
    """Cold-start queues are filled by the expected recommender for each user.

    User 1 is expected to receive two memes tagged 'random_best_ab_240422';
    user 51 is expected to receive one meme tagged
    'best_meme_from_each_source'.
    """
    # (user id, expected queue length, expected recommended_by of first item)
    expectations = (
        (1, 2, 'random_best_ab_240422'),
        (51, 1, 'best_meme_from_each_source'),
    )

    for uid, expected_count, expected_tag in expectations:
        await generate_cold_start_recommendations(uid)
        key = redis.get_meme_queue_key(uid)
        queued = await redis.get_all_memes_in_queue_by_key(key)
        assert len(queued) == expected_count
        assert queued[0]['recommended_by'] == expected_tag
Loading