Skip to content

Commit

Permalink
Added get_random_best code. Added the AB test (#66)
Browse files Browse the repository at this point in the history
* Added get_random_best code. Added the AB test

* fixed formatting

* fixed formatting
  • Loading branch information
ledovsky authored Apr 25, 2024
1 parent 4ed59e7 commit a2d0558
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 8 deletions.
5 changes: 4 additions & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
[pytest]
env =
DATABASE_URL=postgresql+asyncpg://app:app@app_db/postgres
SITE_URL=
DATABASE_URL=postgresql+asyncpg://app:app@app_db:5432/app
REDIS_URL=redis://:myStrongPassword@redis:6379
# Non-Docker setup (services reachable from the host), useful for debugging
# DATABASE_URL=postgresql+asyncpg://app:[email protected]:65432/app
# REDIS_URL=redis://:[email protected]:36379

SITE_DOMAIN=127.0.0.1
SECURE_COOKIES=false
Expand Down
81 changes: 81 additions & 0 deletions src/recommendations/candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,3 +399,84 @@ async def get_best_memes_from_each_source(
"""
res = await fetch_all(text(query))
return res


async def get_random_best(
user_id: int,
limit: int = 10,
exclude_meme_ids: list[int] = [],
):
"""Selects 'limit' best memes from the top 100 memes obtained using the cleared
meme statistics. The cleared statistics aggregates reactions from users with
less than 200 previous reactions. It's aim is to remove the bias from old users
"""

query = f"""
SELECT
M.id
, M.type, M.telegram_file_id, M.caption, M.recommended_by
FROM (
SELECT
M.id
, M.type, M.telegram_file_id, M.caption
, 'random_best_ab_240422' AS recommended_by
, random() rand
FROM meme M
INNER JOIN meme_stats MS
ON MS.meme_id = M.id
INNER JOIN user_language L
ON L.user_id = {user_id}
AND L.language_code = M.language_code
LEFT JOIN user_meme_reaction R
ON R.meme_id = M.id
AND R.user_id = {user_id}
WHERE 1=1
AND M.status = 'ok'
AND R.meme_id IS NULL
-- 100 ru, 50 en, 50 all lang
AND M.id IN (
4101086, 4442353, 3755262, 4524041, 914304, 1213657,
3477742, 3850309, 4106545, 3918656, 1976055, 3729527,
4370768, 4031941, 3902467, 3940729, 3966109, 4144377,
4131644, 4720051, 4438220, 943398, 3486879, 3958437,
3193252, 4011185, 3855063, 4261258, 4368086, 4255270,
1194244, 10222, 4818828, 3820043, 758408, 3188657,
4451345, 2050874, 4665040, 4106819, 3798967, 1825631,
3140601, 4840661, 4250457, 10202, 4363045, 3823857,
3755199, 4214428, 3604880, 3759401, 3928967, 3859587,
1240438, 4634391, 4002944, 2914449, 1955395, 1902244,
4256739, 1721327, 1285555, 1901653, 1584871, 3517077,
4493086, 4128512, 3570595, 3975285, 1484762, 1811655,
1071204, 4033401, 2294710, 4236782, 881987, 4180263,
1100991, 3867070, 1859048, 4285721, 1466518, 2262302,
4478289, 1859157, 4232654, 1202886, 978202, 2279188,
1892350, 961273, 4033397, 3513207, 3635346, 4320621,
4558947, 4252321, 1084225, 2350587, 4339982, 3724969,
3613758, 1768655, 4148626, 1285566, 2181541, 1103300,
3516406, 1197518, 4036174, 3537906, 2953444, 13636,
3724910, 3911502, 1988648, 3587199, 1398183, 4166913,
3911320, 1311422, 2153377, 3604881, 3596142, 1006843,
4473556, 4231678, 4856209, 10114, 3520485, 4232460,
1721545, 3747694, 3914292, 4119263, 4033399, 1482707,
4243473, 4336344, 1678337, 3516170, 2279191, 3724979,
3772372, 4763033, 4128276, 463991, 1006837, 1202853,
4101086, 1103300, 4119263, 4357615, 1194244, 3859587,
3630862, 4478289, 4665040, 3798967, 3940785, 10222,
4255187, 1304918, 3823857, 1398183, 16818, 881987,
2005796, 3639651, 4231648, 3902342, 4031503, 4231678,
4166913, 4720051, 3855063, 4370768, 2350587, 758408,
4818828, 4261258, 3587199, 648225, 4716664, 3918656,
4183519, 3600534, 4473556, 3772372, 4243473, 4524041,
943398, 4840661, 4250457, 1825631, 4363045, 4232460,
4148761, 3513207
)
{exclude_meme_ids_sql_filter(exclude_meme_ids)}
ORDER BY rand
LIMIT {limit}
) M
"""
res = await fetch_all(text(query))
return res
31 changes: 25 additions & 6 deletions src/recommendations/meme_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from src.recommendations.candidates import (
classic,
get_best_memes_from_each_source,
get_random_best,
less_seen_meme_and_source,
like_spread_and_recent_memes,
)
Expand Down Expand Up @@ -45,9 +46,19 @@ async def generate_cold_start_recommendations(user_id, limit=10):
memes_in_queue = await redis.get_all_memes_in_queue_by_key(queue_key)
meme_ids_in_queue = [meme["id"] for meme in memes_in_queue]

candidates = await get_best_memes_from_each_source(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)
candidates = []

# AB test
if user_id % 100 < 50:
candidates = await get_random_best(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)

if len(candidates) == 0:
candidates = await get_best_memes_from_each_source(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)

if len(candidates) == 0:
return

Expand All @@ -63,10 +74,18 @@ async def generate_recommendations(user_id, limit):

r = random.random()

candidates = []

if user_info["nmemes_sent"] < 30:
candidates = await get_best_memes_from_each_source(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)
# AB test
if user_id % 100 < 50:
candidates = await get_random_best(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)
if len(candidates) == 0:
candidates = await get_best_memes_from_each_source(
user_id, limit=limit, exclude_meme_ids=meme_ids_in_queue
)

elif user_info["nmemes_sent"] < 100:
if r < 0.5:
Expand Down
1 change: 0 additions & 1 deletion src/tgbot/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
from src.tgbot.handlers.moderator import get_meme, meme_source
from src.tgbot.handlers.stats.stats import handle_stats
from src.tgbot.handlers.stats.wrapped import handle_wrapped, handle_wrapped_button

from src.tgbot.handlers.upload import upload_meme

application: Application = None # type: ignore
Expand Down
100 changes: 100 additions & 0 deletions tests/recommendations/test_random_best.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from datetime import datetime

import pytest
import pytest_asyncio
from sqlalchemy import delete, insert, select
from sqlalchemy.ext.asyncio import AsyncConnection

from src import redis
from src.database import (engine, meme, meme_source, meme_stats, user,
user_language, user_meme_reaction)
from src.recommendations.candidates import get_random_best
from src.recommendations.meme_queue import generate_cold_start_recommendations


@pytest_asyncio.fixture()
async def conn():
    """Seed the database for the random-best tests and clean up afterwards.

    Inserts two users (ids 1 and 51), one meme source, four memes (two ids
    from the hard-coded "best" list, one "best" id that both users have
    already reacted to, and one id outside the list), a stats row per meme,
    a 'ru' language row per user and the reactions to the seen meme.
    After the test the tables are emptied in reverse dependency order and
    both users' Redis meme queues are deleted.
    """
    seeded_at = datetime(2024, 1, 1)
    user_ids = (1, 51)

    good_meme_1 = 4101086
    good_meme_2 = 4442353
    seen_meme = 3755262
    bad_meme = 1
    meme_ids = [good_meme_1, good_meme_2, seen_meme, bad_meme]

    meme_template = {
        'raw_meme_id': 1,
        'type': 'image',
        'telegram_image_id': '111',
        'caption': '111',
        'meme_source_id': 1,
        'published_at': seeded_at,
        'status': 'ok',
        'language_code': 'ru',
    }

    async with engine.connect() as connection:
        # Parents first: users and the meme source must exist before the
        # rows that reference them.
        await connection.execute(
            insert(user),
            [{'id': uid, 'type': "user"} for uid in user_ids],
        )
        await connection.execute(
            insert(meme_source),
            {
                'id': 1,
                'type': 'telegram',
                'url': '111',
                'status': 'parsing_enabled',
                'created_at': seeded_at,
            },
        )
        await connection.execute(
            insert(meme),
            [
                {**meme_template, 'id': meme_id, 'raw_meme_id': meme_id}
                for meme_id in meme_ids
            ],
        )
        await connection.execute(
            insert(meme_stats),
            [{'meme_id': meme_id} for meme_id in meme_ids],
        )
        await connection.execute(
            insert(user_language),
            [
                {'user_id': uid, 'language_code': 'ru', 'created_at': seeded_at}
                for uid in user_ids
            ],
        )
        await connection.execute(
            insert(user_meme_reaction),
            [
                {
                    'user_id': uid,
                    'meme_id': seen_meme,
                    'reaction_id': 1,
                    'recommended_by': '111',
                    'sent_at': seeded_at,
                }
                for uid in user_ids
            ],
        )

        await connection.commit()
        yield connection

        # Teardown: delete children before parents.
        for table in (
            user_meme_reaction,
            user_language,
            meme_stats,
            meme,
            meme_source,
            user,
        ):
            await connection.execute(delete(table))
        await connection.commit()

        # Drop whatever the tests pushed into the users' Redis queues.
        for uid in user_ids:
            await redis.delete_by_key(redis.get_meme_queue_key(uid))

        # TODO: redis raises a runtime error after the test succeeds:
        # RuntimeError: Event loop is closed


@pytest.mark.asyncio
async def test_random_best(conn: AsyncConnection):
    """get_random_best for user 1 yields exactly two of the seeded memes."""
    recommendations = await get_random_best(1, limit=10)
    assert len(recommendations) == 2


@pytest.mark.asyncio
async def test_random_best_meme_queue(conn: AsyncConnection):
    """Cold-start queue generation tags memes with the expected recommender.

    User 1 is expected to get two memes tagged 'random_best_ab_240422';
    user 51 is expected to get one meme tagged 'best_meme_from_each_source'.
    """
    # (user id, expected queue length, expected `recommended_by` of first meme)
    expectations = (
        (1, 2, 'random_best_ab_240422'),
        (51, 1, 'best_meme_from_each_source'),
    )

    for user_id, expected_count, expected_source in expectations:
        await generate_cold_start_recommendations(user_id)
        queued = await redis.get_all_memes_in_queue_by_key(
            redis.get_meme_queue_key(user_id)
        )
        assert len(queued) == expected_count
        assert queued[0]['recommended_by'] == expected_source

0 comments on commit a2d0558

Please sign in to comment.