Skip to content

Commit

Permalink
added lr_smoothed algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
ledovsky committed Jul 8, 2024
1 parent 681a0ca commit 395f47e
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 22 deletions.
100 changes: 82 additions & 18 deletions src/stats/meme.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,95 @@
from src.database import execute


async def calculate_meme_reactions_stats() -> None:
insert_query = """
async def calculate_meme_reactions_stats(
min_user_reactions=10,
min_meme_reactions=3) -> None:
"""
lr_smoothed algorithm
Smoothing is needed to handle the cases when users have very diverse like rates
Step 1. Map each reaction to a symmetrical target:
like_symmetrical: like -> 1, any other reaction -> -1 (i.e. {0, 1} -> {-1, 1})
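Step 2. Calculate the symmetrical like rate for each user as a running average
over the user's reactions ordered by sent_at (up to and including the current one)
user_like_rate = avg(like_symmetrical)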
Step 3. Calculate smoothed likes
like_smoothed = like_symmetrical - user_like_rate
like_smoothed in (-2, 2)
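Step 4. Calculate meme smoothed like rate
lr_smoothed = avg(like_smoothed)
lr_smoothed in (-2, 2)
Only reactions made once the user has at least min_user_reactions reactions are averaged;
memes with fewer than min_meme_reactions such reactions get lr_smoothed = 0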
"""

insert_query = f"""
INSERT INTO meme_stats (
meme_id,
nlikes,
ndislikes,
nmemes_sent,
age_days,
sec_to_react,
updated_at
updated_at,
lr_smoothed
)
WITH LR AS (
SELECT *
FROM (
SELECT meme_id, AVG(lr_smoothed) lr_smoothed, COUNT(user_id) n_reactions
FROM (
SELECT *, (like_ - lr_avg) lr_smoothed
FROM (
SELECT
meme_id
, COUNT(*) FILTER (WHERE reaction_id = 1) nlikes
, COUNT(*) FILTER (WHERE reaction_id = 2) ndislikes
, COUNT(*) nmemes_sent
, MAX(EXTRACT('DAYS' FROM NOW() - M.published_at)) age_days
, COALESCE(EXTRACT(
EPOCH FROM
percentile_cont(0.5) WITHIN GROUP (ORDER BY reacted_at - sent_at)
), 99999) AS sec_to_react
, NOW() AS updated_at
FROM user_meme_reaction E
INNER JOIN meme M
ON M.id = E.meme_id
GROUP BY 1
user_id,
meme_id,
like_,
sent_at,
AVG(like_) OVER (PARTITION BY user_id ORDER BY sent_at) lr_avg,
COUNT(like_) over (PARTITION BY user_id ORDER BY sent_at) n_reactions
FROM (
SELECT *, CASE WHEN reaction_id = 1 THEN 1 ELSE -1 END like_
FROM user_meme_reaction r
JOIN meme
ON r.meme_id = meme.id
WHERE reaction_id IS NOT NULL
) t
) t
WHERE n_reactions >= {min_user_reactions}
) t
GROUP BY meme_id
) t
WHERE n_reactions >= {min_meme_reactions}
)
SELECT
MS.*,
COALESCE(LR.lr_smoothed, 0) lr_smoothed
FROM (
SELECT
meme_id
, COUNT(*) FILTER (WHERE reaction_id = 1) nlikes
, COUNT(*) FILTER (WHERE reaction_id = 2) ndislikes
, COUNT(*) nmemes_sent
, MAX(EXTRACT('DAYS' FROM NOW() - M.published_at)) age_days
, COALESCE(EXTRACT(
EPOCH FROM
percentile_cont(0.5) WITHIN GROUP (ORDER BY reacted_at - sent_at)
), 99999) AS sec_to_react
, NOW() AS updated_at
FROM user_meme_reaction E
INNER JOIN meme M
ON M.id = E.meme_id
GROUP BY 1
) MS
LEFT JOIN LR
ON MS.meme_id = LR.meme_id
ON CONFLICT (meme_id) DO
UPDATE SET
Expand All @@ -37,7 +100,8 @@ async def calculate_meme_reactions_stats() -> None:
nmemes_sent = EXCLUDED.nmemes_sent,
age_days = EXCLUDED.age_days,
sec_to_react = EXCLUDED.sec_to_react,
updated_at = EXCLUDED.updated_at
updated_at = EXCLUDED.updated_at,
lr_smoothed = EXCLUDED.lr_smoothed
"""
await execute(text(insert_query))

Expand Down
18 changes: 14 additions & 4 deletions tests/stats/test_meme.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ async def conn():
'type': 'image', 'telegram_image_id': '111', 'caption': '111', 'meme_source_id': 1,
'published_at': datetime(2024, 1, 1), 'status': 'ok', 'language_code': 'ru',
}
meme_ids = [1, 2, 3, 4, 5]
meme_ids = [1, 2, 3, 4, 5, 6]
await conn.execute(
insert(meme),
[{'id': meme_id, 'raw_meme_id': meme_id, **meme_common} for meme_id in meme_ids]
Expand All @@ -49,12 +49,14 @@ async def conn():
{'user_id': 1, 'meme_id': 2, 'reaction_id': 1, **umr_common},
{'user_id': 1, 'meme_id': 3, 'reaction_id': 1, **umr_common},
{'user_id': 1, 'meme_id': 4, 'reaction_id': 1, **umr_common},
{'user_id': 1, 'meme_id': 5, 'reaction_id': 2, **umr_common},
{'user_id': 1, 'meme_id': 5, 'reaction_id': 1, **umr_common},
{'user_id': 1, 'meme_id': 6, 'reaction_id': 2, **umr_common},
{'user_id': 2, 'meme_id': 1, 'reaction_id': 1, **umr_common},
{'user_id': 2, 'meme_id': 2, 'reaction_id': 2, **umr_common},
{'user_id': 2, 'meme_id': 3, 'reaction_id': 2, **umr_common},
{'user_id': 2, 'meme_id': 4, 'reaction_id': 2, **umr_common},
{'user_id': 2, 'meme_id': 5, 'reaction_id': 2, **umr_common},
{'user_id': 2, 'meme_id': 6, 'reaction_id': 2, **umr_common},
]
)

Expand All @@ -73,8 +75,16 @@ async def conn():

@pytest.mark.asyncio
async def test_calculate_meme_reactions_stats(conn: AsyncConnection):
await calculate_meme_reactions_stats()
await calculate_meme_reactions_stats(min_meme_reactions=0, min_user_reactions=0)

res = await fetch_all(select(meme_stats))
assert len(res) == 6

print(res)
assert len(res) == 5

eps = 1e-3
for row in res:
if row['meme_id'] == 1:
assert abs(row['lr_smoothed'] - 1) < eps
if row['meme_id'] == 2:
assert abs(row['lr_smoothed']) < eps

0 comments on commit 395f47e

Please sign in to comment.