
Commit d730e13

Merge pull request #68 from bellingcat/config-funcs
Move shared util functions
Authored Mar 17, 2025
2 parents b50ca91 + ba7ed57 · commit d730e13

File tree: 9 files changed, +62 −59 lines

app/shared/aa_utils.py (−49)

This file was deleted; its get_all_urls helper now lives in app/web/utils/misc.py.
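In practical terms, callers switch their import of the shared helper from the deleted module to its new home. A before/after sketch, grounded in the import changes shown in the files below:

# Before this commit (module now deleted):
from app.shared.aa_utils import get_all_urls

# After this commit:
from app.web.utils.misc import get_all_urls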

app/tests/worker/test_worker_main.py (+2 −1)

@@ -6,7 +6,8 @@

 from app.shared import schemas
 from app.shared.db import models
-from app.worker.main import create_archive_task, create_sheet_task, get_all_urls
+from app.web.utils.misc import get_all_urls
+from app.worker.main import create_archive_task, create_sheet_task


 class TestCreateArchiveTask:

app/web/endpoints/interoperability.py (+1 −1)

@@ -9,12 +9,12 @@
 from sqlalchemy.orm import Session

 from app.shared import business_logic, schemas
-from app.shared.aa_utils import get_all_urls
 from app.shared.db import models, worker_crud
 from app.shared.db.database import get_db_dependency
 from app.shared.log import log_error
 from app.web.config import ALLOW_ANY_EMAIL
 from app.web.security import token_api_key_auth
+from app.web.utils.misc import get_all_urls


 interoperability_router = APIRouter(

app/web/endpoints/task.py (+2 −1)

@@ -26,7 +26,8 @@ def get_status(
     try:
         if task.status == "FAILURE":
             # *FAILURE* The task raised an exception, or has exceeded the retry limit.
-            # The :attr:`result` attribute then contains the exception raised by the task.
+            # The :attr:`result` attribute then contains the exception raised by
+            # the task.
             # https://docs.celeryq.dev/en/stable/_modules/celery/result.html#AsyncResult
             raise task.result
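For context, the rewrapped comment describes Celery's AsyncResult behaviour: when a task's status is FAILURE, its result attribute holds the exception the worker raised, so re-raising it propagates the worker error to the caller. A minimal sketch of that pattern, assuming a Celery app object is available (the helper name here is illustrative, not from this repo):

from celery.result import AsyncResult


def raise_if_task_failed(task_id: str, celery_app) -> AsyncResult:
    # Illustrative helper: look up the task and surface its failure, if any.
    task = AsyncResult(task_id, app=celery_app)
    if task.status == "FAILURE":
        # On FAILURE, .result contains the exception raised by the task.
        raise task.result
    return task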

app/web/middleware.py (+5 −2)

@@ -10,7 +10,8 @@
 async def logging_middleware(request: Request, call_next):
     try:
         response = await call_next(request)
-        # TODO: use Origin to have summary prometheus metrics on where requests come from
+        # TODO: use Origin to have summary prometheus metrics on where
+        # requests come from
         # origin = request.headers.get("origin")
         logger.info(
             f"{request.client.host}:{request.client.port} {request.method} {request.url._url} - HTTP {response.status_code}"
@@ -25,7 +26,9 @@ async def logging_middleware(request: Request, call_next):
         raise e


-async def increase_exceptions_counter(e: Exception, location: str = "cronjob"):
+async def increase_exceptions_counter(
+    e: Exception, location: str = "cronjob"
+) -> None:
     if location == "cronjob":
         try:
             last_trace = traceback.extract_tb(e.__traceback__)[-1]
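The logging_middleware edited above follows the standard FastAPI/Starlette HTTP-middleware shape: await the downstream handler, then log client address, method, URL, and status code. A self-contained sketch of that shape (the app and logger wiring here are illustrative, not this project's actual setup):

from fastapi import FastAPI, Request
from loguru import logger

app = FastAPI()


@app.middleware("http")
async def log_requests(request: Request, call_next):
    # Run the downstream handler, then log one line per request.
    response = await call_next(request)
    logger.info(
        f"{request.client.host}:{request.client.port} "
        f"{request.method} {request.url} - HTTP {response.status_code}"
    )
    return response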

app/web/security.py (+1 −1)

@@ -59,7 +59,7 @@ async def get_token_or_user_auth(
 async def get_user_auth(
     credentials: HTTPAuthorizationCredentials = Depends(bearer_security),
 ):
-    # validates the Bearer token in the case that it requires it
+    # Validates the Bearer token in the case that it requires it
     valid_user, info = authenticate_user(credentials.credentials)
     if valid_user:
         return info.lower()
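get_user_auth is a FastAPI dependency built on the HTTPBearer security scheme. A rough sketch of that dependency pattern; authenticate_user is stubbed here as an assumption about its (bool, str) return shape, and the failure branch after the diff's cutoff is likewise assumed:

from fastapi import Depends, HTTPException
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

bearer_security = HTTPBearer()


def authenticate_user(token: str) -> tuple[bool, str]:
    # Stand-in for the project's real token check (assumed return shape).
    return token == "expected-token", "user@example.com"


async def get_user_auth(
    credentials: HTTPAuthorizationCredentials = Depends(bearer_security),
):
    # Validates the Bearer token and returns the authenticated identity.
    valid_user, info = authenticate_user(credentials.credentials)
    if valid_user:
        return info.lower()
    raise HTTPException(status_code=401, detail="Invalid token")  # assumed failure path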

app/web/utils/metrics.py (+4 −3)

@@ -37,11 +37,12 @@
 )


-async def redis_subscribe_worker_exceptions(REDIS_EXCEPTIONS_CHANNEL: str):
-    # Subscribe to Redis channel and increment the counter for each exception with info on the exception and task
+async def redis_subscribe_worker_exceptions(redis_exceptions_channel: str):
+    # Subscribe to Redis channel and increment the counter for each exception
+    # with info on the exception and task
     Redis = get_redis()
     PubSubExceptions = Redis.pubsub()
-    PubSubExceptions.subscribe(REDIS_EXCEPTIONS_CHANNEL)
+    PubSubExceptions.subscribe(redis_exceptions_channel)
     while True:
         message = PubSubExceptions.get_message()
         if message and message["type"] == "message":
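The renamed function polls a Redis pub/sub channel in a loop; the underlying redis-py pattern looks like the sketch below (channel name, payload handling, and sleep interval are placeholders, not values from this repo):

import time

import redis

r = redis.Redis()
pubsub = r.pubsub()
pubsub.subscribe("worker-exceptions")  # placeholder channel name

while True:
    # get_message() is non-blocking and returns None when nothing is queued.
    message = pubsub.get_message()
    if message and message["type"] == "message":
        # message["data"] holds the published payload (bytes by default).
        print(message["data"])
    time.sleep(0.1)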

app/web/utils/misc.py (+46)

@@ -1,6 +1,11 @@
 import base64
+from typing import List

+from auto_archiver.core import Media, Metadata
 from fastapi.encoders import jsonable_encoder
+from loguru import logger
+
+from app.shared.db import models


 def custom_jsonable_encoder(obj):
@@ -14,3 +19,44 @@ def convert_priority_to_queue_dict(priority: str) -> dict:
         "priority": 0 if priority == "high" else 10,
         "queue": f"{priority}_priority",
     }
+
+
+def convert_if_media(media):
+    if isinstance(media, Media):
+        return media
+    elif isinstance(media, dict):
+        try:
+            return Media.from_dict(media)
+        except Exception as e:
+            logger.debug(f"error parsing {media} : {e}")
+    return False
+
+
+def get_all_urls(result: Metadata) -> List[models.ArchiveUrl]:
+    db_urls = []
+    for m in result.media:
+        for i, url in enumerate(m.urls):
+            db_urls.append(
+                models.ArchiveUrl(url=url, key=m.get("id", f"media_{i}"))
+            )
+        for k, prop in m.properties.items():
+            if prop_converted := convert_if_media(prop):
+                for i, url in enumerate(prop_converted.urls):
+                    db_urls.append(
+                        models.ArchiveUrl(
+                            url=url, key=prop_converted.get("id", f"{k}_{i}")
+                        )
+                    )
+            if isinstance(prop, list):
+                for i, prop_media in enumerate(prop):
+                    if prop_media := convert_if_media(prop_media):
+                        for j, url in enumerate(prop_media.urls):
+                            db_urls.append(
+                                models.ArchiveUrl(
+                                    url=url,
+                                    key=prop_media.get(
+                                        "id", f"{k}{prop_media.key}_{i}.{j}"
+                                    ),
+                                )
+                            )
+    return db_urls
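get_all_urls walks an auto_archiver Metadata result: each Media entry contributes its urls, and any Media nested in a media item's properties (directly or inside a list) contributes its urls too, each becoming an ArchiveUrl row keyed by the media's id where available. A rough consumption sketch, assuming a Metadata object already produced by an archiving run (the wrapper name and the print are illustrative only):

from typing import List

from auto_archiver.core import Metadata

from app.shared.db import models
from app.web.utils.misc import get_all_urls


def urls_for_storage(result: Metadata) -> List[models.ArchiveUrl]:
    # Flatten the archive result into rows ready to persist.
    rows = get_all_urls(result)
    for row in rows:
        # Each row pairs a stored URL with a key derived from the media id.
        print(row.key, row.url)
    return rows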

app/worker/main.py (+1 −1)

@@ -8,12 +8,12 @@
 from sqlalchemy import exc

 from app.shared import business_logic, schemas
-from app.shared.aa_utils import get_all_urls
 from app.shared.db import models, worker_crud
 from app.shared.db.database import get_db
 from app.shared.log import log_error
 from app.shared.settings import get_settings
 from app.shared.task_messaging import get_celery, get_redis
+from app.web.utils.misc import get_all_urls
 from app.worker.worker_log import setup_celery_logger