Skip to content

Commit fd092b3

Browse files
feat: add aggregated data collector API (#314)
* feat: add API to collect data * feat: update version and changelog * fix: process results * chore: change IsGitHubAction class name * chore: block non-SELECT queries * feat: add setting AGGREGATED_DATA_COLLECTOR_API_ENABLED to disable data collector endpoint * refactor: execute all predefined queries and remove query selection * chore: rename EOX_CORE_DATA_COLLECTOR_ENABLED setting to AGGREGATED_DATA_COLLECTOR_API_ENABLED * chore: COUNTDOWN and MAX_RETRIES constants * refactor: merge serialize_data and process_query_results into post_process_query_results * refactor: remove validate Only SELECT queries are allowed * chore: rename EOX_CORE_SAVE_DATA_API_CLIENT_ID and EOX_CORE_SAVE_DATA_API_CLIENT_SECRET * chore: Updated DataCollectorView to retrieve destination_url and token_generation_url from settings. * refactor: move DatacollectorPermission to a dedicated permissions.py file * feat: add aggregated collector test --------- Co-authored-by: Luis Felipe Castano <[email protected]>
1 parent eea72f9 commit fd092b3

File tree

17 files changed

+545
-2
lines changed

17 files changed

+545
-2
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ Please do not update the unreleased notes.
1111

1212
<!-- Content should be placed here -->
1313

14+
## [v11.2.0](https://github.com/eduNEXT/eox-core/compare/v11.1.0...v11.2.0) - (2025-01-20)
15+
16+
### Added
17+
18+
- New API endpoint to support data collection and report generation (if the feature is enabled).
19+
1420
## [v11.1.0](https://github.com/eduNEXT/eox-core/compare/v11.0.0...v11.1.0) - (2024-11-21)
1521

1622
### Changed

eox_core/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
"""
22
Init for main eox-core app
33
"""
4-
__version__ = '11.1.0'
4+
__version__ = '11.2.0'

eox_core/api/data/aggregated_collector/__init__.py

Whitespace-only changes.
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
"""
2+
This module contains SQL queries to retrieve platform-related metrics.
3+
4+
These queries are used to extract information about user activity, course
5+
engagement, and certificate issuance. The data retrieval is conditioned on
6+
the feature being enabled and the necessary authentication credentials
7+
being set.
8+
"""
9+
10+
# This query counts the total number of users in the platform.
11+
TOTAL_USERS_QUERY = """
12+
SELECT
13+
COUNT(*)
14+
FROM
15+
auth_user as au;
16+
"""
17+
18+
# This query counts the number of unique active users in the last month.
19+
# A user is considered active if they have modified a student module within the last month.
20+
ACTIVE_USERS_LAST_MONTH_QUERY = """
21+
SELECT
22+
YEAR(cs.modified) AS 'Year',
23+
MONTH(cs.modified) AS 'Month',
24+
COUNT(DISTINCT cs.student_id) AS 'Active Users'
25+
FROM
26+
courseware_studentmodule AS cs
27+
WHERE
28+
cs.modified >= DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 1 MONTH), '%Y-%m-01')
29+
AND cs.modified < DATE_FORMAT(CURDATE(), '%Y-%m-01')
30+
GROUP BY
31+
YEAR(cs.modified), MONTH(cs.modified);
32+
"""
33+
34+
# This query counts the number of unique active users in the last 7 days.
35+
# A user is considered active if they have modified a student module within the last 7 days.
36+
ACTIVE_USERS_LAST_7_DAYS_QUERY = """
37+
SELECT
38+
COUNT(DISTINCT cs.student_id)
39+
FROM
40+
courseware_studentmodule AS cs
41+
WHERE
42+
cs.modified >= DATE_SUB(CURDATE(), INTERVAL 7 DAY);
43+
"""
44+
45+
# This query counts the total number of courses created on the platform.
46+
TOTAL_COURSES_CREATED_QUERY = """
47+
SELECT
48+
COUNT(*)
49+
FROM
50+
course_overviews_courseoverview as coc;
51+
"""
52+
53+
# This query counts the number of CourseOverviews objects that started before
54+
# now and have not yet ended
55+
ACTIVE_COURSES_COUNT_QUERY = """
56+
SELECT
57+
COUNT(*)
58+
FROM
59+
course_overviews_courseoverview as coc
60+
WHERE
61+
coc.start <= NOW()
62+
AND (
63+
coc.end >= NOW()
64+
OR coc.end IS NULL
65+
);
66+
"""
67+
68+
# This query counts the number of courses that have at least one issued certificate.
69+
COURSES_WITH_ACTIVE_CERTIFICATES_QUERY = """
70+
SELECT
71+
COUNT(DISTINCT coc.id)
72+
FROM
73+
course_overviews_courseoverview AS coc
74+
JOIN
75+
certificates_generatedcertificate AS cg
76+
ON
77+
coc.id = cg.course_id;
78+
"""
79+
80+
# This query counts the number of new enrollments in the last month.
81+
ENROLLMENTS_LAST_MONTH_QUERY = """
82+
SELECT
83+
YEAR(sc.created) AS 'Year',
84+
MONTH(sc.created) AS 'Month',
85+
COUNT(DISTINCT sc.id) AS 'Enrollments'
86+
FROM
87+
student_courseenrollment AS sc
88+
WHERE
89+
sc.created >= DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 1 MONTH), '%Y-%m-01')
90+
AND sc.created < DATE_FORMAT(CURDATE(), '%Y-%m-01')
91+
GROUP BY
92+
YEAR(sc.created), MONTH(sc.created);
93+
"""
94+
95+
# This query counts the number of new enrollments in the last 7 days.
96+
ENROLLMENTS_LAST_7_DAYS_QUERY = """
97+
SELECT
98+
COUNT(DISTINCT sc.id)
99+
FROM
100+
student_courseenrollment AS sc
101+
WHERE
102+
sc.created >= DATE_SUB(CURDATE(), INTERVAL 7 DAY);
103+
"""
104+
105+
# This query counts the total number of certificates issued on the platform.
106+
CERTIFICATES_ISSUED_QUERY = """
107+
SELECT
108+
COUNT(*)
109+
FROM
110+
certificates_generatedcertificate as cg;
111+
"""
112+
113+
PREDEFINED_QUERIES = {
114+
"Total Users": TOTAL_USERS_QUERY,
115+
"Active Users Last Month": ACTIVE_USERS_LAST_MONTH_QUERY,
116+
"Active users in the last 7 days": ACTIVE_USERS_LAST_7_DAYS_QUERY,
117+
"Total Courses Created": TOTAL_COURSES_CREATED_QUERY,
118+
"Active Courses Count": ACTIVE_COURSES_COUNT_QUERY,
119+
"Courses With Active Certificates": COURSES_WITH_ACTIVE_CERTIFICATES_QUERY,
120+
"Enrollments Last Month": ENROLLMENTS_LAST_MONTH_QUERY,
121+
"Enrollments Last 7 Days": ENROLLMENTS_LAST_7_DAYS_QUERY,
122+
"Certificates Issued": CERTIFICATES_ISSUED_QUERY,
123+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
"""
2+
Async task for generating reports by executing database queries
3+
and posting the results to the Shipyard API.
4+
"""
5+
6+
import logging
7+
8+
from celery import shared_task
9+
from django.db.utils import DatabaseError, OperationalError
10+
11+
from eox_core.api.data.aggregated_collector.queries import PREDEFINED_QUERIES
12+
from eox_core.api.data.aggregated_collector.utils import execute_query, post_data_to_api, post_process_query_results
13+
14+
logger = logging.getLogger(__name__)
15+
16+
COUNTDOWN = 60
17+
MAX_RETRIES = 3
18+
19+
20+
@shared_task(bind=True)
21+
def generate_report(self, destination_url: str, token_generation_url: str, current_host: str):
22+
"""
23+
Async task to generate a report:
24+
1. Executes all predefined queries.
25+
2. Sends the results to the Shipyard API.
26+
27+
Args:
28+
self (Task): The Celery task instance.
29+
destination_url (str): URL to send the results.
30+
31+
Raises:
32+
Retry: If an error occurs, the task retries up to 3 times with a 60-second delay.
33+
"""
34+
try:
35+
report_data = {}
36+
for query_name, query_sql in PREDEFINED_QUERIES.items():
37+
logger.info("Executing query: %s", query_name)
38+
try:
39+
result = execute_query(query_sql)
40+
41+
processed_result = post_process_query_results(result)
42+
report_data[query_name] = processed_result
43+
except (DatabaseError, OperationalError) as e:
44+
logger.error("Failed to execute query '%s': %s", query_name, e)
45+
continue
46+
47+
post_data_to_api(destination_url, report_data, token_generation_url, current_host)
48+
49+
logger.info("Report generation task completed successfully.")
50+
except Exception as e:
51+
logger.error("An error occurred in the report generation task: '%s'. Retrying", e)
52+
raise self.retry(exc=e, countdown=COUNTDOWN, max_retries=MAX_RETRIES)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""
2+
URL configuration for the Microsite API.
3+
4+
This module defines the URL patterns for the Microsite API,
5+
including versioned endpoints for aggregated_collector.
6+
"""
7+
from django.urls import include, re_path
8+
9+
app_name = 'eox_core' # pylint: disable=invalid-name
10+
11+
12+
urlpatterns = [ # pylint: disable=invalid-name
13+
re_path(r'^v1/', include('eox_core.api.data.aggregated_collector.v1.urls', namespace='eox-data-api-collector-v1')),
14+
]
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""
2+
Utility functions for report generation, including query execution and data posting.
3+
"""
4+
5+
import logging
6+
from datetime import datetime
7+
8+
import requests
9+
from django.conf import settings
10+
from django.db import connection
11+
12+
from eox_core.utils import get_access_token
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
def execute_query(sql_query: str):
18+
"""
19+
Execute a raw SQL query and return the results in a structured format.
20+
21+
Args:
22+
sql_query (str): The raw SQL query to execute.
23+
24+
Returns:
25+
list or dict: Structured query results.
26+
27+
Example:
28+
>>> execute_query("SELECT id, username FROM auth_user WHERE is_active = 1;")
29+
[
30+
{"id": 1, "username": "john_doe"},
31+
{"id": 2, "username": "jane_doe"}
32+
]
33+
"""
34+
with connection.cursor() as cursor:
35+
cursor.execute(sql_query)
36+
rows = cursor.fetchall()
37+
# If the query returns more than one column, return rows as is.
38+
if cursor.description:
39+
columns = [col[0] for col in cursor.description]
40+
if len(columns) == 1:
41+
return [row[0] for row in rows] # Return single-column results as a list
42+
return [dict(zip(columns, row)) for row in rows] # Multi-column results as a list of dicts
43+
return rows
44+
45+
46+
def post_process_query_results(data: any):
47+
"""
48+
Cleans and processes query results by:
49+
- Serializing datetime objects into strings.
50+
- Extracting scalar values from single-item lists.
51+
- Returning structured data for further use.
52+
53+
Args:
54+
data (dict, list, datetime, or scalar): The query result data.
55+
56+
Returns:
57+
dict, list, or scalar: The processed query result.
58+
"""
59+
if isinstance(data, dict):
60+
return {key: post_process_query_results(value) for key, value in data.items()}
61+
if isinstance(data, list):
62+
# If it's a list with one item, return just the item
63+
if len(data) == 1:
64+
return post_process_query_results(data[0])
65+
return [post_process_query_results(item) for item in data]
66+
if isinstance(data, datetime):
67+
return data.isoformat()
68+
return data
69+
70+
71+
def post_data_to_api(api_url: str, report_data: dict, token_generation_url: str, current_host: str):
72+
"""
73+
Sends the generated report data to the Shipyard API.
74+
75+
Args:
76+
report_data (dict): The data to be sent to the Shipyard API.
77+
78+
Raises:
79+
Exception: If the API request fails.
80+
"""
81+
token = get_access_token(
82+
token_generation_url,
83+
settings.EOX_CORE_AGGREGATED_COLLECTOR_TARGET_CLIENT_ID,
84+
settings.EOX_CORE_AGGREGATED_COLLECTOR_TARGET_CLIENT_SECRET,
85+
)
86+
headers = {
87+
"Authorization": f"Bearer {token}",
88+
"Content-Type": "application/json",
89+
}
90+
payload = {"instance_domain": current_host, "data": report_data}
91+
92+
try:
93+
response = requests.post(api_url, json=payload, headers=headers, timeout=10)
94+
response.raise_for_status()
95+
except requests.Timeout as exc:
96+
raise requests.Timeout("The request to API timed out.") from exc
97+
except requests.RequestException as e:
98+
raise requests.RequestException(f"Failed to post data to API: {e}")

eox_core/api/data/aggregated_collector/v1/__init__.py

Whitespace-only changes.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""
2+
Custom permission classes for the Aggregated Collector API.
3+
"""
4+
5+
import logging
6+
7+
from django.conf import settings
8+
from rest_framework.authentication import get_authorization_header
9+
from rest_framework.permissions import BasePermission
10+
11+
logger = logging.getLogger(__name__)
12+
13+
14+
class AggregatedCollectorPermission(BasePermission):
15+
"""
16+
Permission class to allow access only if:
17+
- The AGGREGATED_DATA_COLLECTOR_API_ENABLED setting is True.
18+
- The request contains a valid GitHub Action token.
19+
"""
20+
21+
def has_permission(self, request, view):
22+
""""
23+
Determines if the request has permission to access the Aggregated Collector API.
24+
"""
25+
# Check if the API is enabled
26+
if not getattr(settings, "AGGREGATED_DATA_COLLECTOR_API_ENABLED", False):
27+
return False
28+
29+
# Check if the request contains a valid token
30+
auth_header = get_authorization_header(request).decode('utf-8')
31+
auth_token = settings.EOX_CORE_AGGREGATED_COLLECTOR_AUTH_TOKEN
32+
if auth_header and auth_header == f"Bearer {auth_token}":
33+
return True
34+
return False

eox_core/api/data/aggregated_collector/v1/tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)