Send in nonblocking loop (#15)

charles-turner-1 · web-flow · commit c0d31fd9b030 · 2025-01-21T18:53:32.000+11:00
* Replace blocking event loop with multiprocessing to spawn new process
  to run event loop if needed.
* Add a couple of helper functions which handle the event loop sending
  telemetry data in the background.
* Update docstrings
* Add test which times sending a few requests to ensure that they are non-blocking.
* Update wheel version (security vulnerability)
* Remove redundant test (didn't actually test anything or improve
  coverage)
diff --git a/pyproject.toml b/pyproject.toml
@@ -131,3 +131,6 @@ max-complexity = 18
 
 [tool.ruff.isort]
 known-first-party = ["access_py_telemetry"]
+
+[tool.pytest.ini_options]
+asyncio_default_fixture_loop_scope = "function"
diff --git a/requirements_dev.txt b/requirements_dev.txt
@@ -1,6 +1,6 @@
 pip>=23.3
 bump2version==0.5.11
-wheel==0.33.6
+wheel>=0.38.1
 watchdog==0.9.0
 tox==3.14.0
 coverage==4.5.4
diff --git a/src/access_py_telemetry/api.py b/src/access_py_telemetry/api.py
@@ -12,7 +12,19 @@
 import asyncio
 import pydantic
 import yaml
+import multiprocessing
 from pathlib import Path
+from multiprocessing import Process
+
+try:
+    multiprocessing.set_start_method("spawn")
+except RuntimeError:
+    """
+    Since this is a side effect on module import, we can't guarantee that it will
+    always be called before the first Process is created, or that the start method
+    isn't already set. In this case, we just ignore the error - the processes should
+    still work fine.
+    """
 from .utils import ENDPOINTS, REGISTRIES
 
 S = TypeVar("S", bound="SessionID")
@@ -84,11 +96,14 @@ def pop_fields(self) -> dict[str, list[str]]:
         return self._pop_fields
 
     @pydantic.validate_call
-    def remove_fields(self, service: str, fields: Iterable[str]) -> None:
+    def remove_fields(self, service: str, fields: str | Iterable[str]) -> None:
         """
         Set the fields to remove from the telemetry data for a given service. Useful for excluding default
         fields that are not needed for a particular telemetry call: eg, removing
         Session tracking if a CLI is being used.
+
+        Note: This does not use a set union, so you must specify all fields you want to remove in one call.
+        # TODO: Maybe make this easier to use?
         """
         if isinstance(fields, str):
             fields = [fields]
@@ -244,44 +259,98 @@ async def send_telemetry(endpoint: str, data: dict[str, Any]) -> None:
     return None
 
 
-def send_in_loop(endpoint: str, telemetry_data: dict[str, Any]) -> None:
+def send_in_loop(
+    endpoint: str, telemetry_data: dict[str, Any], timeout: float | None = None
+) -> None:
     """
     Wraps the send_telemetry function in an event loop. This function will:
     - Check if an event loop is already running
-    - Create a new event loop if one is not running
-    - Send the telemetry data
+    - If an event loop is running, send the telemetry data in the background
+    - If an event loop is not running, create a new event loop in a separate process
+        and send the telemetry data in the background using that loop.
 
     Parameters
     ----------
     endpoint : str
         The URL to send the telemetry data to.
     telemetry_data : dict
         The telemetry data to send.
+    timeout : float, optional
+        The maximum time, in seconds, to wait for the background process to finish
+        when no event loop is running. If exceeded, the process is terminated and a
+        RuntimeWarning is issued. If None (or 0), defaults to 60 seconds.
 
     Returns
     -------
     None
 
-    Warnings
-    --------
-    RuntimeWarning
-        If the event loop is not running, telemetry will block execution.
     """
+    timeout = timeout or 60
 
-    # Check if there's an existing event loop, otherwise create a new one
     try:
         loop = asyncio.get_running_loop()
     except RuntimeError:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-
-    if loop.is_running():
-        loop.create_task(send_telemetry(endpoint, telemetry_data))
+        _run_in_proc(endpoint, telemetry_data, timeout)
     else:
-        # breakpoint()
-        # loop.create_task(send_telemetry(telemetry_data))
-        loop.run_until_complete(send_telemetry(endpoint, telemetry_data))
+        loop.create_task(send_telemetry(endpoint, telemetry_data))
+        return None
+
+
+def _run_event_loop(endpoint: str, telemetry_data: dict[str, Any]) -> None:
+    """
+    Handles the creation and running of an event loop for sending telemetry data.
+    This function is intended to be run in a separate process, and will:
+    - Create a new event loop
+    - Send the telemetry data
+    - Run the event loop until the telemetry data is sent
+
+    Parameters
+    ----------
+    endpoint : str
+        The URL to send the telemetry data to.
+    telemetry_data : dict
+        The telemetry data to send.
+
+    Returns
+    -------
+    None
+    """
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    loop.run_until_complete(send_telemetry(endpoint, telemetry_data))
+
+
+def _run_in_proc(endpoint: str, telemetry_data: dict[str, Any], timeout: float) -> None:
+    """
+    Handles the creation and running of a separate process for sending telemetry data.
+    This function will:
+    - Create a new process and run the _run_event_loop function in that process
+    - Wait for the process to finish
+    - If the process takes longer than the specified timeout, terminate the process
+        and raise a warning
+
+    Parameters
+    ----------
+    endpoint : str
+        The URL to send the telemetry data to.
+    telemetry_data : dict
+        The telemetry data to send.
+    timeout : float
+        The maximum time to wait for the process to finish.
+
+    Returns
+    -------
+    None
+
+    """
+    proc = Process(target=_run_event_loop, args=(endpoint, telemetry_data))
+    proc.start()
+    proc.join(timeout)
+    if proc.is_alive():
+        proc.terminate()
         warnings.warn(
-            "Event loop not running, telemetry will block execution",
+            f"Telemetry data not sent within {timeout} seconds",
             category=RuntimeWarning,
+            stacklevel=2,
         )
+    return None
diff --git a/tests/test_api.py b/tests/test_api.py
@@ -4,11 +4,12 @@
 """Tests for `access_py_telemetry` package."""
 
 import access_py_telemetry.api
-from access_py_telemetry.api import SessionID, ApiHandler
-import warnings
+from access_py_telemetry.api import SessionID, ApiHandler, send_in_loop
 from pydantic import ValidationError
 import pytest
 
+import time
+
 
 @pytest.fixture
 def local_host():
@@ -164,13 +165,16 @@ def test_api_handler_remove_fields(api_handler):
 
     assert api_handler._pop_fields == {"payu": ["session_id"]}
 
+    # Now remove the 'model' field from the payu record, as a string.
+    api_handler.remove_fields("payu", "model")
+
 
-def test_api_handler_send_api_request_no_loop(local_host, api_handler):
+def test_api_handler_send_api_request(api_handler, capsys):
     """
-    Create and send an API request with telemetry data.
+    Create and send an API request with telemetry data - just to make sure that
+    the request is being sent correctly.
     """
-
-    api_handler.server_url = local_host
+    api_handler.server_url = "http://dud/host/endpoint"
 
     # Pretend we only have catalog & payu services and then mock the initialisation
     # of the _extra_fields attribute
@@ -189,17 +193,15 @@ def test_api_handler_send_api_request_no_loop(local_host, api_handler):
     # Remove indeterminate fields
     api_handler.remove_fields("payu", ["session_id", "name"])
 
-    with pytest.warns(RuntimeWarning) as warnings_record:
-        api_handler.send_api_request(
-            service_name="payu",
-            function_name="_test",
-            args=[1, 2, 3],
-            kwargs={"name": "test_username"},
-        )
-
-    # This should contain two warnings - one for the failed request and one for the
-    # event loop. Sometimes we get a third, which I need to find.
-    assert len(warnings_record) >= 2
+    # We should get a warning because we've used a dud url, but pytest doesn't
+    # seem to capture subprocess warnings. I'm not sure there is really a good
+    # way to test for this.
+    api_handler.send_api_request(
+        service_name="payu",
+        function_name="_test",
+        args=[1, 2, 3],
+        kwargs={"name": "test_username"},
+    )
 
     assert api_handler._last_record == {
         "function": "_test",
@@ -209,12 +211,28 @@ def test_api_handler_send_api_request_no_loop(local_host, api_handler):
         "random_number": 2,
     }
 
-    if len(warnings_record) == 3:
-        # Just reraise all the warnings if we get an unexpected one so we can come
-        # back and track it down
 
-        for warning in warnings_record:
-            warnings.warn(warning.message, warning.category, stacklevel=2)
+def test_send_in_loop_is_bg():
+    """
+    Send a request, but make sure that it runs in the background (ie. is non-blocking).
+
+    There will be some overhead associated with process startup and teardown,
+    but we shouldn't be waiting for the requests to finish. Using a long timeout
+    and only sending 3 requests should be enough to ensure that we're not accidentally
+    testing the process startup/teardown time.
+    """
+    start_time = time.time()
+
+    for _ in range(3):
+        send_in_loop(endpoint="https://dud/endpoint", telemetry_data={}, timeout=3)
+
+    print("Requests sent")
+
+    end_time = time.time()
+
+    dt = end_time - start_time
+
+    assert dt < 4
 
 
 def test_api_handler_invalid_endpoint(api_handler):