Skip to content

Commit

Permalink
Merge pull request #1012 from UCL/wgraham/pyinstrument-profiling-ci
Browse files Browse the repository at this point in the history
Profiling workflow on `workflow_dispatch`
  • Loading branch information
willGraham01 authored Sep 7, 2023
2 parents aace4d1 + c9f92a3 commit 51f61a7
Show file tree
Hide file tree
Showing 11 changed files with 487 additions and 223 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/run-profiling.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Runs a pyinstrument profiling session on the scale_run profiling model,
# capturing the profiling result and sending it to the TLOmodel-profiling
# repository for processing and display.
#
# Profiling script executed is src/scripts/profiling/run_profiling.py.
# Output is the .pyisession file from the profiler, placed into results/
# results/ folder is then pushed to the TLOmodel-profiling repo, results branch.
name: Run profiling

on:
  workflow_dispatch:

jobs:
  run-profiling:
    name: Setup developer environment
    runs-on: [self-hosted]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      ## The profile environment produces outputs in the /results directory
      - name: Run profiling in dev environment
        run: |
          # NOTE: "VAR=value >> $GITHUB_ENV" only redirects the (empty) output of
          # a shell assignment — it never persists the variable. Write the
          # KEY=VALUE line into the environment file explicitly so later steps
          # can also see UNIQUE_ID.
          UNIQUE_ID="${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${GITHUB_SHA}"
          echo "UNIQUE_ID=${UNIQUE_ID}" >> "${GITHUB_ENV}"
          tox -vv -e profile -- --output_name "${UNIQUE_ID}"

      ## The token provided needs contents and pages access to the target repo
      ## Token can be (re)generated by a member of the UCL organisation,
      ## the current member is the rc-softdev-admin.
      ## [10-07-2023] The current token will expire 10-07-2024
      - name: Push results to profiling repository
        uses: dmnemec/[email protected]
        env:
          API_TOKEN_GITHUB: ${{ secrets.PROFILING_ACCESS_TOKEN }}
        with:
          source_file: results
          destination_repo: UCL/TLOmodel-profiling
          destination_folder: .
          destination_branch: results
          user_email: [email protected]
          user_name: rc-softdev-admin

      - name: Trigger website rebuild
        uses: peter-evans/repository-dispatch@v2
        with:
          token: ${{ secrets.PROFILING_ACCESS_TOKEN }}
          repository: UCL/TLOmodel-profiling
          event-type: new-profiling-results
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ coverage.xml
.hypothesis/
.pytest_cache/

# Profiling results files
src/scripts/profiling/results/
results/

# Translations
*.mo
*.pot
Expand Down
3 changes: 3 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ exclude docs/_*.rst
exclude docs/hsi_events.csv

global-exclude *.py[cod] __pycache__ *.so *.dylib .ipynb_checkpoints/** *~

# exclude profiling scripts from minimal build
recursive-exclude profiling *
3 changes: 3 additions & 0 deletions requirements/dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@ pytest
virtualenv
tox

# Profiling
pyinstrument

# Building requirements files
pip-tools
18 changes: 10 additions & 8 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#
# This file is autogenerated by pip-compile
# To update, run:
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
# pip-compile --output-file=requirements/dev.txt requirements/dev.in
#
Expand All @@ -27,14 +27,14 @@ azure-core==1.11.0
# azure-storage-file-share
azure-identity==1.5.0
# via -r requirements/base.in
azure-keyvault==4.1.0
# via -r requirements/base.in
azure-keyvault-certificates==4.2.1
# via azure-keyvault
azure-keyvault-keys==4.3.1
# via azure-keyvault
azure-keyvault-secrets==4.2.0
# via azure-keyvault
azure-keyvault==4.1.0
# via -r requirements/base.in
azure-storage-file-share==12.4.1
# via -r requirements/base.in
certifi==2020.12.5
Expand Down Expand Up @@ -83,12 +83,12 @@ kiwisolver==1.3.1
# via matplotlib
matplotlib==3.3.4
# via -r requirements/base.in
msal-extensions==0.3.0
# via azure-identity
msal==1.9.0
# via
# azure-identity
# msal-extensions
msal-extensions==0.3.0
# via azure-identity
msrest==0.6.21
# via
# azure-batch
Expand Down Expand Up @@ -131,6 +131,8 @@ py==1.10.0
# tox
pycparser==2.20
# via cffi
pyinstrument==4.5.0
# via -r requirements/dev.in
pyjwt[crypto]==2.0.1
# via
# adal
Expand All @@ -150,15 +152,15 @@ python-dateutil==2.8.1
# pandas
pytz==2021.1
# via pandas
requests-oauthlib==1.3.0
# via msrest
requests==2.25.1
# via
# adal
# azure-core
# msal
# msrest
# requests-oauthlib
requests-oauthlib==1.3.0
# via msrest
scipy==1.6.1
# via -r requirements/base.in
six==1.15.0
Expand Down
20 changes: 20 additions & 0 deletions src/scripts/profiling/_parameters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Parameter set passed to the profiling entry point.

``scale_run_parameters`` holds the keyword arguments that
``run_profiling.py`` forwards to ``scale_run()`` when profiling the model.
"""
from _paths import TLO_OUTPUT_DIR, TLO_ROOT

# Keyword arguments for scale_run(); keys must match its parameter names.
scale_run_parameters = {
    "years": 0,
    "months": 1,
    "initial_population": 50000,
    "tlo_dir": TLO_ROOT,
    "output_dir": TLO_OUTPUT_DIR,
    "log_filename": "scale_run_profiling",
    "log_level": "DEBUG",
    "parse_log_file": False,
    "show_progress_bar": True,
    "seed": 0,
    "disable_health_system": False,
    "disable_spurious_symptoms": False,
    "capabilities_coefficient": None,
    "mode_appt_constraints": 0,
    "save_final_population": False,
    "record_hsi_event_details": False,
}
8 changes: 8 additions & 0 deletions src/scripts/profiling/_paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import os
from pathlib import Path

PROFILING_DIR = Path(os.path.abspath(os.path.dirname(__file__)))
PROFILING_RESULTS = (PROFILING_DIR / "results").resolve()

TLO_ROOT = (PROFILING_DIR / ".." / ".." / "..").resolve()
TLO_OUTPUT_DIR = (TLO_ROOT / "outputs").resolve()
130 changes: 130 additions & 0 deletions src/scripts/profiling/run_profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import argparse
import os
import warnings
from datetime import datetime
from pathlib import Path
from typing import Optional

from pyinstrument import Profiler
from pyinstrument.renderers import HTMLRenderer, JSONRenderer

from _parameters import scale_run_parameters
from _paths import PROFILING_RESULTS
from scale_run import scale_run

# Description text shown by argparse when the script is invoked with --help.
HELP_STR = (
    "Produces profiling runs for a selection of models and parameters,\n"
    "writing the results in HTML and/or JSON format.\n"
    "Output names will default to the profiling timestamp if not provided."
)


def current_time(formatstr: str = "%Y-%m-%d_%H%M") -> str:
    """Return the current UTC time rendered via strftime with *formatstr*."""
    timestamp = datetime.utcnow()
    return timestamp.strftime(formatstr)


def run_profiling(
    output_dir: Path = PROFILING_RESULTS,
    output_name: Optional[Path] = None,
    write_pyis: bool = True,
    write_html: bool = False,
    write_json: bool = False,
    interval: float = 1e-1,
) -> None:
    """Profile the scale_run model and write the session to file.

    :param output_dir: Directory under which a timestamped directory is
        created to hold this run's outputs.
    :param output_name: Stem for the output file name(s); files are named
        "output.*" when this is None.
    :param write_pyis: Write the raw .pyisession profiler output.
    :param write_html: Render the recorded session to HTML.
    :param write_json: Render the recorded session to JSON.
    :param interval: Seconds between stack-capture frames while profiling.
    """
    # Silence all warnings so they do not pollute the profiling output.
    warnings.filterwarnings("ignore")

    # Create the directory that this profiling run will live in,
    # e.g. <output_dir>/2023/09/07/1200
    output_dir = output_dir / current_time("%Y/%m/%d/%H%M")
    os.makedirs(output_dir, exist_ok=True)

    # Assign output filenames, defaulting the stem to "output" if no name given
    stem = "output" if output_name is None else output_name.stem
    output_pyis_file = output_dir / f"{stem}.pyisession"
    output_html_file = output_dir / f"{stem}.html"
    output_json_file = output_dir / f"{stem}.json"

    # Create the profiler to record the stack.
    # An instance of a Profiler can be start()-ed and stop()-ped multiple times,
    # combining the recorded sessions into one at the end.
    # As such, the same profiler can be used to record the profile of multiple
    # scripts, however this may create large datafiles so using separate
    # profilers is preferable.
    p = Profiler(interval=interval)

    print(f"[{current_time('%H:%M:%S')}:INFO] Starting profiling runs")

    # Profile scale_run
    scale_run(**scale_run_parameters, profiler=p)

    print(f"[{current_time('%H:%M:%S')}:INFO] Profiling runs complete")

    # Fetch the recorded session: if multiple scripts are to be profiled,
    # this needs to be done after each model "run",
    # and p needs to be re-initialised before starting the next model run.
    scale_run_session = p.last_session

    # Write outputs to files
    # Renderer initialisation options:
    #   show_all: removes library calls where identifiable
    #   timeline: if true, samples are left in chronological order rather than
    #             total time
    if write_pyis:
        # Fixed: this message previously reported output_html_file while
        # actually saving the .pyisession file.
        print(f"Writing {output_pyis_file}", end="...", flush=True)
        scale_run_session.save(output_pyis_file)
        print("done")
    if write_html:
        html_renderer = HTMLRenderer(show_all=False, timeline=False)
        print(f"Writing {output_html_file}", end="...", flush=True)
        with open(output_html_file, "w") as f:
            f.write(html_renderer.render(scale_run_session))
        print("done")
    if write_json:
        json_renderer = JSONRenderer(show_all=False, timeline=False)
        print(f"Writing {output_json_file}", end="...", flush=True)
        with open(output_json_file, "w") as f:
            f.write(json_renderer.render(scale_run_session))
        print("done")


if __name__ == "__main__":
    # Command-line interface mirroring run_profiling()'s keyword arguments.
    parser = argparse.ArgumentParser(description=HELP_STR)
    # NOTE(review): store_true flags default to False, so invoking the script
    # with no flags passes write_pyis=False, overriding run_profiling's
    # write_pyis=True default — confirm this "write nothing unless asked"
    # CLI behaviour is intended.
    parser.add_argument(
        "--pyis",
        action="store_true",
        help="Write .pyisession output.",  # fixed typo: was ".ipysession"
        dest="write_pyis",
    )
    parser.add_argument(
        "--html", action="store_true", help="Write HTML output.", dest="write_html"
    )
    parser.add_argument(
        "--json", action="store_true", help="Write JSON output.", dest="write_json"
    )
    parser.add_argument(
        "--output_dir",
        type=Path,
        help="Redirect the output(s) to this directory.",
        default=PROFILING_RESULTS,
    )
    parser.add_argument(
        "--output_name",
        type=Path,
        help="Name to give to the output file(s). File extensions will automatically be appended.",
        default=None,
    )
    parser.add_argument(
        "-i",
        "--interval-seconds",
        dest="interval",
        type=float,
        help="Interval in seconds between capture frames for profiling.",
        default=1e-1,
    )

    args = parser.parse_args()
    run_profiling(**vars(args))
Loading

0 comments on commit 51f61a7

Please sign in to comment.