Benchmark scripts with pyinstrument #1010

Closed
wants to merge 11 commits into from
3 changes: 3 additions & 0 deletions .gitignore
@@ -47,6 +47,9 @@ coverage.xml
.hypothesis/
.pytest_cache/

# Profiling
src/scripts/profiling/html/

# Translations
*.mo
*.pot
3 changes: 3 additions & 0 deletions requirements/dev.in
@@ -5,5 +5,8 @@ pytest
virtualenv
tox

# Profiling
pyinstrument

# Building requirements files
pip-tools
18 changes: 10 additions & 8 deletions requirements/dev.txt
@@ -1,6 +1,6 @@
#
# This file is autogenerated by pip-compile
# To update, run:
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
# pip-compile --output-file=requirements/dev.txt requirements/dev.in
#
@@ -27,14 +27,14 @@ azure-core==1.11.0
# azure-storage-file-share
azure-identity==1.5.0
# via -r requirements/base.in
azure-keyvault==4.1.0
# via -r requirements/base.in
azure-keyvault-certificates==4.2.1
# via azure-keyvault
azure-keyvault-keys==4.3.1
# via azure-keyvault
azure-keyvault-secrets==4.2.0
# via azure-keyvault
azure-keyvault==4.1.0
# via -r requirements/base.in
azure-storage-file-share==12.4.1
# via -r requirements/base.in
certifi==2020.12.5
@@ -83,12 +83,12 @@ kiwisolver==1.3.1
# via matplotlib
matplotlib==3.3.4
# via -r requirements/base.in
msal-extensions==0.3.0
# via azure-identity
msal==1.9.0
# via
# azure-identity
# msal-extensions
msal-extensions==0.3.0
# via azure-identity
msrest==0.6.21
# via
# azure-batch
@@ -131,6 +131,8 @@ py==1.10.0
# tox
pycparser==2.20
# via cffi
pyinstrument==4.5.0
# via -r requirements/dev.in
pyjwt[crypto]==2.0.1
# via
# adal
@@ -150,15 +152,15 @@ python-dateutil==2.8.1
# pandas
pytz==2021.1
# via pandas
requests-oauthlib==1.3.0
# via msrest
requests==2.25.1
# via
# adal
# azure-core
# msal
# msrest
# requests-oauthlib
requests-oauthlib==1.3.0
# via msrest
scipy==1.6.1
# via -r requirements/base.in
six==1.15.0
31 changes: 31 additions & 0 deletions src/scripts/profiling/README.md
@@ -0,0 +1,31 @@
# Profiling with `pyinstrument`

Activate your developer environment, and navigate to the root of the TLOModel repository.
Run
```sh
python src/scripts/profiling/profile.py HTML_OUTPUT_LOCATION
```
to run the profiling script (currently only supports `scale_run.py`).
You can also request command-line help using the `-h` or `--help` flags.
If you do not provide `HTML_OUTPUT_LOCATION`, the script writes the outputs to the default location, `src/scripts/profiling/html/`.
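
For example, to write the report to a directory of your choice (the path below is only illustrative):
```sh
# Profile scale_run and write the HTML report to a custom directory
python src/scripts/profiling/profile.py outputs/profiling_reports
```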

## Files within `profiling/`

Utility files:
- `_paths.py`: Defines absolute paths used by the profiling scripts, so that outputs are written to the correct locations regardless of the working directory the scripts are run from.
- `shared.py`: Logging setup and other helpers shared across these scripts.

Files that are used to wrap the automatic profiling run:
- `parameters.py`: Parameters for each of the models that the profiler should run, stored as dictionaries.
- `profile.py`: Main profiling script; runs all models that need to be profiled and outputs results.

Models which are run by the profiler:
- `scale_run.py`: A run of the full model at scale using all disease modules considered complete and all modules for birth / labour / newborn outcome.

Models which are not presently used by the profiler, but can be run locally:
- `batch_test.py`
- `heavy_use_of_bed_days.py`
- `heavy_use_of_spurious_symptoms.py`
- `run_full_model_with_hard_constraints_in_healthsystem.py`
- `run_with_high_intensity_of_HSI_and_simplified_births.py`
8 changes: 8 additions & 0 deletions src/scripts/profiling/_paths.py
@@ -0,0 +1,8 @@
import os
from pathlib import Path

PROFILING_DIR = Path(os.path.abspath(os.path.dirname(__file__)))
PROFILING_HTML_DIR = (PROFILING_DIR / "html").resolve()

TLO_ROOT = (PROFILING_DIR / ".." / ".." / "..").resolve()
TLO_OUTPUT_DIR = (TLO_ROOT / "outputs").resolve()
21 changes: 21 additions & 0 deletions src/scripts/profiling/parameters.py
@@ -0,0 +1,21 @@
from _paths import TLO_ROOT, TLO_OUTPUT_DIR

# Parameters to pass to scale_run
scale_run_parameters = {
    "years": 0,
    "months": 1,
    "initial_population": 50000,
    "tlo_dir": TLO_ROOT,
    "output_dir": TLO_OUTPUT_DIR,
    "log_filename": "scale_run_benchmark",
    "log_level": "DEBUG",
    "parse_log_file": False,
    "show_progress_bar": True,
    "seed": 0,
    "disable_health_system": False,
    "disable_spurious_symptoms": False,
    "capabilities_coefficient": None,
    "mode_appt_constraints": 2,
    "save_final_population": False,
    "record_hsi_event_details": False,
}
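
Since `profile.py` unpacks this dictionary straight into `scale_run`, a variant benchmark can be defined by copying it and overriding a few entries. A minimal sketch (the variant name and values below are illustrative, not part of this change):
```python
# Illustrative only: a longer benchmark derived from scale_run_parameters
longer_run_parameters = {
    **scale_run_parameters,
    "years": 1,
    "months": 0,
    "initial_population": 100_000,
}
```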
67 changes: 67 additions & 0 deletions src/scripts/profiling/profile.py
@@ -0,0 +1,67 @@
import argparse
import os
import warnings
from datetime import datetime
from pathlib import Path

from pyinstrument import Profiler
from pyinstrument.renderers import HTMLRenderer

from _paths import PROFILING_HTML_DIR
from parameters import scale_run_parameters
from scale_run import scale_run


def current_time(formatstr: str = "%Y-%m-%d_%H%M") -> str:
    """Produces a string of the current (UTC) time in the specified format"""
    return datetime.utcnow().strftime(formatstr)


def profile_all(output_html_dir: str = None) -> None:
    warnings.filterwarnings("ignore")

    # Set up the output directory and file
    if output_html_dir is None:
        output_html_dir = PROFILING_HTML_DIR
    else:
        output_html_dir = Path(output_html_dir)
    if not os.path.exists(output_html_dir):
        os.mkdir(output_html_dir)
    output_html_file = output_html_dir / (current_time() + ".html")

    # Set up the profiler, to record the call stack every `interval` seconds
    p = Profiler(interval=1e-3)

    print(f"[{current_time('%H:%M:%S')}:INFO] Starting profiling runs")
    # Perform all profiling runs, passing in the profiler so it can be started
    # within each run and halted between runs for more accurate results
    scale_run(**scale_run_parameters, profiler=p)

    print(f"[{current_time('%H:%M:%S')}:INFO] Profiling runs complete")
    # Recorded sessions are combined, so last_session fetches the combination
    # of all profiling runs conducted
    profiled_session = p.last_session

    # Render the results to HTML
    # show_all=False: hide library frames where they can be identified
    # timeline=False: aggregate samples by total time rather than keeping them
    # in chronological order
    html_renderer = HTMLRenderer(show_all=False, timeline=False)

    # Write the HTML file
    print(f"Writing output to: {output_html_file}", end="...", flush=True)
    with open(output_html_file, "w") as f:
        f.write(html_renderer.render(profiled_session))
    print("done")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Run all profiling scripts and save the results."
    )
    parser.add_argument(
        "output_html_dir",
        nargs="?",
        type=str,
        default=None,
        help="Directory into which to write profiling results as HTML files.",
    )

    args = parser.parse_args()
    profile_all(**vars(args))
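
The comments in `profile_all` assume that `scale_run` starts and stops the supplied profiler around the simulation itself, so that setup work is excluded and the recorded sessions combine into `last_session`. `scale_run.py` is not shown in this diff, so the sketch below is only an assumption of what that hand-off could look like, with the simulation body elided:
```python
# Hypothetical sketch: scale_run.py is not part of this diff, so the signature
# and internals here are assumptions, not the actual implementation.
from typing import Optional

from pyinstrument import Profiler


def scale_run(*, years: int, months: int, profiler: Optional[Profiler] = None, **other_parameters) -> None:
    # ... build the simulation from the supplied parameters (elided) ...
    if profiler is not None:
        profiler.start()  # profile only the simulation itself
    try:
        ...  # run the simulation here
    finally:
        if profiler is not None:
            profiler.stop()  # each run's samples are combined into last_session
```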