Skip to content

Commit

Permalink
Merge pull request #1012 from UCL/wgraham/pyinstrument-profiling-ci
Browse files Browse the repository at this point in the history
Profiling workflow on `workflow_dispatch`
  • Loading branch information
willGraham01 authored Sep 7, 2023
2 parents aace4d1 + c9f92a3 commit 51f61a7
Show file tree
Hide file tree
Showing 11 changed files with 487 additions and 223 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/run-profiling.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Runs a pyinstrument profiling session on the scale_run profiling model,
# capturing the profiling result and sending it to the TLOmodel-profiling
# repository for processing and display.
#
# Profiling script executed is src/scripts/profiling/run_profiling.py.
# Output is the .pyisession file from the profiler, placed into results/
# results/ folder is then pushed to the TLOmodel-profiling repo, results branch.
name: Run profiling

on:
  workflow_dispatch:

jobs:
  run-profiling:
    name: Setup developer environment
    runs-on: [self-hosted]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      ## The profile environment produces outputs in the /results directory
      - name: Run profiling in dev environment
        run: |
          # NOTE: "VAR=value >> $GITHUB_ENV" only redirects the (empty) output of
          # a shell assignment — it never persists the variable. Write the
          # KEY=VALUE line into the environment file explicitly so later steps
          # can also see UNIQUE_ID.
          UNIQUE_ID="${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${GITHUB_SHA}"
          echo "UNIQUE_ID=${UNIQUE_ID}" >> "${GITHUB_ENV}"
          tox -vv -e profile -- --output_name "${UNIQUE_ID}"

      ## The token provided needs contents and pages access to the target repo
      ## Token can be (re)generated by a member of the UCL organisation,
      ## the current member is the rc-softdev-admin.
      ## [10-07-2023] The current token will expire 10-07-2024
      - name: Push results to profiling repository
        uses: dmnemec/[email protected]
        env:
          API_TOKEN_GITHUB: ${{ secrets.PROFILING_ACCESS_TOKEN }}
        with:
          source_file: results
          destination_repo: UCL/TLOmodel-profiling
          destination_folder: .
          destination_branch: results
          user_email: [email protected]
          user_name: rc-softdev-admin

      - name: Trigger website rebuild
        uses: peter-evans/repository-dispatch@v2
        with:
          token: ${{ secrets.PROFILING_ACCESS_TOKEN }}
          repository: UCL/TLOmodel-profiling
          event-type: new-profiling-results
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ coverage.xml
.hypothesis/
.pytest_cache/

# Profiling results files
src/scripts/profiling/results/
results/

# Translations
*.mo
*.pot
Expand Down
3 changes: 3 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ exclude docs/_*.rst
exclude docs/hsi_events.csv

global-exclude *.py[cod] __pycache__ *.so *.dylib .ipynb_checkpoints/** *~

# exclude profiling scripts from minimal build
recursive-exclude profiling *
3 changes: 3 additions & 0 deletions requirements/dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@ pytest
virtualenv
tox

# Profiling
pyinstrument

# Building requirements files
pip-tools
18 changes: 10 additions & 8 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#
# This file is autogenerated by pip-compile
# To update, run:
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
# pip-compile --output-file=requirements/dev.txt requirements/dev.in
#
Expand All @@ -27,14 +27,14 @@ azure-core==1.11.0
# azure-storage-file-share
azure-identity==1.5.0
# via -r requirements/base.in
azure-keyvault==4.1.0
# via -r requirements/base.in
azure-keyvault-certificates==4.2.1
# via azure-keyvault
azure-keyvault-keys==4.3.1
# via azure-keyvault
azure-keyvault-secrets==4.2.0
# via azure-keyvault
azure-keyvault==4.1.0
# via -r requirements/base.in
azure-storage-file-share==12.4.1
# via -r requirements/base.in
certifi==2020.12.5
Expand Down Expand Up @@ -83,12 +83,12 @@ kiwisolver==1.3.1
# via matplotlib
matplotlib==3.3.4
# via -r requirements/base.in
msal-extensions==0.3.0
# via azure-identity
msal==1.9.0
# via
# azure-identity
# msal-extensions
msal-extensions==0.3.0
# via azure-identity
msrest==0.6.21
# via
# azure-batch
Expand Down Expand Up @@ -131,6 +131,8 @@ py==1.10.0
# tox
pycparser==2.20
# via cffi
pyinstrument==4.5.0
# via -r requirements/dev.in
pyjwt[crypto]==2.0.1
# via
# adal
Expand All @@ -150,15 +152,15 @@ python-dateutil==2.8.1
# pandas
pytz==2021.1
# via pandas
requests-oauthlib==1.3.0
# via msrest
requests==2.25.1
# via
# adal
# azure-core
# msal
# msrest
# requests-oauthlib
requests-oauthlib==1.3.0
# via msrest
scipy==1.6.1
# via -r requirements/base.in
six==1.15.0
Expand Down
20 changes: 20 additions & 0 deletions src/scripts/profiling/_parameters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Parameter set passed to the profiling entry point.

``scale_run_parameters`` holds the keyword arguments that
``run_profiling.py`` forwards to ``scale_run()`` when profiling the model.
"""
from _paths import TLO_OUTPUT_DIR, TLO_ROOT

# Keyword arguments for scale_run(); keys must match its parameter names.
scale_run_parameters = {
    "years": 0,
    "months": 1,
    "initial_population": 50000,
    "tlo_dir": TLO_ROOT,
    "output_dir": TLO_OUTPUT_DIR,
    "log_filename": "scale_run_profiling",
    "log_level": "DEBUG",
    "parse_log_file": False,
    "show_progress_bar": True,
    "seed": 0,
    "disable_health_system": False,
    "disable_spurious_symptoms": False,
    "capabilities_coefficient": None,
    "mode_appt_constraints": 0,
    "save_final_population": False,
    "record_hsi_event_details": False,
}
8 changes: 8 additions & 0 deletions src/scripts/profiling/_paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import os
from pathlib import Path

PROFILING_DIR = Path(os.path.abspath(os.path.dirname(__file__)))
PROFILING_RESULTS = (PROFILING_DIR / "results").resolve()

TLO_ROOT = (PROFILING_DIR / ".." / ".." / "..").resolve()
TLO_OUTPUT_DIR = (TLO_ROOT / "outputs").resolve()
130 changes: 130 additions & 0 deletions src/scripts/profiling/run_profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import argparse
import os
import warnings
from datetime import datetime
from pathlib import Path
from typing import Optional

from pyinstrument import Profiler
from pyinstrument.renderers import HTMLRenderer, JSONRenderer

from _parameters import scale_run_parameters
from _paths import PROFILING_RESULTS
from scale_run import scale_run

# Description text shown by argparse when the script is invoked with --help.
HELP_STR = (
    "Produces profiling runs for a selection of models and parameters,\n"
    "writing the results in HTML and/or JSON format.\n"
    "Output names will default to the profiling timestamp if not provided."
)


def current_time(formatstr: str = "%Y-%m-%d_%H%M") -> str:
    """Return the current UTC time rendered via strftime with *formatstr*."""
    timestamp = datetime.utcnow()
    return timestamp.strftime(formatstr)


def run_profiling(
    output_dir: Path = PROFILING_RESULTS,
    output_name: Optional[Path] = None,
    write_pyis: bool = True,
    write_html: bool = False,
    write_json: bool = False,
    interval: float = 1e-1,
) -> None:
    """Profile the scale_run model and write the session to file.

    :param output_dir: Directory under which a timestamped directory is
        created to hold this run's outputs.
    :param output_name: Stem for the output file name(s); files are named
        "output.*" when this is None.
    :param write_pyis: Write the raw .pyisession profiler output.
    :param write_html: Render the recorded session to HTML.
    :param write_json: Render the recorded session to JSON.
    :param interval: Seconds between stack-capture frames while profiling.
    """
    # Silence all warnings so they do not pollute the profiling output.
    warnings.filterwarnings("ignore")

    # Create the directory that this profiling run will live in,
    # e.g. <output_dir>/2023/09/07/1200
    output_dir = output_dir / current_time("%Y/%m/%d/%H%M")
    os.makedirs(output_dir, exist_ok=True)

    # Assign output filenames, defaulting the stem to "output" if no name given
    stem = "output" if output_name is None else output_name.stem
    output_pyis_file = output_dir / f"{stem}.pyisession"
    output_html_file = output_dir / f"{stem}.html"
    output_json_file = output_dir / f"{stem}.json"

    # Create the profiler to record the stack.
    # An instance of a Profiler can be start()-ed and stop()-ped multiple times,
    # combining the recorded sessions into one at the end.
    # As such, the same profiler can be used to record the profile of multiple
    # scripts, however this may create large datafiles so using separate
    # profilers is preferable.
    p = Profiler(interval=interval)

    print(f"[{current_time('%H:%M:%S')}:INFO] Starting profiling runs")

    # Profile scale_run
    scale_run(**scale_run_parameters, profiler=p)

    print(f"[{current_time('%H:%M:%S')}:INFO] Profiling runs complete")

    # Fetch the recorded session: if multiple scripts are to be profiled,
    # this needs to be done after each model "run",
    # and p needs to be re-initialised before starting the next model run.
    scale_run_session = p.last_session

    # Write outputs to files
    # Renderer initialisation options:
    #   show_all: removes library calls where identifiable
    #   timeline: if true, samples are left in chronological order rather than
    #             total time
    if write_pyis:
        # Fixed: this message previously reported output_html_file while
        # actually saving the .pyisession file.
        print(f"Writing {output_pyis_file}", end="...", flush=True)
        scale_run_session.save(output_pyis_file)
        print("done")
    if write_html:
        html_renderer = HTMLRenderer(show_all=False, timeline=False)
        print(f"Writing {output_html_file}", end="...", flush=True)
        with open(output_html_file, "w") as f:
            f.write(html_renderer.render(scale_run_session))
        print("done")
    if write_json:
        json_renderer = JSONRenderer(show_all=False, timeline=False)
        print(f"Writing {output_json_file}", end="...", flush=True)
        with open(output_json_file, "w") as f:
            f.write(json_renderer.render(scale_run_session))
        print("done")


if __name__ == "__main__":
    # Command-line interface mirroring run_profiling()'s keyword arguments.
    parser = argparse.ArgumentParser(description=HELP_STR)
    # NOTE(review): store_true flags default to False, so invoking the script
    # with no flags passes write_pyis=False, overriding run_profiling's
    # write_pyis=True default — confirm this "write nothing unless asked"
    # CLI behaviour is intended.
    parser.add_argument(
        "--pyis",
        action="store_true",
        help="Write .pyisession output.",  # fixed typo: was ".ipysession"
        dest="write_pyis",
    )
    parser.add_argument(
        "--html", action="store_true", help="Write HTML output.", dest="write_html"
    )
    parser.add_argument(
        "--json", action="store_true", help="Write JSON output.", dest="write_json"
    )
    parser.add_argument(
        "--output_dir",
        type=Path,
        help="Redirect the output(s) to this directory.",
        default=PROFILING_RESULTS,
    )
    parser.add_argument(
        "--output_name",
        type=Path,
        help="Name to give to the output file(s). File extensions will automatically be appended.",
        default=None,
    )
    parser.add_argument(
        "-i",
        "--interval-seconds",
        dest="interval",
        type=float,
        help="Interval in seconds between capture frames for profiling.",
        default=1e-1,
    )

    args = parser.parse_args()
    run_profiling(**vars(args))
Loading

0 comments on commit 51f61a7

Please sign in to comment.