diff --git a/.gitignore b/.gitignore index 616ff42acf..c01ebc2f05 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,9 @@ coverage.xml .hypothesis/ .pytest_cache/ +# Profiling +src/scripts/profiling/html/ + # Translations *.mo *.pot diff --git a/requirements/dev.in b/requirements/dev.in index ee827f5070..ba610b0d2d 100644 --- a/requirements/dev.in +++ b/requirements/dev.in @@ -5,5 +5,8 @@ pytest virtualenv tox +# Profiling +pyinstrument + # Building requirements files pip-tools diff --git a/requirements/dev.txt b/requirements/dev.txt index 995f8112d8..4d0772dc85 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,6 +1,6 @@ # -# This file is autogenerated by pip-compile -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile --output-file=requirements/dev.txt requirements/dev.in # @@ -27,14 +27,14 @@ azure-core==1.11.0 # azure-storage-file-share azure-identity==1.5.0 # via -r requirements/base.in +azure-keyvault==4.1.0 + # via -r requirements/base.in azure-keyvault-certificates==4.2.1 # via azure-keyvault azure-keyvault-keys==4.3.1 # via azure-keyvault azure-keyvault-secrets==4.2.0 # via azure-keyvault -azure-keyvault==4.1.0 - # via -r requirements/base.in azure-storage-file-share==12.4.1 # via -r requirements/base.in certifi==2020.12.5 @@ -83,12 +83,12 @@ kiwisolver==1.3.1 # via matplotlib matplotlib==3.3.4 # via -r requirements/base.in -msal-extensions==0.3.0 - # via azure-identity msal==1.9.0 # via # azure-identity # msal-extensions +msal-extensions==0.3.0 + # via azure-identity msrest==0.6.21 # via # azure-batch @@ -131,6 +131,8 @@ py==1.10.0 # tox pycparser==2.20 # via cffi +pyinstrument==4.5.0 + # via -r requirements/dev.in pyjwt[crypto]==2.0.1 # via # adal @@ -150,8 +152,6 @@ python-dateutil==2.8.1 # pandas pytz==2021.1 # via pandas -requests-oauthlib==1.3.0 - # via msrest requests==2.25.1 # via # adal @@ -159,6 +159,8 @@ requests==2.25.1 # msal # msrest # requests-oauthlib 
+requests-oauthlib==1.3.0 + # via msrest scipy==1.6.1 # via -r requirements/base.in six==1.15.0 diff --git a/src/scripts/profiling/README.md b/src/scripts/profiling/README.md new file mode 100644 index 0000000000..b90dddd3e4 --- /dev/null +++ b/src/scripts/profiling/README.md @@ -0,0 +1,31 @@ +# Profiling with `pyinstrument` + +Activate your developer environment, and navigate to the root of the TLOModel repository. +Run +```sh +python src/scripts/profiling/profile.py HTML_OUTPUT_LOCATION +``` +to run the profiling script (currently only supports `scale_run.py`). +You can also request command-line help using the `-h` or `--help` flags. +If you do not provide the `HTML_OUTPUT_LOCATION`, the script will write the outputs to the default location (`profiling/html`). + +## Files within `profiling/` + +Utility files: +- `_paths.py`: Defines some absolute paths to ensure that the profiler writes outputs to the correct locations and the script is robust against being run in different working directories. +- `shared.py`: Logging and other processes that are shared across multiple files. + +Files that are used to wrap the automatic profiling run: +- `parameters.py`: Parameters for each of the models that the profiler should run, stored as dictionaries. +- `profile.py`: Main profiling script; runs all models that need to be profiled and outputs results. + +Models which are run by the profiler: +- `scale_run.py`: A run of the full model at scale using all disease modules considered complete and all +modules for birth / labour / newborn outcome.
+ +Models which are not presently used by the profiler, but can be run locally: +- `batch_test.py` +- `heavy_use_of_bed_days.py` +- `heavy_use_of_spurious_symptoms.py` +- `run_full_model_with_hard_constraints_in_healthsystem.py` +- `run_with_high_intensity_of_HSI_and_simplified_births.py` \ No newline at end of file diff --git a/src/scripts/profiling/_paths.py b/src/scripts/profiling/_paths.py new file mode 100644 index 0000000000..556b6f94e2 --- /dev/null +++ b/src/scripts/profiling/_paths.py @@ -0,0 +1,8 @@ +import os +from pathlib import Path + +PROFILING_DIR = Path(os.path.abspath(os.path.dirname(__file__))) +PROFILING_HTML_DIR = (PROFILING_DIR / "html").resolve() + +TLO_ROOT = (PROFILING_DIR / ".." / ".." / "..").resolve() +TLO_OUTPUT_DIR = (TLO_ROOT / "outputs").resolve() diff --git a/src/scripts/profiling/parameters.py b/src/scripts/profiling/parameters.py new file mode 100644 index 0000000000..65107191f1 --- /dev/null +++ b/src/scripts/profiling/parameters.py @@ -0,0 +1,21 @@ +from _paths import TLO_ROOT, TLO_OUTPUT_DIR + +# Parameters to pass to scale_run +scale_run_parameters = { + "years": 0, + "months": 1, + "initial_population": 50000, + "tlo_dir": TLO_ROOT, + "output_dir": TLO_OUTPUT_DIR, + "log_filename": "scale_run_benchmark", + "log_level": "DEBUG", + "parse_log_file": False, + "show_progress_bar": True, + "seed": 0, + "disable_health_system": False, + "disable_spurious_symptoms": False, + "capabilities_coefficient": None, + "mode_appt_constraints": 2, + "save_final_population": False, + "record_hsi_event_details": False, +} diff --git a/src/scripts/profiling/profile.py b/src/scripts/profiling/profile.py new file mode 100644 index 0000000000..e7d56d2bc3 --- /dev/null +++ b/src/scripts/profiling/profile.py @@ -0,0 +1,67 @@ +import argparse +from datetime import datetime + +import os +from pathlib import Path +import warnings + +from pyinstrument import Profiler +from pyinstrument.renderers import HTMLRenderer + +from _paths import PROFILING_HTML_DIR 
def current_time(formatstr: str = "%Y-%m-%d_%H%M") -> str:
    """Return the current UTC time formatted with ``formatstr``.

    :param formatstr: A ``strftime`` format string.
    :return: The formatted timestamp.
    """
    # Local import: only ``datetime`` itself is imported at file level.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated; an aware UTC datetime renders the
    # same text for formats that do not include timezone directives.
    return datetime.now(timezone.utc).strftime(formatstr)


def resolve_output_html_file(output_html_dir=None) -> Path:
    """Return the timestamped HTML file path for a profiling report.

    The target directory (including any missing parents) is created if it
    does not exist yet.

    :param output_html_dir: Directory to write into; ``None`` selects
        ``PROFILING_HTML_DIR``.
    :return: ``<directory>/<current_time()>.html``.
    """
    if output_html_dir is None:
        target_dir = Path(PROFILING_HTML_DIR)
    else:
        target_dir = Path(output_html_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    return target_dir / (current_time() + ".html")


def profile_all(output_html_dir: str = None) -> None:
    """Run the profiled models and write the combined result as an HTML file.

    :param output_html_dir: Directory into which the HTML report is written.
        When ``None`` the report goes to ``PROFILING_HTML_DIR``.
    """
    # Deferred so that importing this module (e.g. for ``--help``) does not
    # import the model code; it is only needed once profiling starts.
    from parameters import scale_run_parameters
    from scale_run import scale_run

    warnings.filterwarnings("ignore")

    # Resolve (and create) the output location before the potentially long
    # run, so a bad path fails early. The caller's directory is honoured.
    output_html_file = resolve_output_html_file(output_html_dir)

    # Setup the profiler, to record the stack every interval seconds
    p = Profiler(interval=1e-3)

    print(f"[{current_time('%H:%M:%S')}:INFO] Starting profiling runs")
    # The profiler is passed in so that each run can start and stop it itself
    scale_run(**scale_run_parameters, profiler=p)
    print(f"[{current_time('%H:%M:%S')}:INFO] Profiling runs complete")

    # NOTE(review): the original author expects last_session to hold the
    # combination of all recorded runs - confirm against pyinstrument docs.
    profiled_session = p.last_session

    # Parse results into HTML
    # show_all: removes library calls where identifiable
    # timeline: if true, samples are left in chronological order rather than total time
    html_renderer = HTMLRenderer(show_all=False, timeline=False)

    # Write HTML file
    print(f"Writing output to: {output_html_file}", end="...", flush=True)
    with open(output_html_file, "w", encoding="utf-8") as f:
        f.write(html_renderer.render(profiled_session))
    print("done")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Run all profiling scripts and save the results."
    )
    parser.add_argument(
        "output_html_dir",
        nargs="?",
        type=str,
        default=None,
        help="Directory into which to write profiling results as HTML files.",
    )

    args = parser.parse_args()
    profile_all(**vars(args))
-) -parser.add_argument( - "--output-dir", - type=Path, - help="Directory to write output to", - default="./outputs" -) -parser.add_argument( - "--log-filename", type=str, help="Filename to use for log", default="for_profiling" -) -parser.add_argument( - "--log-level", - type=str, - help="Level to log at", - choices=("CRITICAL", "DEBUG", "FATAL", "WARNING", "INFO"), - default="WARNING" -) -parser.add_argument( - "--parse-log-file", - help=( - "Parse log file to create log dataframe at end of simulation (only useful with " - "interactive -i runs)" - ), - action="store_true", -) -parser.add_argument( - "--show-progress-bar", - help="Show progress bar during simulation rather than log output", - action="store_true", -) -parser.add_argument( - "--seed", - help="Seed for base pseudo-random number generator", - type=int, - default=0, -) -parser.add_argument( - "--disable-health-system", - help=( - "Disable health system - i.e. no processing happens by the health system but " - "all HSI Events run" - ), - action="store_true" -) -parser.add_argument( - "--disable-spurious-symptoms", - help="Disable the generation of spurious symptoms in SymptomManager", - action="store_true" -) -parser.add_argument( - "--capabilities-coefficient", - help=( - "Capabilities coefficient to use in HealthSystem. If not specified the ratio of" - " the initial population to the estimated 2010 population will be used." - ), - type=float, - default=None, -) -parser.add_argument( - "--mode-appt-constraints", - help=( - "Mode of constraints to use in HealthSystem (0: no constraints - all events " - "run with no squeeze factor, 1: elastic, all events run with squeeze factor, " - "2: hard, only events with no squeeze factor run" - ), - choices=(0, 1, 2), - type=int, - default=2, -) -parser.add_argument( - "--ignore-warnings", - help=( - "Ignore any warnings (prevents warning messages being printed). 
Useful when " - "combined with --show-progress-bar to avoid disruption of progress bar display" - ), - action="store_true" -) -parser.add_argument( - "--save-args-json", - help="Save the parsed arguments to a JSON file", - action="store_true" -) -parser.add_argument( - "--save-final-population", - help="Save the final population dataframe to a pickle file", - action="store_true" -) -parser.add_argument( - "--record-hsi-event-details", - help=( - "Keep a record of set of non-target specific details of HSI events that are " - "run and output to a JSON file 'hsi_event_details.json' in output directory." - ), - action="store_true" -) -args = parser.parse_args() - -if args.ignore_warnings: - warnings.filterwarnings("ignore") - -if not os.path.exists(args.output_dir): - os.makedirs(args.output_dir) - -if args.save_args_json: - # Save arguments to a JSON file - with open(args.output_dir / "arguments.json", "w") as f: - args_dict = { - k: str(v) if isinstance(v, Path) else v for k, v in vars(args).items() - } - json.dump(args_dict, f, indent=4) - -# Simulation period -start_date = Date(2010, 1, 1) -end_date = start_date + pd.DateOffset(years=args.years, months=args.months) - -# The resource files -resourcefilepath = Path(args.tlo_dir / "resources") - -log_config = { - "filename": args.log_filename, - "directory": args.output_dir, - "custom_levels": {"*": getattr(logging, args.log_level)} -} - -sim = Simulation( - start_date=start_date, - seed=args.seed, - log_config=log_config, - show_progress_bar=args.show_progress_bar -) - -# Register the appropriate modules with the arguments passed through -sim.register( - *fullmodel( - resourcefilepath=resourcefilepath, - use_simplified_births=False, - module_kwargs={ - "HealthSystem": { - "disable": args.disable_health_system, - "mode_appt_constraints": args.mode_appt_constraints, - "capabilities_coefficient": args.capabilities_coefficient, - "hsi_event_count_log_period": "simulation" if args.record_hsi_event_details else None +from 
def scale_run(
    years: int,
    months: int,
    initial_population: int,
    tlo_dir: Path,
    output_dir: Path,
    log_filename: str,
    log_level: Literal["CRITICAL", "DEBUG", "FATAL", "WARNING", "INFO"],
    parse_log_file: bool,
    show_progress_bar: bool,
    seed: int,
    disable_health_system: bool,
    disable_spurious_symptoms: bool,
    capabilities_coefficient: Optional[float],
    mode_appt_constraints: Literal[0, 1, 2],
    save_final_population: bool,
    record_hsi_event_details: bool,
    profiler: Optional["pyinstrument.Profiler"] = None,
) -> None:
    """
    A run of the full model at scale using all disease modules considered complete and all
    modules for birth / labour / newborn outcome.

    :param years: Number of years to simulate for (plus ``months``).
    :param months: Number of months to simulate for (plus ``years``).
    :param initial_population: Initial population size.
    :param tlo_dir: Root TLOmodel directory; resources are read from
        ``tlo_dir / "resources"``.
    :param output_dir: Directory to write output to (created if missing).
    :param log_filename: Filename to use for log.
    :param log_level: Name of the logging level applied to all loggers.
    :param parse_log_file: Parse the log file at the end of the simulation.
    :param show_progress_bar: Show progress bar during simulation.
    :param seed: Seed for base pseudo-random number generator.
    :param disable_health_system: Passed as ``disable`` to the HealthSystem.
    :param disable_spurious_symptoms: Disable spurious symptoms in SymptomManager.
    :param capabilities_coefficient: Capabilities coefficient for the
        HealthSystem, or ``None``.
    :param mode_appt_constraints: Mode of constraints to use in HealthSystem.
    :param save_final_population: Pickle the final population dataframe to
        ``final_population.pkl`` in ``output_dir``.
    :param record_hsi_event_details: Write ``hsi_event_details.json`` to
        ``output_dir``.
    :param profiler: Optional profiler instance; it is started before the run
        and always stopped afterwards, even if the run raises.
    """
    # Profiling helpers from the sibling ``shared`` module.
    from shared import print_checksum, schedule_profile_log

    # Programmatic callers (not just the CLI) may pass a directory that does
    # not exist yet; create it outside the profiled region.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    if profiler is not None:
        profiler.start()

    try:
        # Simulation period
        start_date = Date(2010, 1, 1)
        end_date = start_date + pd.DateOffset(years=years, months=months)

        # The resource files
        resourcefilepath = Path(tlo_dir) / "resources"

        log_config = {
            "filename": log_filename,
            "directory": output_dir,
            "custom_levels": {"*": getattr(logging, log_level)},
        }

        sim = Simulation(
            start_date=start_date,
            seed=seed,
            log_config=log_config,
            show_progress_bar=show_progress_bar,
        )

        # Register the appropriate modules with the arguments passed through
        sim.register(
            *fullmodel(
                resourcefilepath=resourcefilepath,
                use_simplified_births=False,
                module_kwargs={
                    "HealthSystem": {
                        "disable": disable_health_system,
                        "mode_appt_constraints": mode_appt_constraints,
                        "capabilities_coefficient": capabilities_coefficient,
                        "hsi_event_count_log_period": (
                            "simulation" if record_hsi_event_details else None
                        ),
                    },
                    "SymptomManager": {
                        "spurious_symptoms": not disable_spurious_symptoms
                    },
                },
            )
        )

        # Run the simulation
        sim.make_initial_population(n=initial_population)
        schedule_profile_log(sim)
        sim.simulate(end_date=end_date)
        print_checksum(sim)

        if save_final_population:
            sim.population.props.to_pickle(output_dir / "final_population.pkl")

        if parse_log_file:
            # The parsed result is not used inside this function; the call is
            # kept so that the parsing step still runs when requested.
            log_parse_fn(sim.log_filepath)

        if record_hsi_event_details:
            with open(output_dir / "hsi_event_details.json", "w") as json_file:
                json.dump(
                    [
                        event_details._asdict()
                        for event_details in sim.modules[
                            "HealthSystem"
                        ].hsi_event_counts.keys()
                    ],
                    json_file,
                )
    finally:
        # Stop the profiler even when the run fails, so it is never left running
        if profiler is not None:
            profiler.stop()


def build_argument_parser(
    default_tlo_dir: Path = Path("."),
    default_output_dir: Path = Path("./outputs"),
) -> argparse.ArgumentParser:
    """Build the command-line parser for running the model at scale.

    Multi-word options accept both the hyphenated spelling (e.g.
    ``--initial-population``) and the underscored spelling (e.g.
    ``--initial_population``); both store to the same underscored attribute.

    :param default_tlo_dir: Default for ``--tlo-dir``.
    :param default_output_dir: Default for ``--output-dir``.
    :return: The configured parser.
    """
    parser = argparse.ArgumentParser(description="Run model at scale")
    parser.add_argument(
        "--years",
        type=int,
        help="Number of years to simulate for (plus any months specified by --months)",
        default=20,
    )
    parser.add_argument(
        "--months",
        type=int,
        help="Number of months to simulate for (plus any years specified by --years)",
        default=0,
    )
    parser.add_argument(
        "--initial-population",
        "--initial_population",
        type=int,
        help="Initial population size",
        default=50000,
    )
    parser.add_argument(
        "--tlo-dir",
        "--tlo_dir",
        type=Path,
        help="Root TLOmodel directory",
        default=default_tlo_dir,
    )
    parser.add_argument(
        "--output-dir",
        "--output_dir",
        type=Path,
        help="Directory to write output to",
        default=default_output_dir,
    )
    parser.add_argument(
        "--log-filename",
        "--log_filename",
        type=str,
        help="Filename to use for log",
        default="for_profiling",
    )
    parser.add_argument(
        "--log-level",
        "--log_level",
        type=str,
        help="Level to log at",
        choices=("CRITICAL", "DEBUG", "FATAL", "WARNING", "INFO"),
        default="WARNING",
    )
    parser.add_argument(
        "--parse-log-file",
        "--parse_log_file",
        help=(
            "Parse log file to create log dataframe at end of simulation (only useful with "
            "interactive -i runs)"
        ),
        action="store_true",
    )
    parser.add_argument(
        "--show-progress-bar",
        "--show_progress_bar",
        help="Show progress bar during simulation rather than log output",
        action="store_true",
    )
    parser.add_argument(
        "--seed",
        help="Seed for base pseudo-random number generator",
        type=int,
        default=0,
    )
    parser.add_argument(
        "--disable-health-system",
        "--disable_health_system",
        help=(
            "Disable health system - i.e. no processing happens by the health system but "
            "all HSI Events run"
        ),
        action="store_true",
    )
    parser.add_argument(
        "--disable-spurious-symptoms",
        "--disable_spurious_symptoms",
        help="Disable the generation of spurious symptoms in SymptomManager",
        action="store_true",
    )
    parser.add_argument(
        "--capabilities-coefficient",
        "--capabilities_coefficient",
        help=(
            "Capabilities coefficient to use in HealthSystem. If not specified the ratio of"
            " the initial population to the estimated 2010 population will be used."
        ),
        type=float,
        default=None,
    )
    parser.add_argument(
        "--mode-appt-constraints",
        "--mode_appt_constraints",
        help=(
            "Mode of constraints to use in HealthSystem (0: no constraints - all events "
            "run with no squeeze factor, 1: elastic, all events run with squeeze factor, "
            "2: hard, only events with no squeeze factor run"
        ),
        choices=(0, 1, 2),
        type=int,
        default=2,
    )
    parser.add_argument(
        "--ignore-warnings",
        "--ignore_warnings",
        help=(
            "Ignore any warnings (prevents warning messages being printed). Useful when "
            "combined with --show-progress-bar to avoid disruption of progress bar display"
        ),
        action="store_true",
    )
    parser.add_argument(
        "--save-args-json",
        "--save_args_json",
        help="Save the parsed arguments to a JSON file",
        action="store_true",
    )
    parser.add_argument(
        "--save-final-population",
        "--save_final_population",
        help="Save the final population dataframe to a pickle file",
        action="store_true",
    )
    parser.add_argument(
        "--record-hsi-event-details",
        "--record_hsi_event_details",
        help=(
            "Keep a record of set of non-target specific details of HSI events that are "
            "run and output to a JSON file 'hsi_event_details.json' in output directory."
        ),
        action="store_true",
    )
    return parser


if __name__ == "__main__":
    # Absolute default locations, independent of the working directory
    from _paths import TLO_OUTPUT_DIR, TLO_ROOT

    # Parse arguments defining run options
    args = build_argument_parser(TLO_ROOT, TLO_OUTPUT_DIR).parse_args()

    if args.ignore_warnings:
        warnings.filterwarnings("ignore")

    args.output_dir.mkdir(parents=True, exist_ok=True)

    if args.save_args_json:
        # Save arguments to a JSON file (paths are stored as strings)
        args_dict = {
            k: str(v) if isinstance(v, Path) else v for k, v in vars(args).items()
        }
        with open(args.output_dir / "arguments.json", "w") as f:
            json.dump(args_dict, f, indent=4)

    # These options only affect this script and are not scale_run parameters
    cli_only_options = {"save_args_json", "ignore_warnings"}
    inputs = {k: v for k, v in vars(args).items() if k not in cli_only_options}
    scale_run(**inputs)