Skip to content

Commit 92e5b13

Browse files
Minor profiling output improvements (#1236)
* Write file name not full path as html_output stat
* Explicitly show tlo and hide pandas frames in profiling output
* Add option for flat profiling output
* Only run profiling variable setup if we actually need to
* Fix SHA that is reported on comment-triggered PRs
* Slightly more verbose step name
* Fail early when ansi2html not available and flat HTML output requested
* Disable progress bar and log output by default during profiling runs

---------

Co-authored-by: willGraham01 <[email protected]>
1 parent 864e5e1 commit 92e5b13

File tree

5 files changed

+91
-9
lines changed

5 files changed

+91
-9
lines changed

.github/workflows/run-profiling.yaml

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,33 @@ jobs:
3131
set-variables:
3232
name: Create unique output file identifier and artifact name
3333
runs-on: ubuntu-latest
34+
if: (github.event_name != 'issue_comment') || ((github.event_name == 'issue_comment') && (github.event.comment.body == '/run profiling'))
3435
outputs:
3536
profiling-output-dir: profiling_results/
3637
profiling-filename: ${{ steps.set-profiling-filename.outputs.name }}
3738
artifact-name: ${{ steps.set-artifact-name.outputs.name }}
38-
profiling-on-sha: ${{ steps.set-github-info.outputs.sha }}
39+
profiling-on-sha: ${{ steps.determine-correct-sha.outputs.result }}
3940
profiling-event-trigger: ${{ steps.set-github-info.outputs.event }}
4041
steps:
42+
- id: determine-correct-sha
43+
uses: actions/github-script@v7
44+
with:
45+
result-encoding: string
46+
script: |
47+
if (!context.payload.issue.pull_request) {
48+
return context.sha;
49+
};
50+
const { data: pr } = await github.rest.pulls.get({
51+
owner: context.issue.owner,
52+
repo: context.issue.repo,
53+
pull_number: context.issue.number,
54+
});
55+
return pr.head.sha;
56+
4157
- id: set-profiling-filename
4258
name: Set profiling output file name
4359
run: |
44-
echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${GITHUB_SHA}" >> "${GITHUB_OUTPUT}"
60+
echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${{ steps.determine-correct-sha.outputs.result }}" >> "${GITHUB_OUTPUT}"
4561
4662
- id: set-artifact-name
4763
name: Set artifact name
@@ -51,7 +67,7 @@ jobs:
5167
- id: set-github-info
5268
name: Fix Git and GitHub information when passing between workflows
5369
run: |
54-
echo "sha=${GITHUB_SHA}" >> "${GITHUB_OUTPUT}"
70+
echo "sha=${{ steps.determine-correct-sha.outputs.result }}" >> "${GITHUB_OUTPUT}"
5571
echo "event=${GITHUB_EVENT_NAME}" >> "${GITHUB_OUTPUT}"
5672
5773
profile-on-comment:
@@ -65,11 +81,13 @@ jobs:
6581
commands: |
6682
tox -vv -e profile -- \
6783
--html \
84+
--flat-html \
6885
--root-output-dir ${{ needs.set-variables.outputs.profiling-output-dir }} \
6986
--output-name ${{ needs.set-variables.outputs.profiling-filename }} \
7087
--additional-stats \
7188
sha=${{ needs.set-variables.outputs.profiling-on-sha }} \
72-
trigger=${{ needs.set-variables.outputs.profiling-event-trigger }}
89+
trigger=${{ needs.set-variables.outputs.profiling-event-trigger }} \
90+
--disable-log-output-to-stdout
7391
description: Profiled run of the model
7492
timeout-minutes: 8640
7593
application-organization: UCL
@@ -98,11 +116,13 @@ jobs:
98116
run: |
99117
tox -vv -e profile -- \
100118
--html \
119+
--flat-html \
101120
--root-output-dir ${{ needs.set-variables.outputs.profiling-output-dir }} \
102121
--output-name ${{ needs.set-variables.outputs.profiling-filename }} \
103122
--additional-stats \
104123
sha=${{ needs.set-variables.outputs.profiling-on-sha }} \
105-
trigger=${{ needs.set-variables.outputs.profiling-event-trigger }}
124+
trigger=${{ needs.set-variables.outputs.profiling-event-trigger }} \
125+
--disable-log-output-to-stdout
106126
107127
## Upload the output as an artifact so we can push it to the profiling repository
108128
- name: Save results as artifact

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ dev = [
5656
"pylint",
5757
"ruff",
5858
# Profiling
59+
"ansi2html",
5960
"psutil",
6061
"pyinstrument>=4.3",
6162
# Building requirements files

requirements/dev.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#
77
adal==1.2.7
88
# via msrestazure
9+
ansi2html==1.9.1
10+
# via tlo (pyproject.toml)
911
astroid==3.0.0
1012
# via pylint
1113
azure-batch==14.0.0

src/scripts/profiling/run_profiling.py

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,16 @@
99
import numpy as np
1010
from psutil import disk_io_counters
1111
from pyinstrument import Profiler
12-
from pyinstrument.renderers import HTMLRenderer
12+
from pyinstrument.renderers import ConsoleRenderer, HTMLRenderer
1313
from pyinstrument.session import Session
1414
from scale_run import save_arguments_to_json, scale_run
1515

16+
try:
17+
from ansi2html import Ansi2HTMLConverter
18+
ANSI2HTML_AVAILABLE = True
19+
except ImportError:
20+
ANSI2HTML_AVAILABLE = False
21+
1622
from tlo import Simulation
1723

1824
_PROFILING_RESULTS: Path = (Path(__file__).parents[3] / "profiling_results").resolve()
@@ -176,17 +182,26 @@ def run_profiling(
176182
output_name: str = "profiling",
177183
write_html: bool = False,
178184
write_pyisession: bool = False,
185+
write_flat_html: bool = True,
179186
interval: float = 2e-1,
180187
initial_population: int = 50000,
181188
simulation_years: int = 5,
182189
simulation_months: int = 0,
183190
mode_appt_constraints: Literal[0, 1, 2] = 2,
184191
additional_stats: Optional[List[Tuple[str, str]]] = None,
192+
show_progress_bar: bool = False,
193+
disable_log_output_to_stdout: bool = False,
185194
) -> None:
186195
"""
187196
Uses pyinstrument to profile the scale_run simulation,
188197
writing the output in the requested formats.
189198
"""
199+
if write_flat_html and not ANSI2HTML_AVAILABLE:
200+
# Check if flat HTML output requested but ansi2html module not available at
201+
# _start_ of function to avoid erroring after a potentially long profiling run
202+
msg = "ansi2html required for flat HTML output."
203+
raise ValueError(msg)
204+
190205
additional_stats = dict(() if additional_stats is None else additional_stats)
191206

192207
# Create the profiler to record the stack
@@ -208,7 +223,7 @@ def run_profiling(
208223
"log_filename": "scale_run_profiling",
209224
"log_level": "WARNING",
210225
"parse_log_file": False,
211-
"show_progress_bar": True,
226+
"show_progress_bar": show_progress_bar,
212227
"seed": 0,
213228
"disable_health_system": False,
214229
"disable_spurious_symptoms": False,
@@ -218,6 +233,7 @@ def run_profiling(
218233
"record_hsi_event_details": False,
219234
"ignore_warnings": True,
220235
"log_final_population_checksum": False,
236+
"disable_log_output_to_stdout": disable_log_output_to_stdout,
221237
}
222238

223239
output_arg_file = output_dir / f"{output_name}.args.json"
@@ -253,7 +269,11 @@ def run_profiling(
253269
# Renderer initialisation options:
254270
# show_all: removes library calls where identifiable
255271
# timeline: if true, samples are left in chronological order rather than total time
256-
html_renderer = HTMLRenderer(show_all=False, timeline=False)
272+
html_renderer = HTMLRenderer(
273+
show_all=False,
274+
timeline=False,
275+
processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*"}
276+
)
257277
print(f"Writing {output_html_file}", end="...", flush=True)
258278
with open(output_html_file, "w") as f:
259279
f.write(html_renderer.render(scale_run_session))
@@ -268,13 +288,29 @@ def run_profiling(
268288
f"\tWas : {additional_stats['html_output']}"
269289
f"\tReplaced by: {output_html_file}"
270290
)
271-
additional_stats["html_output"] = str(output_html_file)
291+
additional_stats["html_output"] = str(output_html_file.name)
272292

273293
if write_pyisession:
274294
output_ipysession_file = output_dir / f"{output_name}.pyisession"
275295
print(f"Writing {output_ipysession_file}", end="...", flush=True)
276296
scale_run_session.save(output_ipysession_file)
277297
print("done")
298+
299+
if write_flat_html:
300+
output_html_file = output_dir / f"{output_name}.flat.html"
301+
console_renderer = ConsoleRenderer(
302+
show_all=False,
303+
timeline=False,
304+
color=True,
305+
flat=True,
306+
processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*"}
307+
)
308+
converter = Ansi2HTMLConverter(title=output_name)
309+
print(f"Writing {output_html_file}", end="...", flush=True)
310+
with open(output_html_file, "w") as f:
311+
f.write(converter.convert(console_renderer.render(scale_run_session)))
312+
print("done")
313+
additional_stats["flat_html_output"] = str(output_html_file.name)
278314

279315
# Write the statistics file, main output
280316
output_stat_file = output_dir / f"{output_name}.stats.json"
@@ -329,6 +365,12 @@ def run_profiling(
329365
action="store_true",
330366
dest="write_pyisession",
331367
)
368+
parser.add_argument(
369+
"--flat-html",
370+
action="store_true",
371+
help="Write flat HTML output in addition to statistics output.",
372+
dest="write_flat_html",
373+
)
332374
parser.add_argument(
333375
"-i",
334376
"--interval-seconds",
@@ -382,6 +424,16 @@ def run_profiling(
382424
"as strings."
383425
),
384426
)
427+
parser.add_argument(
428+
"--show-progress-bar",
429+
help="Show simulation progress bar during simulation rather than log output",
430+
action="store_true",
431+
)
432+
parser.add_argument(
433+
"--disable-log-output-to-stdout",
434+
help="Disable simulation log output being displayed in stdout stream",
435+
action="store_true",
436+
)
385437

386438
args = parser.parse_args()
387439

src/scripts/profiling/scale_run.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def scale_run(
4444
log_level: Literal["CRITICAL", "DEBUG", "FATAL", "WARNING", "INFO"] = "WARNING",
4545
parse_log_file: bool = False,
4646
show_progress_bar: bool = False,
47+
disable_log_output_to_stdout: bool = False,
4748
seed: int = 0,
4849
disable_health_system: bool = False,
4950
disable_spurious_symptoms: bool = False,
@@ -70,6 +71,7 @@ def scale_run(
7071
"filename": log_filename,
7172
"directory": output_dir,
7273
"custom_levels": {"*": getattr(logging, log_level)},
74+
"suppress_stdout": disable_log_output_to_stdout,
7375
}
7476

7577
sim = Simulation(
@@ -192,6 +194,11 @@ def scale_run(
192194
help="Show progress bar during simulation rather than log output",
193195
action="store_true",
194196
)
197+
parser.add_argument(
198+
"--disable-log-output-to-stdout",
199+
help="Disable log output being displayed in stdout stream",
200+
action="store_true",
201+
)
195202
parser.add_argument(
196203
"--seed",
197204
help="Seed for base pseudo-random number generator",

0 commit comments

Comments
 (0)