Minor profiling output improvements (#1236)

matt-graham · willGraham01 · web-flow · commit 92e5b139d792 · 2024-03-20T16:30:41.000Z
* Write file name not full path as html_output stat

* Explicitly show tlo and hide pandas frames in profiling output

* Add option for flat profiling output

* Only run profiling variable setup if we actually need to

* Fix SHA that is reported on comment-triggered PRs

* Slightly more verbose step name

* Fail early when ansi2html not available and flat HTML output requested

* Disable progress bar and log output by default during profiling runs

---------

Co-authored-by: willGraham01 <1willgraham@gmail.com>
diff --git a/.github/workflows/run-profiling.yaml b/.github/workflows/run-profiling.yaml
@@ -31,17 +31,33 @@ jobs:
   set-variables:
     name: Create unique output file identifier and artifact name
     runs-on: ubuntu-latest
+    if: (github.event_name != 'issue_comment') || ((github.event_name == 'issue_comment') && (github.event.comment.body == '/run profiling'))
     outputs:
       profiling-output-dir: profiling_results/
       profiling-filename: ${{ steps.set-profiling-filename.outputs.name }}
       artifact-name: ${{ steps.set-artifact-name.outputs.name }}
-      profiling-on-sha: ${{ steps.set-github-info.outputs.sha }}
+      profiling-on-sha: ${{ steps.determine-correct-sha.outputs.result }}
       profiling-event-trigger: ${{ steps.set-github-info.outputs.event }}
     steps:
+      - id: determine-correct-sha
+        uses: actions/github-script@v7
+        with:
+          result-encoding: string
+          script: |
+            // Non-comment triggers carry no issue payload (hence `?.`): use the triggering SHA.
+            if (!context.payload.issue?.pull_request) { return context.sha; }
+            // Comment on a PR: report the PR head commit rather than context.sha.
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.issue.owner,
+              repo: context.issue.repo,
+              pull_number: context.issue.number,
+            });
+            return pr.head.sha;
+
       - id: set-profiling-filename
         name: Set profiling output file name
         run: |
-          echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${GITHUB_SHA}" >> "${GITHUB_OUTPUT}"
+          echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${{ steps.determine-correct-sha.outputs.result }}" >> "${GITHUB_OUTPUT}"
 
       - id: set-artifact-name
         name: Set artifact name
@@ -51,7 +67,7 @@ jobs:
       - id: set-github-info
         name: Fix Git and GitHub information when passing between workflows
         run: |
-          echo "sha=${GITHUB_SHA}" >> "${GITHUB_OUTPUT}"
+          echo "sha=${{ steps.determine-correct-sha.outputs.result }}" >> "${GITHUB_OUTPUT}"
           echo "event=${GITHUB_EVENT_NAME}" >> "${GITHUB_OUTPUT}"
 
   profile-on-comment:
@@ -65,11 +81,13 @@ jobs:
       commands: |
         tox -vv -e profile -- \
           --html \
+          --flat-html \
           --root-output-dir ${{ needs.set-variables.outputs.profiling-output-dir }} \
           --output-name ${{ needs.set-variables.outputs.profiling-filename }} \
           --additional-stats \
           sha=${{ needs.set-variables.outputs.profiling-on-sha }} \
-          trigger=${{ needs.set-variables.outputs.profiling-event-trigger }}
+          trigger=${{ needs.set-variables.outputs.profiling-event-trigger }} \
+          --disable-log-output-to-stdout
       description: Profiled run of the model
       timeout-minutes: 8640
       application-organization: UCL
@@ -98,11 +116,13 @@ jobs:
         run: |
           tox -vv -e profile -- \
             --html \
+            --flat-html \
             --root-output-dir ${{ needs.set-variables.outputs.profiling-output-dir }} \
             --output-name ${{ needs.set-variables.outputs.profiling-filename }} \
             --additional-stats \
             sha=${{ needs.set-variables.outputs.profiling-on-sha }} \
-            trigger=${{ needs.set-variables.outputs.profiling-event-trigger }}
+            trigger=${{ needs.set-variables.outputs.profiling-event-trigger }} \
+            --disable-log-output-to-stdout
 
       ## Upload the output as an artifact so we can push it to the profiling repository
       - name: Save results as artifact
diff --git a/pyproject.toml b/pyproject.toml
@@ -56,6 +56,7 @@ dev = [
     "pylint",
     "ruff",
     # Profiling
+    "ansi2html",
     "psutil",
     "pyinstrument>=4.3",
     # Building requirements files
diff --git a/requirements/dev.txt b/requirements/dev.txt
@@ -6,6 +6,8 @@
 #
 adal==1.2.7
     # via msrestazure
+ansi2html==1.9.1
+    # via tlo (pyproject.toml)
 astroid==3.0.0
     # via pylint
 azure-batch==14.0.0
diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py
@@ -9,10 +9,16 @@
 import numpy as np
 from psutil import disk_io_counters
 from pyinstrument import Profiler
-from pyinstrument.renderers import HTMLRenderer
+from pyinstrument.renderers import ConsoleRenderer, HTMLRenderer
 from pyinstrument.session import Session
 from scale_run import save_arguments_to_json, scale_run
 
+try:
+    from ansi2html import Ansi2HTMLConverter
+    ANSI2HTML_AVAILABLE = True
+except ImportError:
+    ANSI2HTML_AVAILABLE = False
+
 from tlo import Simulation
 
 _PROFILING_RESULTS: Path = (Path(__file__).parents[3] / "profiling_results").resolve()
@@ -176,17 +182,26 @@ def run_profiling(
     output_name: str = "profiling",
     write_html: bool = False,
     write_pyisession: bool = False,
+    write_flat_html: bool = True,
     interval: float = 2e-1,
     initial_population: int = 50000,
     simulation_years: int = 5,
     simulation_months: int = 0,
     mode_appt_constraints: Literal[0, 1, 2] = 2,
     additional_stats: Optional[List[Tuple[str, str]]] = None,
+    show_progress_bar: bool = False,
+    disable_log_output_to_stdout: bool = False,
 ) -> None:
     """
     Uses pyinstrument to profile the scale_run simulation,
     writing the output in the requested formats.
     """
+    if write_flat_html and not ANSI2HTML_AVAILABLE:
+        # Check if flat HTML output requested but ansi2html module not available at
+        # _start_ of function to avoid erroring after a potentially long profiling run
+        msg = "ansi2html required for flat HTML output."
+        raise ValueError(msg)
+
     additional_stats = dict(() if additional_stats is None else additional_stats)
 
     # Create the profiler to record the stack
@@ -208,7 +223,7 @@ def run_profiling(
         "log_filename": "scale_run_profiling",
         "log_level": "WARNING",
         "parse_log_file": False,
-        "show_progress_bar": True,
+        "show_progress_bar": show_progress_bar,
         "seed": 0,
         "disable_health_system": False,
         "disable_spurious_symptoms": False,
@@ -218,6 +233,7 @@ def run_profiling(
         "record_hsi_event_details": False,
         "ignore_warnings": True,
         "log_final_population_checksum": False,
+        "disable_log_output_to_stdout": disable_log_output_to_stdout,
     }
 
     output_arg_file = output_dir / f"{output_name}.args.json"
@@ -253,7 +269,11 @@ def run_profiling(
         # Renderer initialisation options:
         # show_all: removes library calls where identifiable
         # timeline: if true, samples are left in chronological order rather than total time
-        html_renderer = HTMLRenderer(show_all=False, timeline=False)
+        html_renderer = HTMLRenderer(
+            show_all=False,
+            timeline=False,
+            processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*"}
+        )
         print(f"Writing {output_html_file}", end="...", flush=True)
         with open(output_html_file, "w") as f:
             f.write(html_renderer.render(scale_run_session))
@@ -268,13 +288,29 @@ def run_profiling(
                 f"\tWas        : {additional_stats['html_output']}"
                 f"\tReplaced by: {output_html_file}"
             )
-        additional_stats["html_output"] = str(output_html_file)
+        additional_stats["html_output"] = str(output_html_file.name)
 
     if write_pyisession:
         output_ipysession_file = output_dir / f"{output_name}.pyisession"
         print(f"Writing {output_ipysession_file}", end="...", flush=True)
         scale_run_session.save(output_ipysession_file)
         print("done")
+        
+    if write_flat_html:
+        output_html_file = output_dir / f"{output_name}.flat.html"
+        console_renderer = ConsoleRenderer(
+            show_all=False,
+            timeline=False,
+            color=True,
+            flat=True,
+            processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*"}
+        )
+        converter = Ansi2HTMLConverter(title=output_name)
+        print(f"Writing {output_html_file}", end="...", flush=True)
+        with open(output_html_file, "w") as f:
+            f.write(converter.convert(console_renderer.render(scale_run_session)))
+        print("done")
+        additional_stats["flat_html_output"] = str(output_html_file.name)
 
     # Write the statistics file, main output
     output_stat_file = output_dir / f"{output_name}.stats.json"
@@ -329,6 +365,12 @@ def run_profiling(
         action="store_true",
         dest="write_pyisession",
     )
+    parser.add_argument(
+        "--flat-html",
+        action="store_true",
+        help="Write flat HTML output in addition to statistics output.",
+        dest="write_flat_html",
+    )
     parser.add_argument(
         "-i",
         "--interval-seconds",
@@ -382,6 +424,16 @@ def run_profiling(
             "as strings."
         ),
     )
+    parser.add_argument(
+        "--show-progress-bar",
+        help="Show simulation progress bar during simulation rather than log output",
+        action="store_true",
+    )
+    parser.add_argument(
+        "--disable-log-output-to-stdout",
+        help="Disable simulation log output being displayed in stdout stream",
+        action="store_true",
+    )
 
     args = parser.parse_args()
 
diff --git a/src/scripts/profiling/scale_run.py b/src/scripts/profiling/scale_run.py
@@ -44,6 +44,7 @@ def scale_run(
     log_level: Literal["CRITICAL", "DEBUG", "FATAL", "WARNING", "INFO"] = "WARNING",
     parse_log_file: bool = False,
     show_progress_bar: bool = False,
+    disable_log_output_to_stdout: bool = False,
     seed: int = 0,
     disable_health_system: bool = False,
     disable_spurious_symptoms: bool = False,
@@ -70,6 +71,7 @@ def scale_run(
         "filename": log_filename,
         "directory": output_dir,
         "custom_levels": {"*": getattr(logging, log_level)},
+        "suppress_stdout": disable_log_output_to_stdout,
     }
 
     sim = Simulation(
@@ -192,6 +194,11 @@ def scale_run(
         help="Show progress bar during simulation rather than log output",
         action="store_true",
     )
+    parser.add_argument(
+        "--disable-log-output-to-stdout",
+        help="Disable log output being displayed in stdout stream",
+        action="store_true",
+    )
     parser.add_argument(
         "--seed",
         help="Seed for base pseudo-random number generator",

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -6,6 +6,8 @@` |
| `6` | `6` | `#` |
| `7` | `7` | `adal==1.2.7` |
| `8` | `8` | `# via msrestazure` |
| | `9` | `+ansi2html==1.9.1` |
| | `10` | `+ # via tlo (pyproject.toml)` |
| `9` | `11` | `astroid==3.0.0` |
| `10` | `12` | `# via pylint` |
| `11` | `13` | `azure-batch==14.0.0` |