From 1d3152748f91f67175ce8a52a34bc83c10ea6fbb Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Thu, 15 Feb 2024 13:51:51 +0100
Subject: [PATCH 01/93] Modularize bench_download tool

---
 tools/performance/engine-benchmarks/README.md |  3 ++
 .../engine-benchmarks/bench_download.py       | 31 ++----------
 .../engine-benchmarks/bench_tool/__init__.py  | 47 +++++++++++++++++++
 3 files changed, 54 insertions(+), 27 deletions(-)
 create mode 100644 tools/performance/engine-benchmarks/bench_tool/__init__.py

diff --git a/tools/performance/engine-benchmarks/README.md b/tools/performance/engine-benchmarks/README.md
index c37cfbe060af..13ab9778fe8b 100644
--- a/tools/performance/engine-benchmarks/README.md
+++ b/tools/performance/engine-benchmarks/README.md
@@ -4,6 +4,9 @@ This directory contains a python script `bench_download.py` for downloading
 Engine and stdlib benchmark results from GitHub, and `Engine_Benchs` Enso
 project for analysing the downloaded data.
 
+Note that for convenience, there is a `bench_tool` directory that is a Python package.
+The `bench_download.py` script uses this package.
+
 Dependencies for `bench_download.py`:
 
 - python >= 3.7
diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py
index 7743adb5627a..060447858427 100755
--- a/tools/performance/engine-benchmarks/bench_download.py
+++ b/tools/performance/engine-benchmarks/bench_download.py
@@ -67,6 +67,10 @@ import xml.etree.ElementTree as ET
 from urllib.parse import urlencode
 
+from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, ENGINE_BENCH_WORKFLOW_ID, \
+    NEW_ENGINE_BENCH_WORKFLOW_ID, STDLIBS_BENCH_WORKFLOW_ID, ENSO_COMMIT_BASE_URL, \
+    GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, JINJA_TEMPLATE
+
 
 if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7):
     print("ERROR: python version lower than 3.7")
@@ -79,33 +83,6 @@
     print("ERROR: One of pandas, numpy, or jinja2 packages not installed", file=sys.stderr)
     exit(1)
 
-DATE_FORMAT = "%Y-%m-%d"
-ENGINE_BENCH_WORKFLOW_ID = 29450898
-"""
-Workflow ID of engine benchmarks, got via `gh api
-'/repos/enso-org/enso/actions/workflows'`.
-The name of the workflow is 'Benchmark Engine'
-"""
-NEW_ENGINE_BENCH_WORKFLOW_ID = 67075764
-"""
-Workflow ID for 'Benchmark Engine' workflow, which is the new workflow
-since 2023-08-22.
-"""
-STDLIBS_BENCH_WORKFLOW_ID = 66661001
-"""
-Workflow ID of stdlibs benchmarks, got via `gh api
-'/repos/enso-org/enso/actions/workflows'`.
-The name is 'Benchmark Standard Libraries'
-"""
-GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
-""" Date format as returned from responses in GH API"""
-ENSO_COMMIT_BASE_URL = "https://github.com/enso-org/enso/commit/"
-JINJA_TEMPLATE = "templates/template_jinja.html"
-""" Path to the Jinja HTML template """
-TEMPLATES_DIR = "templates"
-GENERATED_SITE_DIR = "generated_site"
-GH_ARTIFACT_RETENTION_PERIOD = timedelta(days=90)
-
 
 class Source(Enum):
     ENGINE = "engine"
diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py
new file mode 100644
index 000000000000..ccc8b34e2628
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py
@@ -0,0 +1,47 @@
+import os
+from datetime import timedelta
+from os import path
+from typing import List, Dict, Optional, Set, Tuple
+import re
+import subprocess
+import sys
+from argparse import ArgumentParser
+import shutil
+from pathlib import Path
+
+
+def pkg_dir() -> Path:
+    """ Directory of this package """
+    return Path(os.path.dirname(os.path.realpath(__file__)))
+
+
+DATE_FORMAT = "%Y-%m-%d"
+GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
+ENGINE_BENCH_WORKFLOW_ID = 29450898
+"""
+Workflow ID of engine benchmarks, got via `gh api
+'/repos/enso-org/enso/actions/workflows'`.
+The name of the workflow is 'Benchmark Engine'
+"""
+NEW_ENGINE_BENCH_WORKFLOW_ID = 67075764
+"""
+Workflow ID for 'Benchmark Engine' workflow, which is the new workflow
+since 2023-08-22.
+"""
+STDLIBS_BENCH_WORKFLOW_ID = 66661001
+"""
+Workflow ID of stdlibs benchmarks, got via `gh api
+'/repos/enso-org/enso/actions/workflows'`.
+The name is 'Benchmark Standard Libraries'
+"""
+""" Date format as returned from responses in GH API"""
+ENSO_COMMIT_BASE_URL = "https://github.com/enso-org/enso/commit/"
+
+GH_ARTIFACT_RETENTION_PERIOD = timedelta(days=90)
+
+GENERATED_SITE_DIR = pkg_dir().parent.joinpath("generated_site")
+TEMPLATES_DIR = pkg_dir().parent.joinpath("templates")
+JINJA_TEMPLATE = TEMPLATES_DIR.joinpath("template_jinja.html")
+
+assert TEMPLATES_DIR.exists()
+assert JINJA_TEMPLATE.exists()

From 261a1cff11e6f2fe84997304b46dba9fdfb9d8e6 Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Thu, 15 Feb 2024 14:09:16 +0100
Subject: [PATCH 02/93] Move the Python version check to the beginning

---
 tools/performance/engine-benchmarks/bench_download.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py
index 060447858427..7e9f033eb1e9 100755
--- a/tools/performance/engine-benchmarks/bench_download.py
+++ b/tools/performance/engine-benchmarks/bench_download.py
@@ -45,6 +45,11 @@
 - Used as a template engine for the HTML.
""" +import sys +if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7): + print("ERROR: python version lower than 3.7") + exit(1) + import asyncio import json import logging @@ -53,8 +58,6 @@ import os import re import shutil -import subprocess -import sys import tempfile import zipfile from argparse import ArgumentParser, RawDescriptionHelpFormatter @@ -71,10 +74,6 @@ NEW_ENGINE_BENCH_WORKFLOW_ID, STDLIBS_BENCH_WORKFLOW_ID, ENSO_COMMIT_BASE_URL, \ GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, JINJA_TEMPLATE - -if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7): - print("ERROR: python version lower than 3.7") - exit(1) try: import pandas as pd import numpy as np From 49223b26a234e176d636ee33ee69413c7d41f165 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 15 Feb 2024 14:09:36 +0100 Subject: [PATCH 03/93] Improve error message for dependencies miss --- tools/performance/engine-benchmarks/bench_download.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index 7e9f033eb1e9..344e01d56821 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -80,6 +80,8 @@ import jinja2 except ModuleNotFoundError as err: print("ERROR: One of pandas, numpy, or jinja2 packages not installed", file=sys.stderr) + print("Install either with `pip install pandas numpy jinja2` or " + "with `apt-get install python3-pandas python3-numpy python3-jinja2`", file=sys.stderr) exit(1) From d6bc3a47a81f5258e61876c2dbcd24cf65499bf7 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 15 Feb 2024 14:10:56 +0100 Subject: [PATCH 04/93] Extract dataclasses into __init__ --- .../engine-benchmarks/bench_download.py | 125 +----------------- .../engine-benchmarks/bench_tool/__init__.py | 119 ++++++++++++++++- 2 files changed, 123 insertions(+), 121 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index 344e01d56821..0caa110e280d 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -63,16 +63,14 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter from csv import DictWriter from datetime import datetime, timedelta -from enum import Enum from os import path -from typing import List, Dict, Optional, Any, Union, Set -from dataclasses import dataclass +from typing import List, Dict, Optional, Any, Set import xml.etree.ElementTree as ET -from urllib.parse import urlencode -from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, ENGINE_BENCH_WORKFLOW_ID, \ - NEW_ENGINE_BENCH_WORKFLOW_ID, STDLIBS_BENCH_WORKFLOW_ID, ENSO_COMMIT_BASE_URL, \ - GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, JINJA_TEMPLATE +from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, ENSO_COMMIT_BASE_URL, \ + GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, JINJA_TEMPLATE, ENSO_REPO, Author, Commit, JobRun, JobReport, \ + BenchDatapoint, TemplateBenchData, JinjaData, Source +from bench_tool.gh import invoke_gh_api, ensure_gh_installed try: import pandas as pd @@ -85,119 +83,6 @@ exit(1) -class Source(Enum): - ENGINE = "engine" - STDLIB = "stdlib" - - def workflow_ids(self) -> List[int]: - if self == Source.ENGINE: - return [ENGINE_BENCH_WORKFLOW_ID, NEW_ENGINE_BENCH_WORKFLOW_ID] - elif self == Source.STDLIB: - return [STDLIBS_BENCH_WORKFLOW_ID] - 
else: - raise ValueError(f"Unknown source {self}") - - -@dataclass -class Author: - name: str - - -@dataclass -class Commit: - """ Corresponds to the commit from GH API """ - id: str - author: Author - timestamp: str - message: str - - -@dataclass -class JobRun: - """ - Gathered via the GH API. Defines a single run of an Engine benchmark job. - """ - id: str - display_title: str - html_url: str - run_attempt: int - """ An event as defined by the GitHub API, for example 'push' or 'schedule' """ - event: str - head_commit: Commit - - -@dataclass -class JobReport: - """ - Gathered via the GH API - a report that is pushed as an aritfact to the job. - Contains a XML file with scores for all the benchmarks. - """ - label_score_dict: Dict[str, float] - """ A mapping of benchmark labels to their scores """ - bench_run: JobRun - - -@dataclass -class BenchmarkData: - """ - Data for a single benchmark compiled from all the job reports. - """ - - @dataclass - class Entry: - score: float - commit: Commit - bench_run_url: str - bench_run_event: str - - label: str - """ Label for the benchmark, as reported by org.enso.interpreter.bench.BenchmarksRunner """ - entries: List[Entry] - """ Entries sorted by timestamps """ - - -@dataclass -class BenchDatapoint: - """ - A single datapoint that will be on the chart. `timestamp` is on X axis, - `score` on Y axis, and the rest of the fields is used either for the tooltip, - or for the selection info. - """ - timestamp: datetime - score: float - score_diff: str - """ Difference of the score with previous datapoint, or NaN """ - score_diff_perc: str - tooltip: str - bench_run_url: str - commit_id: str - commit_msg: str - commit_author: str - commit_url: str - - -@dataclass -class TemplateBenchData: - """ Data for one benchmark label (with a unique name and ID) """ - id: str - """ ID of the benchmark, must not contain dots """ - name: str - """ Human readable name of the benchmark """ - branches_datapoints: Dict[str, List[BenchDatapoint]] - """ Mapping of branches to datapoints for that branch """ - - -@dataclass -class JinjaData: - bench_source: Source - bench_datas: List[TemplateBenchData] - branches: List[str] - since: datetime - until: datetime - display_since: datetime - """ The date from which all the datapoints are first displayed """ - - def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun: return JobRun( id=str(obj["id"]), diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py index ccc8b34e2628..9f1047c18d1c 100644 --- a/tools/performance/engine-benchmarks/bench_tool/__init__.py +++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py @@ -1,5 +1,7 @@ import os -from datetime import timedelta +from dataclasses import dataclass +from datetime import timedelta, datetime +from enum import Enum from os import path from typing import List, Dict, Optional, Set, Tuple import re @@ -45,3 +47,118 @@ def pkg_dir() -> Path: assert TEMPLATES_DIR.exists() assert JINJA_TEMPLATE.exists() + + +class Source(Enum): + ENGINE = "engine" + STDLIB = "stdlib" + + def workflow_ids(self) -> List[int]: + if self == Source.ENGINE: + return [ENGINE_BENCH_WORKFLOW_ID, NEW_ENGINE_BENCH_WORKFLOW_ID] + elif self == Source.STDLIB: + return [STDLIBS_BENCH_WORKFLOW_ID] + else: + raise ValueError(f"Unknown source {self}") + +@dataclass +class Author: + name: str + + +@dataclass +class Commit: + """ Corresponds to the commit from GH API """ + id: str + author: Author + timestamp: str + message: str + 
+
+@dataclass
+class JobRun:
+    """
+    Gathered via the GH API. Defines a single run of an Engine benchmark job.
+    """
+    id: str
+    display_title: str
+    html_url: str
+    run_attempt: int
+    """ An event as defined by the GitHub API, for example 'push' or 'schedule' """
+    event: str
+    head_commit: Commit
+
+
+@dataclass
+class JobReport:
+    """
+    Gathered via the GH API - a report that is pushed as an artifact to the job.
+    Contains an XML file with scores for all the benchmarks.
+    """
+    label_score_dict: Dict[str, float]
+    """ A mapping of benchmark labels to their scores """
+    bench_run: JobRun
+
+
+@dataclass
+class BenchmarkData:
+    """
+    Data for a single benchmark compiled from all the job reports.
+    """
+
+    @dataclass
+    class Entry:
+        score: float
+        commit: Commit
+        bench_run_url: str
+        bench_run_event: str
+
+    label: str
+    """ Label for the benchmark, as reported by org.enso.interpreter.bench.BenchmarksRunner """
+    entries: List[Entry]
+    """ Entries sorted by timestamps """
+
+
+@dataclass
+class BenchDatapoint:
+    """
+    A single datapoint that will be on the chart. `timestamp` is on X axis,
+    `score` on Y axis, and the rest of the fields are used either for the tooltip,
+    or for the selection info.
+    """
+    timestamp: datetime
+    score: float
+    score_diff: str
+    """ Difference of the score with previous datapoint, or NaN """
+    score_diff_perc: str
+    tooltip: str
+    bench_run_url: str
+    commit_id: str
+    commit_msg: str
+    commit_author: str
+    commit_url: str
+
+
+@dataclass
+class TemplateBenchData:
+    """ Data for one benchmark label (with a unique name and ID) """
+    id: str
+    """ ID of the benchmark, must not contain dots """
+    name: str
+    """ Human readable name of the benchmark """
+    branches_datapoints: Dict[str, List[BenchDatapoint]]
+    """ Mapping of branches to datapoints for that branch """
+
+
+@dataclass
+class JinjaData:
+    bench_source: Source
+    bench_datas: List[TemplateBenchData]
+    branches: List[str]
+    since: datetime
+    until: datetime
+    display_since: datetime
+    """ The date from which all the datapoints are first displayed """
+
+
+

From 33575cf9578845ef8d91a0a3b781e11509a07e59 Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Thu, 15 Feb 2024 14:11:29 +0100
Subject: [PATCH 05/93] Add gh.py for GH query handling

---
 .../engine-benchmarks/bench_download.py       | 45 ++-----------
 .../engine-benchmarks/bench_tool/__init__.py  |  1 +
 .../engine-benchmarks/bench_tool/gh.py        | 66 +++++++++++++++++++
 3 files changed, 71 insertions(+), 41 deletions(-)
 create mode 100644 tools/performance/engine-benchmarks/bench_tool/gh.py

diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py
index 0caa110e280d..529023732529 100755
--- a/tools/performance/engine-benchmarks/bench_download.py
+++ b/tools/performance/engine-benchmarks/bench_download.py
@@ -161,32 +161,6 @@ def _read_json(json_file: str) -> Dict[Any, Any]:
         return json.load(f)
 
 
-async def _invoke_gh_api(endpoint: str,
-                         query_params: Dict[str, str] = {},
-                         result_as_text: bool = True) -> Union[Dict[str, Any], bytes]:
-    urlencode(query_params)
-    cmd = [
-        "gh",
-        "api",
-        f"/repos/enso-org/enso{endpoint}" + "?"
+ urlencode(query_params) - ] - logging.info(f"Starting subprocess `{' '.join(cmd)}`") - proc = await asyncio.create_subprocess_exec("gh", *cmd[1:], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - out, err = await proc.communicate() - logging.info(f"Finished subprocess `{' '.join(cmd)}`") - if proc.returncode != 0: - print("Command `" + " ".join(cmd) + "` FAILED with errcode " + str( - proc.returncode)) - print(err.decode()) - exit(proc.returncode) - if result_as_text: - return json.loads(out.decode()) - else: - return out - - class Cache: """ Cache is a directory filled with json files that have name of format .json, and @@ -270,7 +244,7 @@ async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow # Start with 1, just to determine the total count "per_page": "1" } - res = await _invoke_gh_api(f"/actions/workflows/{workflow_id}/runs", query_fields) + res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", query_fields) total_count = int(res["total_count"]) per_page = 3 logging.debug(f"Total count of all runs: {total_count} for workflow ID " @@ -279,7 +253,7 @@ async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow async def get_and_parse_run(page: int, parsed_bench_runs) -> None: _query_fields = query_fields.copy() _query_fields["page"] = str(page) - res = await _invoke_gh_api(f"/actions/workflows/{workflow_id}/runs", _query_fields) + res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", _query_fields) bench_runs_json = res["workflow_runs"] _parsed_bench_runs = [_parse_bench_run_from_json(bench_run_json) for bench_run_json in bench_runs_json] @@ -316,7 +290,7 @@ async def get_bench_report(bench_run: JobRun, cache: Cache, temp_dir: str) -> Op # There might be multiple artifacts in the artifact list for a benchmark run # We are looking for the one named 'Runtime Benchmark Report', which will # be downloaded as a ZIP file. - obj: Dict[str, Any] = await _invoke_gh_api(f"/actions/runs/{bench_run.id}/artifacts") + obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts") artifacts = obj["artifacts"] assert len(artifacts) == 1, "There should be exactly one artifact for a benchmark run" bench_report_artifact = artifacts[0] @@ -331,7 +305,7 @@ async def get_bench_report(bench_run: JobRun, cache: Cache, temp_dir: str) -> Op return None # Get contents of the ZIP artifact file - artifact_ret = await _invoke_gh_api(f"/actions/artifacts/{artifact_id}/zip", result_as_text=False) + artifact_ret = await invoke_gh_api(ENSO_REPO, f"/actions/artifacts/{artifact_id}/zip", result_as_json=False) zip_file_name = os.path.join(temp_dir, artifact_id + ".zip") logging.debug(f"Writing artifact ZIP content into {zip_file_name}") with open(zip_file_name, "wb") as zip_file: @@ -535,17 +509,6 @@ def render_html(jinja_data: JinjaData, template_file: str, html_out_fname: str) html_file.write(generated_html) -def ensure_gh_installed() -> None: - try: - out = subprocess.run(["gh", "--version"], check=True, capture_output=True) - if out.returncode != 0: - print("`gh` command not found - GH CLI utility is not installed. " - "See https://cli.github.com/", file=sys.stderr) - exit(1) - except subprocess.CalledProcessError: - print("`gh` command not found - GH CLI utility is not installed. 
" - "See https://cli.github.com/", file=sys.stderr) - exit(1) async def main(): diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py index 9f1047c18d1c..5620dca66535 100644 --- a/tools/performance/engine-benchmarks/bench_tool/__init__.py +++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py @@ -17,6 +17,7 @@ def pkg_dir() -> Path: return Path(os.path.dirname(os.path.realpath(__file__))) +ENSO_REPO = "enso-org/enso" DATE_FORMAT = "%Y-%m-%d" GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ" ENGINE_BENCH_WORKFLOW_ID = 29450898 diff --git a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py new file mode 100644 index 000000000000..550a3dda60d1 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/gh.py @@ -0,0 +1,66 @@ +import json +import logging +import os +from os import path +from typing import List, Dict, Optional, Set, Tuple, Union, Any +import re +import subprocess +import sys +from argparse import ArgumentParser +import shutil +import asyncio +from urllib.parse import urlencode + +_logger = logging.getLogger(__name__) + + +def ensure_gh_installed() -> None: + try: + out = subprocess.run(["gh", "--version"], check=True, capture_output=True) + if out.returncode != 0: + print("`gh` command not found - GH CLI utility is not installed. " + "See https://cli.github.com/", file=sys.stderr) + exit(1) + except subprocess.CalledProcessError: + print("`gh` command not found - GH CLI utility is not installed. " + "See https://cli.github.com/", file=sys.stderr) + exit(1) + + +async def invoke_gh_api( + repo: str, + endpoint: str, + query_params: Dict[str, str] = {}, + result_as_json: bool = True +) -> Union[Dict[str, Any], bytes]: + """ + Invokes the GitHub API using the `gh` command line tool. + :param repo: Repository name in the form `owner/repo` + :param endpoint: Endpoint of the query. Must start with `/`. + :param query_params: Additional query parameters. + :param result_as_json: If result should be parsed as JSON. + If false, the raw bytes are returned. + :return: + """ + assert endpoint.startswith("/") + urlencode(query_params) + cmd = [ + "gh", + "api", + f"/repos/{repo}{endpoint}" + "?" + urlencode(query_params) + ] + _logger.debug("Invoking gh API with `%s`", " ".join(cmd)) + proc = await asyncio.create_subprocess_exec("gh", *cmd[1:], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = await proc.communicate() + _logger.debug("Finished gh API `%s`", " ".join(cmd)) + if proc.returncode != 0: + _logger.error("Command `%s` FAILED with errcode %d", + " ".join(cmd), + proc.returncode) + exit(proc.returncode) + if result_as_json: + return json.loads(out.decode()) + else: + return out From 05c5591bf7a809a27c2dfb6f3e4f225711d37cb9 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 15 Feb 2024 16:18:21 +0100 Subject: [PATCH 06/93] Do not fail if there are more than 1 artifact in a bench report --- tools/performance/engine-benchmarks/bench_download.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index 529023732529..bf0914b745ba 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -292,7 +292,10 @@ async def get_bench_report(bench_run: JobRun, cache: Cache, temp_dir: str) -> Op # be downloaded as a ZIP file. 
obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts") artifacts = obj["artifacts"] - assert len(artifacts) == 1, "There should be exactly one artifact for a benchmark run" + if len(artifacts) != 1: + logging.warning("Bench run %s does not contain an artifact, but it is a successful run.", + bench_run.id) + return None bench_report_artifact = artifacts[0] assert bench_report_artifact, "Benchmark Report artifact not found" artifact_id = str(bench_report_artifact["id"]) From a1c21c782d14d2dc0d76e519bc82438a9f13d036 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 15 Feb 2024 16:30:31 +0100 Subject: [PATCH 07/93] Add jinja template functionality to template_render.py --- .../engine-benchmarks/bench_download.py | 127 +---------------- .../bench_tool/template_render.py | 134 ++++++++++++++++++ 2 files changed, 138 insertions(+), 123 deletions(-) create mode 100644 tools/performance/engine-benchmarks/bench_tool/template_render.py diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index bf0914b745ba..3e964133491d 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -46,6 +46,7 @@ """ import sys + if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7): print("ERROR: python version lower than 3.7") exit(1) @@ -67,10 +68,10 @@ from typing import List, Dict, Optional, Any, Set import xml.etree.ElementTree as ET -from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, ENSO_COMMIT_BASE_URL, \ - GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, JINJA_TEMPLATE, ENSO_REPO, Author, Commit, JobRun, JobReport, \ - BenchDatapoint, TemplateBenchData, JinjaData, Source +from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, JINJA_TEMPLATE, ENSO_REPO, Author, Commit, JobRun, JobReport, \ + TemplateBenchData, JinjaData, Source from bench_tool.gh import invoke_gh_api, ensure_gh_installed +from bench_tool.template_render import create_template_data, render_html try: import pandas as pd @@ -381,114 +382,6 @@ def populate_cache(cache_dir: str) -> Cache: return cache -def create_template_data( - job_reports_per_branch: Dict[str, List[JobReport]], - bench_labels: Set[str]) -> List[TemplateBenchData]: - """ - Creates all the necessary data for the Jinja template from all collected - benchmark job reports. - :param job_reports_per_branch: Mapping of branch name to list of job reports. - job reports should be sorted by the commit date, otherwise the difference - between scores might be wrongly computed. 
- :param bench_labels: - :return: - """ - - def pct_to_str(score_diff_perc: float) -> str: - if not np.isnan(score_diff_perc): - buff = "+" if score_diff_perc > 0 else "" - buff += "{:.5f}".format(score_diff_perc * 100) - buff += "%" - return buff - else: - return "NaN" - - def diff_str(score_diff: float, score_diff_perc: float) -> str: - if not np.isnan(score_diff): - diff_str = "+" if score_diff > 0 else "" - diff_str += "{:.5f}".format(score_diff) - diff_str += " (" - diff_str += pct_to_str(score_diff_perc) - diff_str += ")" - return diff_str - else: - return "NA" - - template_bench_datas: List[TemplateBenchData] = [] - for bench_label in bench_labels: - logging.debug(f"Creating template data for benchmark {bench_label}") - branch_datapoints: Dict[str, List[BenchDatapoint]] = {} - for branch, job_reports in job_reports_per_branch.items(): - logging.debug(f"Creating datapoints for branch {branch} from {len(job_reports)} job reports") - datapoints: List[BenchDatapoint] = [] - for job_report in job_reports: - prev_datapoint: Optional[BenchDatapoint] = \ - datapoints[-1] if len(datapoints) > 0 else None - if bench_label in job_report.label_score_dict: - score = job_report.label_score_dict[bench_label] - commit = job_report.bench_run.head_commit - timestamp = datetime.strptime( - commit.timestamp, - GH_DATE_FORMAT - ) - commit_msg_header = \ - commit.message.splitlines()[0].replace('"', "'") - series = pd.Series([ - prev_datapoint.score if prev_datapoint else None, - score - ]) - score_diff = series.diff()[1] - score_diff_perc = series.pct_change()[1] - tooltip = "score = " + str(score) + "\\n" - tooltip += "date = " + str(timestamp) + "\\n" - tooltip += "branch = " + branch + "\\n" - tooltip += "diff = " + diff_str(score_diff, score_diff_perc) - author_name = commit.author.name\ - .replace('"', '\\"')\ - .replace("'", "\\'") - datapoints.append(BenchDatapoint( - timestamp=timestamp, - score=score, - score_diff=str(score_diff), - score_diff_perc=pct_to_str(score_diff_perc), - tooltip=tooltip, - bench_run_url=job_report.bench_run.html_url, - commit_id=commit.id, - commit_msg=commit_msg_header, - commit_author=author_name, - commit_url=ENSO_COMMIT_BASE_URL + commit.id, - )) - logging.debug(f"{len(datapoints)} datapoints created for branch {branch}") - branch_datapoints[branch] = datapoints.copy() - logging.debug(f"Template data for benchmark {bench_label} created") - template_bench_datas.append(TemplateBenchData( - id=_label_to_id(bench_label), - name=_label_to_name(bench_label), - branches_datapoints=branch_datapoints, - )) - return template_bench_datas - - -def _label_to_id(label: str) -> str: - return label.replace(".", "_") - - -def _label_to_name(label: str) -> str: - items = label.split(".") - assert len(items) >= 2 - filtered_items = \ - [item for item in items if item not in ( - "org", - "enso", - "benchmark", - "benchmarks", - "semantic", - "interpreter", - "bench" - )] - return "_".join(filtered_items) - - def _gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]: """ Iterates through all the job reports and gathers all the benchmark labels @@ -502,18 +395,6 @@ def _gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]: return all_labels -def render_html(jinja_data: JinjaData, template_file: str, html_out_fname: str) -> None: - jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(".")) - jinja_template = jinja_env.get_template(template_file) - generated_html = jinja_template.render(jinja_data.__dict__) - if path.exists(html_out_fname): - 
logging.info(f"{html_out_fname} already exist, rewritting") - with open(html_out_fname, "w") as html_file: - html_file.write(generated_html) - - - - async def main(): default_since: datetime = (datetime.now() - timedelta(days=14)) default_until: datetime = datetime.now() diff --git a/tools/performance/engine-benchmarks/bench_tool/template_render.py b/tools/performance/engine-benchmarks/bench_tool/template_render.py new file mode 100644 index 000000000000..c7c54a6a0593 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/template_render.py @@ -0,0 +1,134 @@ +import logging +import os +from datetime import datetime +from os import path +from typing import List, Dict, Optional, Set, Tuple +import re +import subprocess +import sys +from argparse import ArgumentParser +import shutil + +import jinja2 +import numpy as np +import pandas as pd + +from bench_tool import JobReport, TemplateBenchData, BenchDatapoint, GH_DATE_FORMAT, ENSO_COMMIT_BASE_URL, JinjaData + + +def create_template_data( + job_reports_per_branch: Dict[str, List[JobReport]], + bench_labels: Set[str]) -> List[TemplateBenchData]: + """ + Creates all the necessary data for the Jinja template from all collected + benchmark job reports. + :param job_reports_per_branch: Mapping of branch name to list of job reports. + job reports should be sorted by the commit date, otherwise the difference + between scores might be wrongly computed. + :param bench_labels: + :return: + """ + + def pct_to_str(score_diff_perc: float) -> str: + if not np.isnan(score_diff_perc): + buff = "+" if score_diff_perc > 0 else "" + buff += "{:.5f}".format(score_diff_perc * 100) + buff += "%" + return buff + else: + return "NaN" + + def diff_str(score_diff: float, score_diff_perc: float) -> str: + if not np.isnan(score_diff): + diff_str = "+" if score_diff > 0 else "" + diff_str += "{:.5f}".format(score_diff) + diff_str += " (" + diff_str += pct_to_str(score_diff_perc) + diff_str += ")" + return diff_str + else: + return "NA" + + template_bench_datas: List[TemplateBenchData] = [] + for bench_label in bench_labels: + logging.debug(f"Creating template data for benchmark {bench_label}") + branch_datapoints: Dict[str, List[BenchDatapoint]] = {} + for branch, job_reports in job_reports_per_branch.items(): + logging.debug(f"Creating datapoints for branch {branch} from {len(job_reports)} job reports") + datapoints: List[BenchDatapoint] = [] + for job_report in job_reports: + prev_datapoint: Optional[BenchDatapoint] = \ + datapoints[-1] if len(datapoints) > 0 else None + if bench_label in job_report.label_score_dict: + score = job_report.label_score_dict[bench_label] + commit = job_report.bench_run.head_commit + timestamp = datetime.strptime( + commit.timestamp, + GH_DATE_FORMAT + ) + commit_msg_header = \ + commit.message.splitlines()[0].replace('"', "'") + series = pd.Series([ + prev_datapoint.score if prev_datapoint else None, + score + ]) + score_diff = series.diff()[1] + score_diff_perc = series.pct_change()[1] + tooltip = "score = " + str(score) + "\\n" + tooltip += "date = " + str(timestamp) + "\\n" + tooltip += "branch = " + branch + "\\n" + tooltip += "diff = " + diff_str(score_diff, score_diff_perc) + author_name = commit.author.name\ + .replace('"', '\\"')\ + .replace("'", "\\'") + datapoints.append(BenchDatapoint( + timestamp=timestamp, + score=score, + score_diff=str(score_diff), + score_diff_perc=pct_to_str(score_diff_perc), + tooltip=tooltip, + bench_run_url=job_report.bench_run.html_url, + commit_id=commit.id, + commit_msg=commit_msg_header, 
+ commit_author=author_name, + commit_url=ENSO_COMMIT_BASE_URL + commit.id, + )) + logging.debug(f"{len(datapoints)} datapoints created for branch {branch}") + branch_datapoints[branch] = datapoints.copy() + logging.debug(f"Template data for benchmark {bench_label} created") + template_bench_datas.append(TemplateBenchData( + id=_label_to_id(bench_label), + name=_label_to_name(bench_label), + branches_datapoints=branch_datapoints, + )) + return template_bench_datas + + +def render_html(jinja_data: JinjaData, template_file: str, html_out_fname: str) -> None: + jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(".")) + jinja_template = jinja_env.get_template(template_file) + generated_html = jinja_template.render(jinja_data.__dict__) + if path.exists(html_out_fname): + logging.info(f"{html_out_fname} already exist, rewritting") + with open(html_out_fname, "w") as html_file: + html_file.write(generated_html) + + +def _label_to_id(label: str) -> str: + return label.replace(".", "_") + + +def _label_to_name(label: str) -> str: + items = label.split(".") + assert len(items) >= 2 + filtered_items = \ + [item for item in items if item not in ( + "org", + "enso", + "benchmark", + "benchmarks", + "semantic", + "interpreter", + "bench" + )] + return "_".join(filtered_items) From 1f6c0791deb2b5054e1c8f4fb59acb6315ef6cfb Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 15 Feb 2024 16:33:52 +0100 Subject: [PATCH 08/93] Improve logging --- .../bench_tool/template_render.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/template_render.py b/tools/performance/engine-benchmarks/bench_tool/template_render.py index c7c54a6a0593..81c241c8f6fc 100644 --- a/tools/performance/engine-benchmarks/bench_tool/template_render.py +++ b/tools/performance/engine-benchmarks/bench_tool/template_render.py @@ -1,13 +1,7 @@ import logging -import os from datetime import datetime from os import path -from typing import List, Dict, Optional, Set, Tuple -import re -import subprocess -import sys -from argparse import ArgumentParser -import shutil +from typing import List, Dict, Optional, Set import jinja2 import numpy as np @@ -15,6 +9,7 @@ from bench_tool import JobReport, TemplateBenchData, BenchDatapoint, GH_DATE_FORMAT, ENSO_COMMIT_BASE_URL, JinjaData +_logger = logging.getLogger(__name__) def create_template_data( job_reports_per_branch: Dict[str, List[JobReport]], @@ -51,10 +46,11 @@ def diff_str(score_diff: float, score_diff_perc: float) -> str: template_bench_datas: List[TemplateBenchData] = [] for bench_label in bench_labels: - logging.debug(f"Creating template data for benchmark {bench_label}") + _logger.debug("Creating template data for benchmark %s", bench_label) branch_datapoints: Dict[str, List[BenchDatapoint]] = {} for branch, job_reports in job_reports_per_branch.items(): - logging.debug(f"Creating datapoints for branch {branch} from {len(job_reports)} job reports") + _logger.debug("Creating datapoints for branch %s from %d job reports", + branch, len(job_reports)) datapoints: List[BenchDatapoint] = [] for job_report in job_reports: prev_datapoint: Optional[BenchDatapoint] = \ @@ -93,9 +89,10 @@ def diff_str(score_diff: float, score_diff_perc: float) -> str: commit_author=author_name, commit_url=ENSO_COMMIT_BASE_URL + commit.id, )) - logging.debug(f"{len(datapoints)} datapoints created for branch {branch}") + _logger.debug("%d datapoints created for branch %s", + len(datapoints), branch) branch_datapoints[branch] = 
datapoints.copy() - logging.debug(f"Template data for benchmark {bench_label} created") + _logger.debug("Template data for benchmark %s created", bench_label) template_bench_datas.append(TemplateBenchData( id=_label_to_id(bench_label), name=_label_to_name(bench_label), @@ -109,7 +106,7 @@ def render_html(jinja_data: JinjaData, template_file: str, html_out_fname: str) jinja_template = jinja_env.get_template(template_file) generated_html = jinja_template.render(jinja_data.__dict__) if path.exists(html_out_fname): - logging.info(f"{html_out_fname} already exist, rewritting") + _logger.info("%s already exist, rewritting", html_out_fname) with open(html_out_fname, "w") as html_file: html_file.write(generated_html) From fb02b4a0a38a9dd0167fe568ab813f68209395b5 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 15 Feb 2024 17:04:05 +0100 Subject: [PATCH 09/93] Add simple test case --- .../engine-benchmarks/bench_tool/test_gh.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tools/performance/engine-benchmarks/bench_tool/test_gh.py diff --git a/tools/performance/engine-benchmarks/bench_tool/test_gh.py b/tools/performance/engine-benchmarks/bench_tool/test_gh.py new file mode 100644 index 000000000000..3a70375d46cd --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/test_gh.py @@ -0,0 +1,16 @@ +import unittest + +from bench_tool import ENSO_REPO +from .gh import ensure_gh_installed, fetch_file + + +# Create a unit test +class TestGH(unittest.IsolatedAsyncioTestCase): + async def test_ensure_gh_installed(self): + self.assertIsNone(ensure_gh_installed()) + + async def test_file_fetch(self): + content = await fetch_file(ENSO_REPO, "README.md") + self.assertIsNotNone(content) + self.assertIsInstance(content, str) + self.assertGreater(len(content), 0) From 25063ce296d77399e418b2032a9c2bf51af133f6 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 15 Feb 2024 17:04:21 +0100 Subject: [PATCH 10/93] Implement fetch_file from GH --- tools/performance/engine-benchmarks/bench_tool/gh.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py index 550a3dda60d1..da22179c5e5a 100644 --- a/tools/performance/engine-benchmarks/bench_tool/gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/gh.py @@ -1,3 +1,4 @@ +import base64 import json import logging import os @@ -64,3 +65,14 @@ async def invoke_gh_api( return json.loads(out.decode()) else: return out + + +async def fetch_file(repo: str, file_path: str) -> Optional[str]: + try: + ret = await invoke_gh_api(repo, f"/contents/{file_path}", result_as_json=True) + file_content = base64.b64decode(ret["content"]).decode() + return file_content + except subprocess.CalledProcessError as e: + _logger.error("Failed to fetch file %s from %s, with: %s", + file_path, repo, e) + return None From 3c37a48eb3f038bae1b202c5bf0b09feae54a625 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 15 Feb 2024 17:04:32 +0100 Subject: [PATCH 11/93] Add cache skeleton --- tools/performance/engine-benchmarks/bench_tool/cache.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tools/performance/engine-benchmarks/bench_tool/cache.py diff --git a/tools/performance/engine-benchmarks/bench_tool/cache.py b/tools/performance/engine-benchmarks/bench_tool/cache.py new file mode 100644 index 000000000000..9bf7dd18f434 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/cache.py @@ -0,0 +1,8 @@ +import os 
+from os import path +from typing import List, Dict, Optional, Set, Tuple +import re +import subprocess +import sys +from argparse import ArgumentParser +import shutil From ad892147084a528cbdd705dd839ebee03fbc39db Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 16 Feb 2024 17:39:21 +0100 Subject: [PATCH 12/93] Add git.py --- .../engine-benchmarks/bench_tool/cache.py | 8 -- .../engine-benchmarks/bench_tool/git.py | 85 +++++++++++++++++++ .../engine-benchmarks/bench_tool/test_git.py | 59 +++++++++++++ 3 files changed, 144 insertions(+), 8 deletions(-) delete mode 100644 tools/performance/engine-benchmarks/bench_tool/cache.py create mode 100644 tools/performance/engine-benchmarks/bench_tool/git.py create mode 100644 tools/performance/engine-benchmarks/bench_tool/test_git.py diff --git a/tools/performance/engine-benchmarks/bench_tool/cache.py b/tools/performance/engine-benchmarks/bench_tool/cache.py deleted file mode 100644 index 9bf7dd18f434..000000000000 --- a/tools/performance/engine-benchmarks/bench_tool/cache.py +++ /dev/null @@ -1,8 +0,0 @@ -import os -from os import path -from typing import List, Dict, Optional, Set, Tuple -import re -import subprocess -import sys -from argparse import ArgumentParser -import shutil diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py new file mode 100644 index 000000000000..2251dba3d6c5 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/git.py @@ -0,0 +1,85 @@ +import asyncio +import logging +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Set + +_logger = logging.getLogger(__name__) + + +@dataclass +class GitStatus: + modified: Set[str] + untracked: Set[str] + added: Set[str] + + +async def clone(repo: str, dest: Path) -> None: + _logger.debug("Cloning %s to %s", repo, dest) + dest_abs_path = str(dest.absolute()) + args = ["clone", f"git@github.com:{repo}.git", dest_abs_path] + proc = await asyncio.create_subprocess_exec("git", *args) + ret = await proc.wait() + if ret != 0: + stdout, stderr = await proc.communicate() + out = stdout.decode() + stderr.decode() + raise RuntimeError(f"Failed to clone {repo}: {out}") + assert dest.exists() + + +async def status(repo: Path) -> GitStatus: + assert repo.exists() + proc = await asyncio.create_subprocess_exec("git", "status", "--porcelain", cwd=repo, + stdout=subprocess.PIPE) + out, _ = await proc.communicate() + lines = out.decode().splitlines() + untracked: Set[str] = set() + modified: Set[str] = set() + added: Set[str] = set() + for line in lines: + if line.startswith("??"): + untracked.add(line.split()[1]) + elif line.startswith(" M"): + modified.add(line.split()[1]) + elif line.startswith("A "): + added.add(line.split()[1]) + return GitStatus(modified, untracked, added) + + +async def add(repo: Path, files: Set[str]) -> None: + args = ["add"] + list(files) + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + raise RuntimeError(f"Failed to add {files} to {repo}") + + +async def commit(repo: Path, msg: str) -> None: + _logger.debug("Committing %s with message %s", repo, msg) + stat = await status(repo) + assert len(stat.added) > 0 + args = ["commit", "-m", msg] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + raise RuntimeError(f"Failed to commit 
{repo}") + + +async def push(repo: Path) -> None: + _logger.debug("Pushing to %s", repo) + args = ["push"] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + raise RuntimeError(f"Failed to push {repo}") + + +async def init(repo: Path) -> None: + _logger.debug("Initializing git repo in %s", repo) + assert repo.exists() + args = ["init"] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + raise RuntimeError(f"Failed to init {repo}") diff --git a/tools/performance/engine-benchmarks/bench_tool/test_git.py b/tools/performance/engine-benchmarks/bench_tool/test_git.py new file mode 100644 index 000000000000..2be622e1f462 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/test_git.py @@ -0,0 +1,59 @@ +import shutil +import tempfile +import unittest +from pathlib import Path + +from . import git + + +class TestGit(unittest.IsolatedAsyncioTestCase): + def setUp(self): + self.repo_root = Path(tempfile.mkdtemp()) + + def tearDown(self): + shutil.rmtree(self.repo_root) + + async def test_init(self): + await git.init(self.repo_root) + status = await git.status(self.repo_root) + self.assertEqual(0, len(status.added)) + self.assertEqual(0, len(status.modified)) + self.assertEqual(0, len(status.untracked)) + + async def test_add_file(self): + await git.init(self.repo_root) + self.repo_root.joinpath("README.md").write_text("Hello") + status = await git.status(self.repo_root) + self.assertEqual(1, len(status.untracked)) + + async def test_commit(self): + await git.init(self.repo_root) + self.repo_root.joinpath("README.md").write_text("Hello") + await git.add(self.repo_root, {"README.md"}) + await git.commit(self.repo_root, "Initial commit") + status = await git.status(self.repo_root) + self.assertEqual(0, len(status.added)) + self.assertEqual(0, len(status.modified)) + self.assertEqual(0, len(status.untracked)) + + async def test_modify_file(self): + await git.init(self.repo_root) + self.repo_root.joinpath("README.md").write_text("Hello") + await git.add(self.repo_root, {"README.md"}) + await git.commit(self.repo_root, "Initial commit") + self.repo_root.joinpath("README.md").write_text("Hello World") + status = await git.status(self.repo_root) + self.assertEqual(0, len(status.added)) + self.assertEqual(1, len(status.modified)) + self.assertEqual(0, len(status.untracked)) + + async def test_add_more_files(self): + await git.init(self.repo_root) + self.repo_root.joinpath("README.md").write_text("Hello") + self.repo_root.joinpath("pom.xml").write_text("") + status = await git.status(self.repo_root) + self.assertEquals(2, len(status.untracked)) + await git.add(self.repo_root, {"README.md", "pom.xml"}) + status = await git.status(self.repo_root) + self.assertEqual(2, len(status.added)) + From 3f49ea1e9661c19110a033f937e3a9a82ec8fe0c Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 16 Feb 2024 17:39:42 +0100 Subject: [PATCH 13/93] Fix Jinja template render --- .../engine-benchmarks/bench_tool/template_render.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/template_render.py b/tools/performance/engine-benchmarks/bench_tool/template_render.py index 81c241c8f6fc..022f635f3309 100644 --- a/tools/performance/engine-benchmarks/bench_tool/template_render.py +++ 
b/tools/performance/engine-benchmarks/bench_tool/template_render.py @@ -7,7 +7,8 @@ import numpy as np import pandas as pd -from bench_tool import JobReport, TemplateBenchData, BenchDatapoint, GH_DATE_FORMAT, ENSO_COMMIT_BASE_URL, JinjaData +from bench_tool import JobReport, TemplateBenchData, BenchDatapoint, GH_DATE_FORMAT, ENSO_COMMIT_BASE_URL, JinjaData, \ + JINJA_TEMPLATE, TEMPLATES_DIR _logger = logging.getLogger(__name__) @@ -101,9 +102,12 @@ def diff_str(score_diff: float, score_diff_perc: float) -> str: return template_bench_datas -def render_html(jinja_data: JinjaData, template_file: str, html_out_fname: str) -> None: - jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(".")) - jinja_template = jinja_env.get_template(template_file) +def render_html(jinja_data: JinjaData, html_out_fname: str) -> None: + jinja_env = jinja2.Environment( + loader=jinja2.FileSystemLoader(TEMPLATES_DIR) + ) + template_name = str(JINJA_TEMPLATE.name) + jinja_template = jinja_env.get_template(template_name) generated_html = jinja_template.render(jinja_data.__dict__) if path.exists(html_out_fname): _logger.info("%s already exist, rewritting", html_out_fname) From c4d1718bea8fff746d7fed1fda5262bc05c19613 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 16 Feb 2024 17:40:25 +0100 Subject: [PATCH 14/93] gh.fetch_file does not fail --- tools/performance/engine-benchmarks/bench_tool/gh.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py index da22179c5e5a..ecba5b324225 100644 --- a/tools/performance/engine-benchmarks/bench_tool/gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/gh.py @@ -68,11 +68,9 @@ async def invoke_gh_api( async def fetch_file(repo: str, file_path: str) -> Optional[str]: - try: - ret = await invoke_gh_api(repo, f"/contents/{file_path}", result_as_json=True) - file_content = base64.b64decode(ret["content"]).decode() - return file_content - except subprocess.CalledProcessError as e: - _logger.error("Failed to fetch file %s from %s, with: %s", - file_path, repo, e) + ret = await invoke_gh_api(repo, f"/contents/{file_path}", result_as_json=True) + if ret is None: + _logger.warning("File %s not found in %s", file_path, repo) return None + file_content = base64.b64decode(ret["content"]).decode() + return file_content From 0cf7abdf9049d43bbfd8b0762e1e4819fd0d92aa Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 16 Feb 2024 17:40:51 +0100 Subject: [PATCH 15/93] gh.invoke_gh_api does not fail, and takes method arg --- tools/performance/engine-benchmarks/bench_tool/gh.py | 11 +++++++---- .../engine-benchmarks/bench_tool/test_gh.py | 6 +++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py index ecba5b324225..e1d821598126 100644 --- a/tools/performance/engine-benchmarks/bench_tool/gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/gh.py @@ -32,8 +32,9 @@ async def invoke_gh_api( repo: str, endpoint: str, query_params: Dict[str, str] = {}, - result_as_json: bool = True -) -> Union[Dict[str, Any], bytes]: + result_as_json: bool = True, + method: str = "GET" +) -> Optional[Union[Dict[str, Any], bytes]]: """ Invokes the GitHub API using the `gh` command line tool. :param repo: Repository name in the form `owner/repo` @@ -41,13 +42,15 @@ async def invoke_gh_api( :param query_params: Additional query parameters. 
:param result_as_json: If result should be parsed as JSON. If false, the raw bytes are returned. - :return: + :param method: HTTP method to use, 'GET' by default. + :return: None if the query fails """ assert endpoint.startswith("/") urlencode(query_params) cmd = [ "gh", "api", + "--method", method, f"/repos/{repo}{endpoint}" + "?" + urlencode(query_params) ] _logger.debug("Invoking gh API with `%s`", " ".join(cmd)) @@ -60,7 +63,7 @@ async def invoke_gh_api( _logger.error("Command `%s` FAILED with errcode %d", " ".join(cmd), proc.returncode) - exit(proc.returncode) + return None if result_as_json: return json.loads(out.decode()) else: diff --git a/tools/performance/engine-benchmarks/bench_tool/test_gh.py b/tools/performance/engine-benchmarks/bench_tool/test_gh.py index 3a70375d46cd..c46677c9913f 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_gh.py @@ -1,7 +1,7 @@ import unittest from bench_tool import ENSO_REPO -from .gh import ensure_gh_installed, fetch_file +from .gh import ensure_gh_installed, fetch_file, invoke_gh_api # Create a unit test @@ -14,3 +14,7 @@ async def test_file_fetch(self): self.assertIsNotNone(content) self.assertIsInstance(content, str) self.assertGreater(len(content), 0) + + async def test_wrong_gh_query_should_not_fail(self): + res = await invoke_gh_api("non_existing_repo", "/non_existing_endpoint") + self.assertIsNone(res) From 94db4796555eec3637ef46ac88a70ff685af2db8 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 16 Feb 2024 17:42:43 +0100 Subject: [PATCH 16/93] Add test case to test_gh --- .../engine-benchmarks/bench_tool/test_gh.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_gh.py b/tools/performance/engine-benchmarks/bench_tool/test_gh.py index c46677c9913f..fe3edb85c7d0 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_gh.py @@ -1,20 +1,24 @@ import unittest from bench_tool import ENSO_REPO -from .gh import ensure_gh_installed, fetch_file, invoke_gh_api +from . 
import gh
 
 
 # Create a unit test
 class TestGH(unittest.IsolatedAsyncioTestCase):
     async def test_ensure_gh_installed(self):
-        self.assertIsNone(ensure_gh_installed())
+        self.assertIsNone(gh.ensure_gh_installed())
 
     async def test_file_fetch(self):
-        content = await fetch_file(ENSO_REPO, "README.md")
+        content = await gh.fetch_file(ENSO_REPO, "README.md")
         self.assertIsNotNone(content)
         self.assertIsInstance(content, str)
         self.assertGreater(len(content), 0)
 
+    async def test_fetch_non_existing_file(self):
+        content = await gh.fetch_file(ENSO_REPO, "non_existing_file")
+        self.assertIsNone(content)
+
     async def test_wrong_gh_query_should_not_fail(self):
-        res = await invoke_gh_api("non_existing_repo", "/non_existing_endpoint")
+        res = await gh.invoke_gh_api("non_existing_repo", "/non_existing_endpoint")
         self.assertIsNone(res)

From 07bca0b24f81a534dd336347359a202577b0763d Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Fri, 16 Feb 2024 17:56:31 +0100
Subject: [PATCH 17/93] Implement remote_cache.py

---
 .../engine-benchmarks/bench_tool/__init__.py  |   1 +
 .../bench_tool/remote_cache.py                | 148 ++++++++++++++++++
 .../bench_tool/test_remote_cache.py           |  23 +++
 3 files changed, 172 insertions(+)
 create mode 100644 tools/performance/engine-benchmarks/bench_tool/remote_cache.py
 create mode 100644 tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py

diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py
index 5620dca66535..74cda21ff85d 100644
--- a/tools/performance/engine-benchmarks/bench_tool/__init__.py
+++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py
@@ -18,6 +18,7 @@ def pkg_dir() -> Path:
 
 ENSO_REPO = "enso-org/enso"
+BENCH_REPO = "enso-org/engine-benchmark-results"
 DATE_FORMAT = "%Y-%m-%d"
 GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
 ENGINE_BENCH_WORKFLOW_ID = 29450898
diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py
new file mode 100644
index 000000000000..3c116448f1b9
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py
@@ -0,0 +1,148 @@
+"""
+A remote cache is located in https://github.com/enso-org/engine-benchmark-results/tree/main/cache.
+It is just a bunch of JSON files, each representing a single job report.
+"""
+import abc
+import json
+import logging
+import os
+import re
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Dict, Optional, Any
+
+from . import gh, JobReport, BENCH_REPO, JobRun, Commit, Author, git
+
+_logger = logging.getLogger(__name__)
+
+CACHE_REMOTE_DIR = "cache"
+
+
+class RemoteCache(abc.ABC):
+    @abc.abstractmethod
+    async def initialize(self) -> None:
+        """
+        Initializes the remote cache.
+        :return:
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    async def fetch(self, bench_id: str) -> Optional[JobReport]:
+        """
+        Fetches a job report for the given bench ID from the remote cache
+        :param bench_id:
+        :return: None if the report does not exist
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    async def sync(self) -> None:
+        raise NotImplementedError
+
+
+class ReadonlyRemoteCache(RemoteCache):
+    """
+    Only fetches the artifacts from the remote cache, does not push anything.
+    """
+
+    def __init__(self):
+        self._fetched_items: Dict[str, JobReport] = {}
+
+    async def initialize(self) -> None:
+        # Nop
+        pass
+
+    async def fetch(self, bench_id: str) -> Optional[JobReport]:
+        """ Fetches a job report for the given bench ID from the remote cache """
+        if bench_id in self._fetched_items:
+            return self._fetched_items[bench_id]
+        if not _is_benchrun_id(bench_id):
+            _logger.warning("Invalid bench ID: %s", bench_id)
+            return None
+        remote_path = self._get_remote_path(bench_id)
+        _logger.debug("Fetching cache from %s", remote_path)
+        content = await gh.fetch_file(BENCH_REPO, remote_path)
+        if content is None:
+            _logger.warning("Cache not found for %s", bench_id)
+            return None
+        bench_report = _parse_bench_report_from_json(
+            json.loads(content)
+        )
+        assert bench_id not in self._fetched_items
+        self._fetched_items[bench_id] = bench_report
+        return bench_report
+
+    async def sync(self) -> None:
+        # Nop
+        pass
+
+    def _get_remote_path(self, bench_id: str) -> str:
+        assert _is_benchrun_id(bench_id)
+        return os.path.join(CACHE_REMOTE_DIR, bench_id + ".json")
+
+
+class SyncRemoteCache(RemoteCache):
+    """
+    Fetches and pushes the artifacts to the remote cache. Needs write permissions to the repo.
+    """
+
+    def __init__(self):
+        self._repo_root_dir = Path(tempfile.mkdtemp(prefix="bench_tool_remote_cache"))
+        self._cache_dir = self._repo_root_dir.joinpath(CACHE_REMOTE_DIR)
+
+    async def initialize(self) -> None:
+        # Checkout the repo
+        await git.clone(BENCH_REPO, self._repo_root_dir)
+        assert self._repo_root_dir.exists()
+        assert self._cache_dir.exists()
+
+    async def fetch(self, bench_id: str) -> Optional[JobReport]:
+        assert self._cache_dir.exists()
+        path = self._cache_dir.joinpath(bench_id + ".json")
+        if path.exists():
+            with path.open() as f:
+                return _parse_bench_report_from_json(json.load(f))
+        else:
+            return None
+
+    async def sync(self) -> None:
+        status = await git.status(self._repo_root_dir)
+        assert len(status.modified) == 0, "The RemoteCache should not modify any files, only add new ones"
+        assert len(status.added) == 0, f"Only untracked files expected in {self._repo_root_dir}"
+        if len(status.untracked) > 0:
+            _logger.info("Untracked files found in the remote cache: %s", status.untracked)
+            await git.add(self._repo_root_dir, status.untracked)
+            await git.commit(self._repo_root_dir, f"Add {len(status.untracked)} new reports")
+            await git.push(self._repo_root_dir)
+        shutil.rmtree(self._repo_root_dir, ignore_errors=True)
+
+
+def _is_benchrun_id(name: str) -> bool:
+    return re.match(r"\d{9}", name) is not None
+
+
+def _parse_bench_report_from_json(obj: Dict[Any, Any]) -> JobReport:
+    return JobReport(
+        bench_run=_parse_bench_run_from_json(obj["bench_run"]),
+        label_score_dict=obj["label_score_dict"]
+    )
+
+
+def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun:
+    return JobRun(
+        id=str(obj["id"]),
+        html_url=obj["html_url"],
+        run_attempt=int(obj["run_attempt"]),
+        event=obj["event"],
+        display_title=obj["display_title"],
+        head_commit=Commit(
+            id=obj["head_commit"]["id"],
+            message=obj["head_commit"]["message"],
+            timestamp=obj["head_commit"]["timestamp"],
+            author=Author(
+                name=obj["head_commit"]["author"]["name"]
+            )
+        )
+    )
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
new file mode 100644
index 000000000000..f92f50097194
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
@@ -0,0 +1,23 @@
+import unittest
+
+from .remote_cache import ReadonlyRemoteCache + + +class TestGH(unittest.IsolatedAsyncioTestCase): + async def test_fetch_some_cache(self): + remote_cache = ReadonlyRemoteCache() + await remote_cache.initialize() + # This ID is definitelly in the cache + bench_id = "3686412302" + job_report = await remote_cache.fetch(bench_id) + self.assertIsNotNone(job_report) + self.assertEquals(1, job_report.bench_run.run_attempt) + self.assertEquals(bench_id, job_report.bench_run.id) + self.assertEquals("Jaroslav Tulach", job_report.bench_run.head_commit.author.name) + + async def test_non_existing_cache_should_not_fail(self): + remote_cache = ReadonlyRemoteCache() + await remote_cache.initialize() + bench_id = "FOOOO BAR" + job_report = await remote_cache.fetch(bench_id) + self.assertIsNone(job_report) From 41f479efdc7d6df404e52d925483e2d4bff12ee1 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 16 Feb 2024 18:44:38 +0100 Subject: [PATCH 18/93] Use readonly remote cache in bench_download script --- .../engine-benchmarks/bench_download.py | 96 +++++++------------ .../bench_tool/remote_cache.py | 29 +++++- .../bench_tool/test_remote_cache.py | 45 ++++++++- 3 files changed, 103 insertions(+), 67 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index 3e964133491d..c36ca003777b 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -47,6 +47,8 @@ import sys +from bench_tool.remote_cache import RemoteCache, ReadonlyRemoteCache + if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7): print("ERROR: python version lower than 3.7") exit(1) @@ -68,7 +70,8 @@ from typing import List, Dict, Optional, Any, Set import xml.etree.ElementTree as ET -from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, JINJA_TEMPLATE, ENSO_REPO, Author, Commit, JobRun, JobReport, \ +from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, \ + JINJA_TEMPLATE, ENSO_REPO, Author, Commit, JobRun, JobReport, \ TemplateBenchData, JinjaData, Source from bench_tool.gh import invoke_gh_api, ensure_gh_installed from bench_tool.template_render import create_template_data, render_html @@ -274,39 +277,44 @@ async def get_and_parse_run(page: int, parsed_bench_runs) -> None: return parsed_bench_runs -async def get_bench_report(bench_run: JobRun, cache: Cache, temp_dir: str) -> Optional[JobReport]: +async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: RemoteCache) -> Optional[JobReport]: """ Extracts some data from the given bench_run, which was fetched via the GH API, optionally getting it from the cache. An artifact in GH can expire, in such case, returns None. :param bench_run: - :param cache: :param temp_dir: Used for downloading and unzipping artifacts. :return: None if the corresponding artifact expired. """ - if bench_run.id in cache: - logging.info(f"Getting bench run with ID {bench_run.id} from cache") - return cache[bench_run.id] - # There might be multiple artifacts in the artifact list for a benchmark run # We are looking for the one named 'Runtime Benchmark Report', which will # be downloaded as a ZIP file. 
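
For orientation, the intended call pattern of the new remote cache, as a minimal sketch (not part of the patch series; the bench ID is the one used in the test above):

import asyncio

from bench_tool.remote_cache import ReadonlyRemoteCache


async def main() -> None:
    # ReadonlyRemoteCache fetches cached job reports from the
    # enso-org/engine-benchmark-results repo through the GH API.
    cache = ReadonlyRemoteCache()
    await cache.initialize()
    report = await cache.fetch("3686412302")
    if report is not None:
        print(report.bench_run.display_title)


asyncio.run(main())
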
From 41f479efdc7d6df404e52d925483e2d4bff12ee1 Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Fri, 16 Feb 2024 18:44:38 +0100
Subject: [PATCH 18/93] Use readonly remote cache in bench_download script

---
 .../engine-benchmarks/bench_download.py       | 96 +++++++------------
 .../bench_tool/remote_cache.py                | 29 +++++-
 .../bench_tool/test_remote_cache.py           | 45 ++++++++-
 3 files changed, 103 insertions(+), 67 deletions(-)

diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py
index 3e964133491d..c36ca003777b 100755
--- a/tools/performance/engine-benchmarks/bench_download.py
+++ b/tools/performance/engine-benchmarks/bench_download.py
@@ -47,6 +47,8 @@

 import sys

+from bench_tool.remote_cache import RemoteCache, ReadonlyRemoteCache
+
 if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7):
     print("ERROR: python version lower than 3.7")
     exit(1)
@@ -68,7 +70,8 @@
 from typing import List, Dict, Optional, Any, Set
 import xml.etree.ElementTree as ET

-from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, JINJA_TEMPLATE, ENSO_REPO, Author, Commit, JobRun, JobReport, \
+from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, \
+    JINJA_TEMPLATE, ENSO_REPO, Author, Commit, JobRun, JobReport, \
     TemplateBenchData, JinjaData, Source
 from bench_tool.gh import invoke_gh_api, ensure_gh_installed
 from bench_tool.template_render import create_template_data, render_html
@@ -274,39 +277,44 @@ async def get_and_parse_run(page: int, parsed_bench_runs) -> None:
     return parsed_bench_runs


-async def get_bench_report(bench_run: JobRun, cache: Cache, temp_dir: str) -> Optional[JobReport]:
+async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: RemoteCache) -> Optional[JobReport]:
     """
     Extracts some data from the given bench_run, which was fetched via the GH API,
     optionally getting it from the cache.
     An artifact in GH can expire, in such case, returns None.
     :param bench_run:
-    :param cache:
     :param temp_dir: Used for downloading and unzipping artifacts.
     :return: None if the corresponding artifact expired.
     """
-    if bench_run.id in cache:
-        logging.info(f"Getting bench run with ID {bench_run.id} from cache")
-        return cache[bench_run.id]
-
     # There might be multiple artifacts in the artifact list for a benchmark run
     # We are looking for the one named 'Runtime Benchmark Report', which will
     # be downloaded as a ZIP file.
     obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts")
     artifacts = obj["artifacts"]
     if len(artifacts) != 1:
-        logging.warning("Bench run %s does not contain an artifact, but it is a successful run.",
+        logging.warning("Bench run %s does not contain exactly one artifact, but it is a successful run.",
                         bench_run.id)
         return None
     bench_report_artifact = artifacts[0]
     assert bench_report_artifact, "Benchmark Report artifact not found"
     artifact_id = str(bench_report_artifact["id"])
-    if bench_report_artifact["expired"]:
-        created_at = bench_report_artifact["created_at"]
-        updated_at = bench_report_artifact["updated_at"]
-        expires_at = bench_report_artifact["expires_at"]
-        logging.warning(f"Artifact with ID {artifact_id} from bench report {bench_run.id} has expired. "
-                        f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}")
+    created_at = bench_report_artifact["created_at"]
+    updated_at = bench_report_artifact["updated_at"]
+    expires_at = bench_report_artifact["expires_at"]
+    is_expired = bench_report_artifact["expired"]
+    logging.debug(f"Got artifact with ID {artifact_id}, from bench run {bench_run.id}: "
+                  f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}, "
+                  f"is_expired={is_expired}")
+
+    job_report = await remote_cache.fetch(bench_run.id)
+    if is_expired and job_report is None:
+        logging.error(f"Artifact {artifact_id} from bench run {bench_run.id} is expired, and it is not in the remote cache")
         return None
+    if job_report:
+        logging.debug(f"Got job report from the cache for {bench_run.id}")
+        return job_report
+
+    assert not is_expired

     # Get contents of the ZIP artifact file
     artifact_ret = await invoke_gh_api(ENSO_REPO, f"/actions/artifacts/{artifact_id}/zip", result_as_json=False)
@@ -327,7 +335,7 @@ async def get_bench_report(bench_run: JobRun, cache: Cache, temp_dir: str) -> Op
     assert path.exists(bench_report_xml)

     bench_report_parsed = _parse_bench_report_from_xml(bench_report_xml, bench_run)
-    cache[bench_run.id] = bench_report_parsed
+    await remote_cache.put(bench_run.id, bench_report_parsed)
     return bench_report_parsed


@@ -365,23 +373,6 @@ def write_bench_reports_to_csv(bench_reports: List[JobReport], csv_fname: str) -
     })


-def populate_cache(cache_dir: str) -> Cache:
-    """
-    Initializes cache from `cache_dir`, if there are any items.
-    See docs of `Cache`.
-
-    :param cache_dir: Path to the cache directory. Does not have to exist
-    :return: Populated cache. Might be empty.
-    """
-    if not path.exists(cache_dir):
-        logging.info(f"No cache at {cache_dir}, creating the cache directory")
-        os.mkdir(cache_dir)
-    logging.debug(f"Initializing cache from {cache_dir}")
-    cache = Cache(cache_dir)
-    logging.debug(f"Cache populated with {len(cache)} items")
-    return cache
-
-
 def _gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]:
     """
     Iterates through all the job reports and gathers all the benchmark labels
@@ -398,7 +389,6 @@ def _gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]:
 async def main():
     default_since: datetime = (datetime.now() - timedelta(days=14))
     default_until: datetime = datetime.now()
-    default_cache_dir = path.expanduser("~/.cache/enso_bench_download")
     default_csv_out = "Engine_Benchs/data/benchs.csv"

     date_format_help = DATE_FORMAT.replace("%", "%%")
@@ -434,17 +424,6 @@ def _parse_bench_source(_bench_source: str) -> Source:
                             help=f"The date until which the benchmark results will be gathered. "
                                  f"Format is {date_format_help}. "
                                  f"The default is today")
-    arg_parser.add_argument("--use-cache",
-                            default=False,
-                            metavar="(true|false)",
-                            type=lambda input: True if input in ("true", "True") else False,
-                            help="Whether the cache directory should be used. The default is False.")
-    arg_parser.add_argument("-c", "--cache", action="store",
-                            default=default_cache_dir,
-                            metavar="CACHE_DIR",
-                            help=f"Cache directory. Makes sense only iff specified with --use-cache argument. "
-                                 f"The default is {default_cache_dir}. If there are any troubles with the "
-                                 f"cache, just do `rm -rf {default_cache_dir}`.")
     arg_parser.add_argument("-b", "--branches", action="store",
                             nargs="+",
                             default=["develop"],
@@ -476,20 +455,17 @@ def _parse_bench_source(_bench_source: str) -> Source:

     since: datetime = args.since
     until: datetime = args.until
-    cache_dir: str = args.cache
     if not args.tmp_dir:
         temp_dir: str = tempfile.mkdtemp()
     else:
         temp_dir: str = args.tmp_dir
-    use_cache: bool = args.use_cache
-    assert cache_dir and temp_dir
     bench_source: Source = args.source
     csv_output: str = args.csv_output
     create_csv: bool = args.create_csv
     branches: List[str] = args.branches
     labels_override: Set[str] = args.labels
-    logging.debug(f"parsed args: since={since}, until={until}, cache_dir={cache_dir}, "
-                  f"temp_dir={temp_dir}, use_cache={use_cache}, bench_source={bench_source}, "
+    logging.debug(f"parsed args: since={since}, until={until}, "
+                  f"temp_dir={temp_dir}, bench_source={bench_source}, "
                   f"csv_output={csv_output}, "
                   f"create_csv={create_csv}, branches={branches}, "
                   f"labels_override={labels_override}")
@@ -499,22 +475,16 @@ def _parse_bench_source(_bench_source: str) -> Source:
     # If the user requires benchmarks for which artifacts are not retained
     # anymore, then cache should be used.
     min_since_without_cache = datetime.today() - GH_ARTIFACT_RETENTION_PERIOD
-    if not use_cache and since < min_since_without_cache:
-        logging.warning(f"The default GH artifact retention period is "
+    if since < min_since_without_cache:
+        logging.info(f"The default GH artifact retention period is "
                      f"{GH_ARTIFACT_RETENTION_PERIOD.days} days. "
                      f"This means that all the artifacts older than "
                      f"{min_since_without_cache.date()} are expired."
-                        f"The use_cache parameter is set to False, so no "
-                        f"expired artifacts will be fetched.")
-        logging.warning(f"The `since` parameter is reset to "
-                        f"{min_since_without_cache.date()} to prevent "
-                        f"unnecessary GH API queries.")
-        since = min_since_without_cache
-
-    if use_cache:
-        cache = populate_cache(cache_dir)
-    else:
-        cache = FakeCache()
+                     f"The since date was set to {since}, so the remote cache is enabled, "
+                     f"and the older artifacts will be fetched from the cache.")
+
+    remote_cache = ReadonlyRemoteCache()
+    await remote_cache.initialize()

     bench_labels: Optional[Set[str]] = None
     """ Set of all gathered benchmark labels from all the job reports """
@@ -534,7 +504,7 @@ def _parse_bench_source(_bench_source: str) -> Source:
     job_reports: List[JobReport] = []

     async def _process_report(_bench_run):
-        _job_report = await get_bench_report(_bench_run, cache, temp_dir)
+        _job_report = await get_bench_report(_bench_run, temp_dir, remote_cache)
         if _job_report:
             job_reports.append(_job_report)

diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py
index 3c116448f1b9..d00e068ae7ec 100644
--- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py
+++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py
@@ -37,8 +37,22 @@ async def fetch(self, bench_id: str) -> Optional[JobReport]:
         """
         raise NotImplementedError

+    @abc.abstractmethod
+    async def put(self, bench_id: str, job_report: JobReport) -> None:
+        """
+        Puts a job report into the remote cache, or into the internal data structures.
+        :param bench_id:
+        :param job_report:
+        :return:
+        """
+        raise NotImplementedError
+
     @abc.abstractmethod
     async def sync(self) -> None:
+        """
+        Synchronizes the remote cache with the local state.
+        :return:
+        """
         raise NotImplementedError


@@ -74,6 +88,11 @@ async def fetch(self, bench_id: str) -> Optional[JobReport]:
         self._fetched_items[bench_id] = bench_report
         return bench_report

+    async def put(self, bench_id: str, job_report: JobReport) -> None:
+        assert _is_benchrun_id(bench_id)
+        assert bench_id not in self._fetched_items
+        self._fetched_items[bench_id] = job_report
+
     async def sync(self) -> None:
         # Nop
         pass
@@ -104,8 +123,14 @@ async def fetch(self, bench_id: str) -> Optional[JobReport]:
         if path.exists():
             with path.open() as f:
                 return _parse_bench_report_from_json(json.load(f))
-        else:
-            return None
+        return None
+
+    async def put(self, bench_id: str, job_report: JobReport) -> None:
+        assert self._cache_dir.exists()
+        path = self._cache_dir.joinpath(bench_id + ".json")
+        assert not path.exists()
+        with path.open("w") as f:
+            json.dump(dataclasses.asdict(job_report), f)

     async def sync(self) -> None:
         status = await git.status(self._repo_root_dir)
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
index f92f50097194..6c4da65c96f8 100644
--- a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
+++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
@@ -1,9 +1,32 @@
 import unittest

-from .remote_cache import ReadonlyRemoteCache
+= "diff = " + diff_str(score_diff, score_diff_perc) - author_name = commit.author.name\ - .replace('"', '\\"')\ + author_name = commit.author.name \ + .replace('"', '\\"') \ .replace("'", "\\'") datapoints.append(BenchDatapoint( timestamp=timestamp, @@ -102,16 +103,16 @@ def diff_str(score_diff: float, score_diff_perc: float) -> str: return template_bench_datas -def render_html(jinja_data: JinjaData, html_out_fname: str) -> None: +def render_html(jinja_data: JinjaData, html_out: Path) -> None: jinja_env = jinja2.Environment( loader=jinja2.FileSystemLoader(TEMPLATES_DIR) ) template_name = str(JINJA_TEMPLATE.name) jinja_template = jinja_env.get_template(template_name) generated_html = jinja_template.render(jinja_data.__dict__) - if path.exists(html_out_fname): - _logger.info("%s already exist, rewritting", html_out_fname) - with open(html_out_fname, "w") as html_file: + if html_out.exists(): + _logger.info("%s already exist, rewriting", html_out) + with html_out.open("w") as html_file: html_file.write(generated_html) From 9cb2a268ee89e2248278751c6422c7cdd0e5d5a6 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 16 Feb 2024 18:57:27 +0100 Subject: [PATCH 20/93] Remove unused imports --- tools/performance/engine-benchmarks/bench_tool/gh.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py index e1d821598126..1bfe827edb78 100644 --- a/tools/performance/engine-benchmarks/bench_tool/gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/gh.py @@ -1,15 +1,10 @@ +import asyncio import base64 import json import logging -import os -from os import path -from typing import List, Dict, Optional, Set, Tuple, Union, Any -import re import subprocess import sys -from argparse import ArgumentParser -import shutil -import asyncio +from typing import Dict, Optional, Union, Any from urllib.parse import urlencode _logger = logging.getLogger(__name__) From bc1f37261f476cdd51ec91781f6d4ae3c65eb69a Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 12:19:01 +0100 Subject: [PATCH 21/93] Remove unused Cache classes --- .../engine-benchmarks/bench_download.py | 108 ------------------ 1 file changed, 108 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index 23fef9af6c78..a529a8666235 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -105,34 +105,6 @@ def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun: ) -def _parse_bench_report_from_json(obj: Dict[Any, Any]) -> JobReport: - return JobReport( - bench_run=_parse_bench_run_from_json(obj["bench_run"]), - label_score_dict=obj["label_score_dict"] - ) - - -def _bench_report_to_json(bench_report: JobReport) -> Dict[Any, Any]: - return { - "bench_run": { - "id": bench_report.bench_run.id, - "html_url": bench_report.bench_run.html_url, - "run_attempt": bench_report.bench_run.run_attempt, - "event": bench_report.bench_run.event, - "display_title": bench_report.bench_run.display_title, - "head_commit": { - "id": bench_report.bench_run.head_commit.id, - "message": bench_report.bench_run.head_commit.message, - "timestamp": bench_report.bench_run.head_commit.timestamp, - "author": { - "name": bench_report.bench_run.head_commit.author.name - } - } - }, - "label_score_dict": bench_report.label_score_dict - } - - def 
_parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) -> "JobReport": logging.debug(f"Parsing BenchReport from {bench_report_xml_path}") tree = ET.parse(bench_report_xml_path) @@ -154,86 +126,6 @@ def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) bench_run=bench_run ) - -def _is_benchrun_id(name: str) -> bool: - return re.match("\d{9}", name) is not None - - -def _read_json(json_file: str) -> Dict[Any, Any]: - assert path.exists(json_file) and path.isfile(json_file) - with open(json_file, "r") as f: - return json.load(f) - - -class Cache: - """ - Cache is a directory filled with json files that have name of format .json, and - in every json, there is `BenchReport` dataclass serialized. - """ - - def __init__(self, dirname: str): - assert path.exists(dirname) and path.isdir(dirname) - self._dir = dirname - # Keys are BenchRun ids - self._items: Dict[str, JobReport] = {} - for fname in os.listdir(dirname): - fname_without_ext, ext = path.splitext(fname) - if _is_benchrun_id(fname_without_ext) and ext == ".json": - logging.debug(f"Loading into cache from {fname}") - bench_report = _parse_bench_report_from_json( - _read_json(path.join(dirname, fname)) - ) - self._items[fname_without_ext] = bench_report - - def __len__(self) -> int: - return len(self._items) - - def __contains__(self, key: str) -> bool: - assert _is_benchrun_id(key) - return key in self._items - - def __getitem__(self, item: str) -> Optional[JobReport]: - if not _is_benchrun_id(item): - return None - else: - return self._items[item] - - def __setitem__(self, bench_run_id: str, bench_report: JobReport) -> None: - assert isinstance(bench_report, JobReport) - assert isinstance(bench_run_id, str) - assert _is_benchrun_id(bench_run_id) - self._items[bench_run_id] = bench_report - json_fname = path.join(self._dir, bench_run_id + ".json") - logging.debug(f"Putting {bench_run_id} into cache {json_fname}") - with open(json_fname, "w") as json_file: - json.dump( - _bench_report_to_json(bench_report), - json_file, - indent=2, - ensure_ascii=False - ) - - def __str__(self) -> str: - return str(self._items) - - def contains(self, bench_run_id: str) -> bool: - return bench_run_id in self._items - - -class FakeCache: - def __getitem__(self, item): - return None - - def __setitem__(self, key, value): - pass - - def __contains__(self, item): - return False - - def __len__(self): - return 0 - - async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]: """ Fetches the list of all the job runs from the GH API for the specified `branch`. 
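
The on-disk format is unchanged by this removal: the remote cache stores the same per-run <bench_run_id>.json documents that the deleted Cache class kept locally, so an old local cache can in principle be seeded into the cache repository by hand. A hypothetical one-off sketch, assuming a local checkout of the cache repository:

import shutil
from pathlib import Path

# Old default location of the local cache (removed in the patch above).
old_cache = Path.home() / ".cache" / "enso_bench_download"
# Assumed path to a local checkout of enso-org/engine-benchmark-results.
repo_cache = Path("engine-benchmark-results") / "cache"

for json_file in old_cache.glob("*.json"):
    target = repo_cache / json_file.name
    if not target.exists():
        shutil.copy(json_file, target)
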
From 923d870ccb4bcee660a489676bd9d39c7cf79e21 Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Mon, 19 Feb 2024 15:44:42 +0100
Subject: [PATCH 22/93] Move some common functionality into bench_results and
 utils

---
 .../engine-benchmarks/bench_download.py       | 177 +----------------
 .../bench_tool/bench_results.py               | 167 +++++++++++++++++
 .../engine-benchmarks/bench_tool/utils.py     |  31 +++
 3 files changed, 206 insertions(+), 169 deletions(-)
 create mode 100644 tools/performance/engine-benchmarks/bench_tool/bench_results.py
 create mode 100644 tools/performance/engine-benchmarks/bench_tool/utils.py

diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py
index a529a8666235..ffd200ac7217 100755
--- a/tools/performance/engine-benchmarks/bench_download.py
+++ b/tools/performance/engine-benchmarks/bench_download.py
@@ -47,33 +47,30 @@

 import sys

-from bench_tool.remote_cache import RemoteCache, ReadonlyRemoteCache
+from bench_tool.bench_results import get_bench_runs, get_bench_report
+from bench_tool.remote_cache import ReadonlyRemoteCache
+from bench_tool.utils import gather_all_bench_labels

 if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7):
     print("ERROR: python version lower than 3.7")
     exit(1)

 import asyncio
-import json
 import logging
 import logging.config
-import math
 import os
-import re
 import shutil
 import tempfile
-import zipfile
 from argparse import ArgumentParser, RawDescriptionHelpFormatter
 from csv import DictWriter
 from datetime import datetime, timedelta
 from os import path
-from typing import List, Dict, Optional, Any, Set
-import xml.etree.ElementTree as ET
+from typing import List, Dict, Optional, Set

 from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, \
-    JINJA_TEMPLATE, ENSO_REPO, Author, Commit, JobRun, JobReport, \
+    JINJA_TEMPLATE, JobRun, JobReport, \
     TemplateBenchData, JinjaData, Source
-from bench_tool.gh import invoke_gh_api, ensure_gh_installed
+from bench_tool.gh import ensure_gh_installed
 from bench_tool.template_render import create_template_data, render_html

 try:
@@ -86,151 +83,6 @@
               "with `apt-get install python3-pandas python3-numpy python3-jinja2`",
               file=sys.stderr)
         exit(1)

-
-def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun:
-    return JobRun(
-        id=str(obj["id"]),
-        html_url=obj["html_url"],
-        run_attempt=int(obj["run_attempt"]),
-        event=obj["event"],
-        display_title=obj["display_title"],
-        head_commit=Commit(
-            id=obj["head_commit"]["id"],
-            message=obj["head_commit"]["message"],
-            timestamp=obj["head_commit"]["timestamp"],
-            author=Author(
-                name=obj["head_commit"]["author"]["name"]
-            )
-        )
-    )
-
-
-def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) -> "JobReport":
-    logging.debug(f"Parsing BenchReport from {bench_report_xml_path}")
-    tree = ET.parse(bench_report_xml_path)
-    root = tree.getroot()
-    label_score_dict: Dict[str, float] = dict()
-    for cases in root:
-        assert cases.tag == "cases"
-        for case in cases:
-            assert case.tag == "case"
-            label = case.findtext("label").strip()
-            scores = case.find("scores")
-            scores_float = [float(score.text.strip()) for score in scores]
-            if len(scores_float) > 1:
-                logging.warning(f"More than one score for benchmark {label}, "
-                                f"using the last one (the newest one).")
-            label_score_dict[label] = scores_float[len(scores_float) - 1]
-    return JobReport(
-        label_score_dict=label_score_dict,
-        bench_run=bench_run
-    )
-
-
-async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]:
-    """
-    Fetches the list of all the job runs from the GH API for the specified `branch`.
-    """
-    logging.info(f"Looking for all successful Engine benchmark workflow run "
-                 f"actions from {since} to {until} for branch {branch} "
-                 f"and workflow ID {workflow_id}")
-    query_fields = {
-        "branch": branch,
-        "status": "success",
-        "created": since.strftime(DATE_FORMAT) + ".." + until.strftime(DATE_FORMAT),
-        # Start with 1, just to determine the total count
-        "per_page": "1"
-    }
-    res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", query_fields)
-    total_count = int(res["total_count"])
-    per_page = 3
-    logging.debug(f"Total count of all runs: {total_count} for workflow ID "
-                  f"{workflow_id}. Will process {per_page} runs per page")
-
-    async def get_and_parse_run(page: int, parsed_bench_runs) -> None:
-        _query_fields = query_fields.copy()
-        _query_fields["page"] = str(page)
-        res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", _query_fields)
-        bench_runs_json = res["workflow_runs"]
-        _parsed_bench_runs = [_parse_bench_run_from_json(bench_run_json)
-                              for bench_run_json in bench_runs_json]
-        parsed_bench_runs.extend(_parsed_bench_runs)
-
-    # Now we know the total count, so we can fetch all the runs
-    query_fields["per_page"] = str(per_page)
-    num_queries = math.ceil(total_count / per_page)
-    parsed_bench_runs = []
-
-    tasks = []
-    # Page is indexed from 1
-    for page in range(1, num_queries + 1):
-        tasks.append(get_and_parse_run(page, parsed_bench_runs))
-    await asyncio.gather(*tasks)
-
-    return parsed_bench_runs
-
-
-async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: RemoteCache) -> Optional[JobReport]:
-    """
-    Extracts some data from the given bench_run, which was fetched via the GH API,
-    optionally getting it from the cache.
-    An artifact in GH can expire, in such case, returns None.
-    :param bench_run:
-    :param temp_dir: Used for downloading and unzipping artifacts.
-    :return: None if the corresponding artifact expired.
-    """
-    # There might be multiple artifacts in the artifact list for a benchmark run
-    # We are looking for the one named 'Runtime Benchmark Report', which will
-    # be downloaded as a ZIP file.
-    obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts")
-    artifacts = obj["artifacts"]
-    if len(artifacts) != 1:
-        logging.warning("Bench run %s does not contain exactly one artifact, but it is a successful run.",
-                        bench_run.id)
-        return None
-    bench_report_artifact = artifacts[0]
-    assert bench_report_artifact, "Benchmark Report artifact not found"
-    artifact_id = str(bench_report_artifact["id"])
-    created_at = bench_report_artifact["created_at"]
-    updated_at = bench_report_artifact["updated_at"]
-    expires_at = bench_report_artifact["expires_at"]
-    is_expired = bench_report_artifact["expired"]
-    logging.debug(f"Got artifact with ID {artifact_id}, from bench run {bench_run.id}: "
-                  f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}, "
-                  f"is_expired={is_expired}")
-
-    job_report = await remote_cache.fetch(bench_run.id)
-    if is_expired and job_report is None:
-        logging.error(f"Artifact {artifact_id} from bench run {bench_run.id} is expired, and it is not in the remote cache")
-        return None
-    if job_report:
-        logging.debug(f"Got job report from the cache for {bench_run.id}")
-        return job_report
-
-    assert not is_expired
-
-    # Get contents of the ZIP artifact file
-    artifact_ret = await invoke_gh_api(ENSO_REPO, f"/actions/artifacts/{artifact_id}/zip", result_as_json=False)
-    zip_file_name = os.path.join(temp_dir, artifact_id + ".zip")
-    logging.debug(f"Writing artifact ZIP content into {zip_file_name}")
-    with open(zip_file_name, "wb") as zip_file:
-        zip_file.write(artifact_ret)
-
-    extracted_dirname = os.path.join(temp_dir, artifact_id)
-    if os.path.exists(extracted_dirname):
-        shutil.rmtree(extracted_dirname)
-    os.mkdir(extracted_dirname)
-
-    logging.debug(f"Extracting {zip_file_name} into {extracted_dirname}")
-    zip_file = zipfile.ZipFile(zip_file_name, "r")
-    zip_file.extractall(extracted_dirname)
-    bench_report_xml = path.join(extracted_dirname, "bench-report.xml")
-    assert path.exists(bench_report_xml)
-
-    bench_report_parsed = _parse_bench_report_from_xml(bench_report_xml, bench_run)
-    await remote_cache.put(bench_run.id, bench_report_parsed)
-    return bench_report_parsed
-
-
 CSV_FIELDNAMES = [
     "label",
     "score",
@@ -265,19 +117,6 @@ def write_bench_reports_to_csv(bench_reports: List[JobReport], csv_fname: str) -
     })


-def _gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]:
-    """
-    Iterates through all the job reports and gathers all the benchmark labels
-    found. Note that every job report can have a different set of benchmark labels.
-    :return: List of benchmark labels.
-    """
-    all_labels = set()
-    for job_report in job_reports:
-        for labels in job_report.label_score_dict.keys():
-            all_labels.add(labels)
-    return all_labels
-
-
 async def main():
     default_since: datetime = (datetime.now() - timedelta(days=14))
     default_until: datetime = datetime.now()
@@ -395,7 +234,7 @@ def _parse_bench_source(_bench_source: str) -> Source:

     job_reports: List[JobReport] = []

-    async def _process_report(_bench_run):
+    async def _process_report(_bench_run: JobRun):
         _job_report = await get_bench_report(_bench_run, temp_dir, remote_cache)
         if _job_report:
             job_reports.append(_job_report)
@@ -430,7 +269,7 @@ def _parse_bench_source(_bench_source: str) -> Source:

     # Gather all the benchmark labels from all the job reports
     if bench_labels is None:
-        all_bench_labels = _gather_all_bench_labels(job_reports)
+        all_bench_labels = gather_all_bench_labels(job_reports)
         if len(labels_override) > 0:
             logging.info(f"Subset of labels specified: {labels_override}")
             if not set(labels_override).issubset(all_bench_labels):
diff --git a/tools/performance/engine-benchmarks/bench_tool/bench_results.py b/tools/performance/engine-benchmarks/bench_tool/bench_results.py
new file mode 100644
index 000000000000..cb3f90abbd91
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/bench_results.py
@@ -0,0 +1,167 @@
+import asyncio
+import logging
+import math
+import os
+import shutil
+import zipfile
+from datetime import datetime
+from os import path
+from typing import List, Dict, Optional, Any
+from xml.etree import ElementTree as ET
+
+from bench_tool import JobRun, DATE_FORMAT, ENSO_REPO, JobReport, Commit, Author
+from bench_tool.gh import invoke_gh_api
+from bench_tool.remote_cache import RemoteCache
+
+
+async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]:
+    """
+    Fetches the list of all the SUCCESSFUL job runs from the GH API for the specified `branch`.
+
+    :param since: The date from which the benchmark results will be gathered.
+    :param until: The date until which the benchmark results will be gathered.
+    :param branch: The branch for which the benchmark results will be gathered.
+    :param workflow_id: The ID of the workflow for which the benchmark results will be gathered.
+    """
+    logging.info(f"Looking for all successful Engine benchmark workflow run "
+                 f"actions from {since} to {until} for branch {branch} "
+                 f"and workflow ID {workflow_id}")
+    query_fields = {
+        "branch": branch,
+        "status": "success",
+        "created": since.strftime(DATE_FORMAT) + ".." + until.strftime(DATE_FORMAT),
+        # Start with 1, just to determine the total count
+        "per_page": "1"
+    }
+    res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", query_fields)
+    total_count = int(res["total_count"])
+    per_page = 3
+    logging.debug(f"Total count of all runs: {total_count} for workflow ID "
+                  f"{workflow_id}. Will process {per_page} runs per page")
+
+    async def get_and_parse_run(page: int, parsed_bench_runs) -> None:
+        _query_fields = query_fields.copy()
+        _query_fields["page"] = str(page)
+        res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", _query_fields)
+        bench_runs_json = res["workflow_runs"]
+        _parsed_bench_runs = [_parse_bench_run_from_json(bench_run_json)
+                              for bench_run_json in bench_runs_json]
+        parsed_bench_runs.extend(_parsed_bench_runs)
+
+    # Now we know the total count, so we can fetch all the runs
+    query_fields["per_page"] = str(per_page)
+    num_queries = math.ceil(total_count / per_page)
+    parsed_bench_runs = []
+
+    tasks = []
+    # Page is indexed from 1
+    for page in range(1, num_queries + 1):
+        tasks.append(get_and_parse_run(page, parsed_bench_runs))
+    await asyncio.gather(*tasks)
+
+    return parsed_bench_runs
+
+
+async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: RemoteCache) -> Optional[JobReport]:
+    """
+    Extracts some data from the given bench_run, which was fetched via the GH API,
+    optionally getting it from the cache.
+    An artifact in GH can expire, in such case, returns None.
+    :param bench_run:
+    :param temp_dir: Used for downloading and unzipping artifacts.
+    :return: None if the corresponding artifact cannot be found, either as a GH artifact or in the remote cache.
+    """
+    assert os.path.exists(temp_dir) and os.path.isdir(temp_dir)
+
+    # There might be multiple artifacts in the artifact list for a benchmark run
+    # We are looking for the one named 'Runtime Benchmark Report', which will
+    # be downloaded as a ZIP file.
+    obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts")
+    artifacts = obj["artifacts"]
+    if len(artifacts) != 1:
+        logging.warning("Bench run %s does not contain exactly one artifact, but it is a successful run.",
+                        bench_run.id)
+        return None
+    bench_report_artifact = artifacts[0]
+    assert bench_report_artifact, "Benchmark Report artifact not found"
+    artifact_id = str(bench_report_artifact["id"])
+    created_at = bench_report_artifact["created_at"]
+    updated_at = bench_report_artifact["updated_at"]
+    expires_at = bench_report_artifact["expires_at"]
+    is_expired = bench_report_artifact["expired"]
+    logging.debug(f"Got artifact with ID {artifact_id}, from bench run {bench_run.id}: "
+                  f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}, "
+                  f"is_expired={is_expired}")
+
+    job_report = await remote_cache.fetch(bench_run.id)
+    if is_expired and job_report is None:
+        logging.error(
+            f"Artifact {artifact_id} from bench run {bench_run.id} is expired, and it is not in the remote cache")
+        return None
+    if job_report:
+        logging.debug(f"Got job report from the cache for {bench_run.id}")
+        return job_report
+
+    assert not is_expired
+
+    # Get contents of the ZIP artifact file
+    artifact_ret = await invoke_gh_api(ENSO_REPO, f"/actions/artifacts/{artifact_id}/zip", result_as_json=False)
+    zip_file_name = os.path.join(temp_dir, artifact_id + ".zip")
+    logging.debug(f"Writing artifact ZIP content into {zip_file_name}")
+    with open(zip_file_name, "wb") as zip_file:
+        zip_file.write(artifact_ret)
+
+    extracted_dirname = os.path.join(temp_dir, artifact_id)
+    if os.path.exists(extracted_dirname):
+        shutil.rmtree(extracted_dirname)
+    os.mkdir(extracted_dirname)
+
+    logging.debug(f"Extracting {zip_file_name} into {extracted_dirname}")
+    zip_file = zipfile.ZipFile(zip_file_name, "r")
+    zip_file.extractall(extracted_dirname)
+    bench_report_xml = path.join(extracted_dirname, "bench-report.xml")
+    assert path.exists(bench_report_xml)
+
+    bench_report_parsed = _parse_bench_report_from_xml(bench_report_xml, bench_run)
+    await remote_cache.put(bench_run.id, bench_report_parsed)
+    return bench_report_parsed
+
+
+def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) -> "JobReport":
+    logging.debug(f"Parsing BenchReport from {bench_report_xml_path}")
+    tree = ET.parse(bench_report_xml_path)
+    root = tree.getroot()
+    label_score_dict: Dict[str, float] = dict()
+    for cases in root:
+        assert cases.tag == "cases"
+        for case in cases:
+            assert case.tag == "case"
+            label = case.findtext("label").strip()
+            scores = case.find("scores")
+            scores_float = [float(score.text.strip()) for score in scores]
+            if len(scores_float) > 1:
+                logging.warning(f"More than one score for benchmark {label}, "
+                                f"using the last one (the newest one).")
+            label_score_dict[label] = scores_float[len(scores_float) - 1]
+    return JobReport(
+        label_score_dict=label_score_dict,
+        bench_run=bench_run
+    )
+
+
+def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun:
+    return JobRun(
+        id=str(obj["id"]),
+        html_url=obj["html_url"],
+        run_attempt=int(obj["run_attempt"]),
+        event=obj["event"],
+        display_title=obj["display_title"],
+        head_commit=Commit(
+            id=obj["head_commit"]["id"],
+            message=obj["head_commit"]["message"],
+            timestamp=obj["head_commit"]["timestamp"],
+            author=Author(
+                name=obj["head_commit"]["author"]["name"]
+            )
+        )
+    )
diff --git a/tools/performance/engine-benchmarks/bench_tool/utils.py b/tools/performance/engine-benchmarks/bench_tool/utils.py
new file mode 100644
index 000000000000..e6b929465b9e
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/utils.py
@@ -0,0 +1,31 @@
+import shutil
+import tempfile
+from typing import List, Set
+
+from bench_tool import JobReport
+
+
+class WithTempDir:
+    def __init__(self, prefix: str):
+        self.prefix = prefix
+        self.temp_dir = None
+
+    def __enter__(self):
+        self.temp_dir = tempfile.mkdtemp(prefix=self.prefix)
+        return self.temp_dir
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+
+def gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]:
+    """
+    Iterates through all the job reports and gathers all the benchmark labels
+    found. Note that every job report can have a different set of benchmark labels.
+    :return: Set of benchmark labels.
+    """
+    all_labels = set()
+    for job_report in job_reports:
+        for labels in job_report.label_score_dict.keys():
+            all_labels.add(labels)
+    return all_labels
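
Taken together, the two new modules form a small end-to-end API. A minimal sketch of how a caller is expected to combine them (the same flow as bench_download.py, with error handling omitted):

import asyncio
from datetime import datetime, timedelta

from bench_tool import ENGINE_BENCH_WORKFLOW_ID
from bench_tool.bench_results import get_bench_report, get_bench_runs
from bench_tool.remote_cache import ReadonlyRemoteCache
from bench_tool.utils import WithTempDir, gather_all_bench_labels


async def main() -> None:
    until = datetime.now()
    since = until - timedelta(days=14)
    runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
    cache = ReadonlyRemoteCache()
    await cache.initialize()
    reports = []
    with WithTempDir("bench-example") as tmp_dir:
        for run in runs:
            report = await get_bench_report(run, tmp_dir, cache)
            if report is not None:
                reports.append(report)
    print(sorted(gather_all_bench_labels(reports)))


asyncio.run(main())
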
+ """ + all_labels = set() + for job_report in job_reports: + for labels in job_report.label_score_dict.keys(): + all_labels.add(labels) + return all_labels From 6d3ee257a5aecb7ef72d0094d9cd4760193c6c44 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 15:45:53 +0100 Subject: [PATCH 23/93] Improve logging --- .../bench_tool/bench_results.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/bench_results.py b/tools/performance/engine-benchmarks/bench_tool/bench_results.py index cb3f90abbd91..15275b79cbbd 100644 --- a/tools/performance/engine-benchmarks/bench_tool/bench_results.py +++ b/tools/performance/engine-benchmarks/bench_tool/bench_results.py @@ -13,6 +13,7 @@ from bench_tool.gh import invoke_gh_api from bench_tool.remote_cache import RemoteCache +_logger = logging.getLogger(__name__) async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]: """ @@ -23,7 +24,7 @@ async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow :param branch: The branch for which the benchmark results will be gathered. :param workflow_id: The ID of the workflow for which the benchmark results will be gathered. """ - logging.info(f"Looking for all successful Engine benchmark workflow run " + _logger.info(f"Looking for all successful Engine benchmark workflow run " f"actions from {since} to {until} for branch {branch} " f"and workflow ID {workflow_id}") query_fields = { @@ -36,7 +37,7 @@ async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", query_fields) total_count = int(res["total_count"]) per_page = 3 - logging.debug(f"Total count of all runs: {total_count} for workflow ID " + _logger.debug(f"Total count of all runs: {total_count} for workflow ID " f"{workflow_id}. 
Will process {per_page} runs per page") async def get_and_parse_run(page: int, parsed_bench_runs) -> None: @@ -79,7 +80,7 @@ async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: Remot obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts") artifacts = obj["artifacts"] if len(artifacts) != 1: - logging.warning("Bench run %s does not contain exactly one artifact, but it is a successful run.", + _logger.warning("Bench run %s does not contain exactly one artifact, but it is a successful run.", bench_run.id) return None bench_report_artifact = artifacts[0] @@ -89,17 +90,17 @@ async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: Remot updated_at = bench_report_artifact["updated_at"] expires_at = bench_report_artifact["expires_at"] is_expired = bench_report_artifact["expired"] - logging.debug(f"Got artifact with ID {artifact_id}, from bench run {bench_run.id}: " + _logger.debug(f"Got artifact with ID {artifact_id}, from bench run {bench_run.id}: " f"created_at={created_at}, updated_at={updated_at}, expires_at={expires_at}, " f"is_expired={is_expired}") job_report = await remote_cache.fetch(bench_run.id) if is_expired and job_report is None: - logging.error( + _logger.error( f"Artifact {artifact_id} from bench run {bench_run.id} is expired, and it is not in the remote cache") return None if job_report: - logging.debug(f"Got job report from the cache for {bench_run.id}") + _logger.debug(f"Got job report from the cache for {bench_run.id}") return job_report assert not is_expired @@ -107,7 +108,7 @@ async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: Remot # Get contents of the ZIP artifact file artifact_ret = await invoke_gh_api(ENSO_REPO, f"/actions/artifacts/{artifact_id}/zip", result_as_json=False) zip_file_name = os.path.join(temp_dir, artifact_id + ".zip") - logging.debug(f"Writing artifact ZIP content into {zip_file_name}") + _logger.debug(f"Writing artifact ZIP content into {zip_file_name}") with open(zip_file_name, "wb") as zip_file: zip_file.write(artifact_ret) @@ -116,7 +117,7 @@ async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: Remot shutil.rmtree(extracted_dirname) os.mkdir(extracted_dirname) - logging.debug(f"Extracting {zip_file_name} into {extracted_dirname}") + _logger.debug(f"Extracting {zip_file_name} into {extracted_dirname}") zip_file = zipfile.ZipFile(zip_file_name, "r") zip_file.extractall(extracted_dirname) bench_report_xml = path.join(extracted_dirname, "bench-report.xml") @@ -128,7 +129,7 @@ async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: Remot def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) -> "JobReport": - logging.debug(f"Parsing BenchReport from {bench_report_xml_path}") + _logger.debug(f"Parsing BenchReport from {bench_report_xml_path}") tree = ET.parse(bench_report_xml_path) root = tree.getroot() label_score_dict: Dict[str, float] = dict() @@ -140,7 +141,7 @@ def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) scores = case.find("scores") scores_float = [float(score.text.strip()) for score in scores] if len(scores_float) > 1: - logging.warning(f"More than one score for benchmark {label}, " + _logger.warning(f"More than one score for benchmark {label}, " f"using the last one (the newest one).") label_score_dict[label] = scores_float[len(scores_float) - 1] return JobReport( From 58cdf0118d2184625075a85fe2635b17ef3b4ab6 Mon Sep 17 00:00:00 2001 From: Pavel 
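
With per-module loggers in place, verbosity can be tuned per component instead of globally. For example (a sketch, not part of the patches):

import logging

logging.basicConfig(level=logging.INFO)
# Debug output only for the report-fetching machinery:
logging.getLogger("bench_tool.bench_results").setLevel(logging.DEBUG)
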
From 58cdf0118d2184625075a85fe2635b17ef3b4ab6 Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Mon, 19 Feb 2024 15:46:21 +0100
Subject: [PATCH 24/93] Add skeleton of website_regen.py

---
 .../engine-benchmarks/website_regen.py        | 114 ++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 tools/performance/engine-benchmarks/website_regen.py

diff --git a/tools/performance/engine-benchmarks/website_regen.py b/tools/performance/engine-benchmarks/website_regen.py
new file mode 100644
index 000000000000..a48f99fb017d
--- /dev/null
+++ b/tools/performance/engine-benchmarks/website_regen.py
@@ -0,0 +1,114 @@
+"""
+IMPORTANT NOTE: Should be run only on the CI!!
+
+This script regenerates the benchmark results website, hosted as GH web pages on the
+https://github.com/enso-org/engine-benchmark-results repo.
+"""
+import asyncio
+import logging
+from argparse import ArgumentParser
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import List, Dict, Set
+
+from bench_tool import Source, JobRun, JobReport, GH_DATE_FORMAT, TemplateBenchData, JinjaData
+from bench_tool.bench_results import get_bench_runs, get_bench_report
+from bench_tool.remote_cache import SyncRemoteCache
+from bench_tool.template_render import create_template_data, render_html
+from bench_tool.utils import WithTempDir, gather_all_bench_labels
+
+# The inception date of the benchmarks, i.e., the date of the first benchmark run.
+ENGINE_SINCE = datetime.fromisoformat("2022-12-01")
+STDLIB_SINCE = datetime.fromisoformat("2023-08-22")
+BRANCH_DEVELOP = "develop"
+
+
+_logger = logging.getLogger("website_regen")
+
+
+async def init_remote_cache() -> SyncRemoteCache:
+    remote_cache = SyncRemoteCache()
+    await remote_cache.initialize()
+    return remote_cache
+
+
+async def generate_bench_website(
+        bench_source: Source,
+        remote_cache: SyncRemoteCache,
+        since: datetime,
+        generated_html: Path
+) -> None:
+    """
+    Generates a single `index.html` website with the benchmark results.
+
+    :param bench_source: Source of the benchmarks, either engine or stdlib
+    :param remote_cache: Remote cache used for fetching the job reports.
+    :param since: Date since when the benchmarks should be considered
+    :param generated_html: Path to the generated HTML file
+    :return:
+    """
+    bench_runs: List[JobRun] = []
+    now = datetime.now()
+    for workflow_id in bench_source.workflow_ids():
+        bench_runs.extend(
+            await get_bench_runs(since, now, BRANCH_DEVELOP, workflow_id)
+        )
+    assert len(bench_runs) > 0, "No benchmark runs found"
+
+    job_reports: List[JobReport] = []
+
+    async def _process_report(_bench_run: JobRun):
+        with WithTempDir("website-regen") as temp_dir:
+            _job_report = await get_bench_report(_bench_run, temp_dir, remote_cache)
+            if _job_report:
+                job_reports.append(_job_report)
+
+    tasks = []
+    for bench_run in bench_runs:
+        tasks.append(_process_report(bench_run))
+    await asyncio.gather(*tasks)
+
+    _logger.debug(f"Gathered {len(job_reports)} job reports")
+    assert len(job_reports) > 0, "No job reports found"
+
+    _logger.debug("Sorting job_reports by commit date")
+
+    def _get_timestamp(job_report: JobReport) -> datetime:
+        return datetime.strptime(
+            job_report.bench_run.head_commit.timestamp,
+            GH_DATE_FORMAT
+        )
+
+    job_reports.sort(key=lambda report: _get_timestamp(report))
+    all_bench_labels: Set[str] = gather_all_bench_labels(job_reports)
+    _logger.debug(f"Found {len(all_bench_labels)} unique benchmark labels")
+
+    job_reports_per_branch: Dict[str, List[JobReport]] = {
+        BRANCH_DEVELOP: job_reports
+    }
+    template_bench_datas: List[TemplateBenchData] = \
+        create_template_data(job_reports_per_branch, all_bench_labels)
+    template_bench_datas.sort(key=lambda data: data.id)
+
+    jinja_data = JinjaData(
+        since=since,
+        display_since=max(now - timedelta(days=30), since),
+        until=now,
+        bench_datas=template_bench_datas,
+        bench_source=bench_source,
+        branches=[BRANCH_DEVELOP],
+    )
+    _logger.debug(f"Rendering HTML to {generated_html}")
+    render_html(jinja_data, generated_html)
+    pass
+
+
+if __name__ == '__main__':
+    arg_parser = ArgumentParser(description="Regenerate the benchmark results website")
+    arg_parser.add_argument("-v", "--verbose", action="store_true")
+    arg_parser.add_argument("-n", "--dry-run", action="store_true")
+    args = arg_parser.parse_args()
+    verbose: bool = args.verbose
+    dry_run: bool = args.dry_run
+    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
+
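
The skeleton above parses arguments and configures logging but does not call anything yet. The eventual wiring will presumably look roughly like the following sketch, placed inside website_regen.py next to the helpers above (the output path and the final sync step are assumptions, not code from the patch):

import asyncio
from pathlib import Path

from bench_tool import Source


async def regen() -> None:
    # init_remote_cache, generate_bench_website and ENGINE_SINCE are the
    # helpers defined in website_regen.py above.
    remote_cache = await init_remote_cache()
    await generate_bench_website(
        Source.ENGINE,
        remote_cache,
        ENGINE_SINCE,
        Path("generated_site/engine-benchs.html")
    )
    # SyncRemoteCache can push newly cached reports back to the repo.
    await remote_cache.sync()


asyncio.run(regen())
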
From 54a9ce244fd7c5e0fa869b3c508bd1f825708519 Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Mon, 19 Feb 2024 16:29:03 +0100
Subject: [PATCH 25/93] Add some tests on bench_results

---
 .../bench_tool/test_bench_results.py          | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 tools/performance/engine-benchmarks/bench_tool/test_bench_results.py

diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py
new file mode 100644
index 000000000000..6042ff040a3a
--- /dev/null
+++ b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py
@@ -0,0 +1,48 @@
+import abc
+import logging
+import unittest
+from datetime import datetime
+
+from bench_tool import Source, ENGINE_BENCH_WORKFLOW_ID
+from .bench_results import get_bench_report, get_bench_runs
+from .remote_cache import ReadonlyRemoteCache
+from .utils import parse_commit_timestamp, WithTempDir
+
+# A single ID for a benchmark run between 2023-05-01 and 2023-05-05
+# We know for sure that this workflow run is on the GH.
+BENCH_RUN_ID = "4888453297"
+
+
+class MyTestCase(unittest.IsolatedAsyncioTestCase):
+    async def test_get_bench_run(self):
+        """
+        Bench run does not need remote cache - it fetches just some metadata about GH artifacts.
+        :return:
+        """
+        since = datetime.fromisoformat("2023-05-01")
+        until = datetime.fromisoformat("2023-05-05")
+        bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
+        self.assertEqual(1, len(bench_runs))
+        # There is just a single bench run between 2023-05-01 and 2023-05-05
+        bench_run = bench_runs[0]
+        self.assertEqual(BENCH_RUN_ID, bench_run.id)
+        commit_ts = parse_commit_timestamp(bench_run.head_commit)
+        self.assertLess(since, commit_ts)
+        self.assertGreater(until, commit_ts)
+
+    async def test_get_bench_report(self):
+        # We choose an old date on purpose, so that the remote cache must be used, and is thus
+        # transitively tested.
+        since = datetime.fromisoformat("2023-05-01")
+        until = datetime.fromisoformat("2023-05-05")
+        bench_runs = await get_bench_runs(since, until, "develop", ENGINE_BENCH_WORKFLOW_ID)
+        self.assertEqual(1, len(bench_runs))
+        bench_run = bench_runs[0]
+        remote_cache = ReadonlyRemoteCache()
+        await remote_cache.initialize()
+        with WithTempDir("test_get_bench_report") as temp_dir:
+            bench_report = await get_bench_report(bench_run, temp_dir, remote_cache)
+            self.assertIsNotNone(bench_report)
+            self.assertEqual(bench_run, bench_report.bench_run)
+            self.assertEqual(55, len(bench_report.label_score_dict))
+

From fcf1be1061863f0699278f282eca78b0d0228466 Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Mon, 19 Feb 2024 16:29:29 +0100
Subject: [PATCH 26/93] Search bench report artifact by name

---
 .../engine-benchmarks/bench_tool/bench_results.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/performance/engine-benchmarks/bench_tool/bench_results.py b/tools/performance/engine-benchmarks/bench_tool/bench_results.py
index 15275b79cbbd..959038d055ad 100644
--- a/tools/performance/engine-benchmarks/bench_tool/bench_results.py
+++ b/tools/performance/engine-benchmarks/bench_tool/bench_results.py
@@ -13,8 +13,11 @@
 from bench_tool.gh import invoke_gh_api
 from bench_tool.remote_cache import RemoteCache

+ARTIFACT_ID = "Runtime Benchmark Report"
+
 _logger = logging.getLogger(__name__)

+
 async def get_bench_runs(since: datetime, until: datetime, branch: str, workflow_id: int) -> List[JobRun]:
     """
     Fetches the list of all the SUCCESSFUL job runs from the GH API for the specified `branch`.
@@ -79,11 +82,12 @@ async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: Remot
     # be downloaded as a ZIP file.
     obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts")
     artifacts = obj["artifacts"]
-    if len(artifacts) != 1:
-        _logger.warning("Bench run %s does not contain exactly one artifact, but it is a successful run.",
-                        bench_run.id)
+    artifacts_by_names = {artifact["name"]: artifact for artifact in artifacts}
+    if ARTIFACT_ID not in artifacts_by_names:
+        _logger.warning("Bench run %s does not contain the artifact named %s, but it is a successful run.",
+                        bench_run.id, ARTIFACT_ID)
         return None
-    bench_report_artifact = artifacts[0]
+    bench_report_artifact = artifacts_by_names[ARTIFACT_ID]
     assert bench_report_artifact, "Benchmark Report artifact not found"
     artifact_id = str(bench_report_artifact["id"])
     created_at = bench_report_artifact["created_at"]
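
For reference, the name-based lookup assumes a response of roughly the following shape from the /actions/runs/<run_id>/artifacts endpoint (the values are illustrative; only the keys shown here are read by the code):

obj = {
    "artifacts": [
        {
            "id": 123456789,
            "name": "Runtime Benchmark Report",
            "expired": False,
            "created_at": "2023-05-02T12:00:00Z",
            "updated_at": "2023-05-02T12:00:00Z",
            "expires_at": "2023-07-31T12:00:00Z"
        }
        # ...possibly more artifacts with other names
    ]
}
artifacts_by_names = {artifact["name"]: artifact for artifact in obj["artifacts"]}
assert "Runtime Benchmark Report" in artifacts_by_names
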
""" + return datetime.strptime(commit.timestamp, GH_DATE_FORMAT) From 8e3ff5cc5cc325bad5c97cfa653788c85df0eea1 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 16:32:05 +0100 Subject: [PATCH 28/93] Optimize imports --- .../performance/engine-benchmarks/bench_tool/__init__.py | 8 +------- .../engine-benchmarks/bench_tool/test_bench_results.py | 4 +--- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py index 74cda21ff85d..b08c0060c71a 100644 --- a/tools/performance/engine-benchmarks/bench_tool/__init__.py +++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py @@ -2,14 +2,8 @@ from dataclasses import dataclass from datetime import timedelta, datetime from enum import Enum -from os import path -from typing import List, Dict, Optional, Set, Tuple -import re -import subprocess -import sys -from argparse import ArgumentParser -import shutil from pathlib import Path +from typing import List, Dict def pkg_dir() -> Path: diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py index 6042ff040a3a..085f97adf528 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py @@ -1,9 +1,7 @@ -import abc -import logging import unittest from datetime import datetime -from bench_tool import Source, ENGINE_BENCH_WORKFLOW_ID +from bench_tool import ENGINE_BENCH_WORKFLOW_ID from .bench_results import get_bench_report, get_bench_runs from .remote_cache import ReadonlyRemoteCache from .utils import parse_commit_timestamp, WithTempDir From 762a256918b7b0752b980400d311018b74a189db Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 16:37:55 +0100 Subject: [PATCH 29/93] Redirect output of git.clone --- tools/performance/engine-benchmarks/bench_tool/git.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py index 2251dba3d6c5..227da1f1d908 100644 --- a/tools/performance/engine-benchmarks/bench_tool/git.py +++ b/tools/performance/engine-benchmarks/bench_tool/git.py @@ -19,7 +19,7 @@ async def clone(repo: str, dest: Path) -> None: _logger.debug("Cloning %s to %s", repo, dest) dest_abs_path = str(dest.absolute()) args = ["clone", f"git@github.com:{repo}.git", dest_abs_path] - proc = await asyncio.create_subprocess_exec("git", *args) + proc = await asyncio.create_subprocess_exec("git", *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) ret = await proc.wait() if ret != 0: stdout, stderr = await proc.communicate() From 1e8caff0ed80e6c2e4c0e7c8a5222e1f4bf79adc Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 16:38:15 +0100 Subject: [PATCH 30/93] Test SyncRemoteCache --- .../engine-benchmarks/bench_tool/remote_cache.py | 3 +++ .../engine-benchmarks/bench_tool/test_remote_cache.py | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index d00e068ae7ec..7d0ad77603c7 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -111,6 +111,9 @@ def __init__(self): self._repo_root_dir = 
Path(tempfile.mkdtemp(prefix="bench_tool_remote_cache")) self._cache_dir = self._repo_root_dir.joinpath(CACHE_REMOTE_DIR) + def repo_root_dir(self) -> Path: + return self._repo_root_dir + async def initialize(self) -> None: # Checkout the repo await git.clone(BENCH_REPO, self._repo_root_dir) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py index 6c4da65c96f8..ae0122acb435 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py @@ -60,5 +60,9 @@ class TestSyncRemoteCache(unittest.IsolatedAsyncioTestCase): async def test_init_sync_remote_cache(self): remote_cache = SyncRemoteCache() await remote_cache.initialize() - # No exception should be thrown - self.assertTrue(True) + root_dir = remote_cache.repo_root_dir() + self.assertTrue(root_dir.exists()) + self.assertTrue(root_dir.is_dir()) + cache_dir = root_dir.joinpath("cache") + self.assertTrue(cache_dir.exists()) + self.assertTrue(cache_dir.is_dir()) From d401604b80c5a99a013d2f8548291ded676381b3 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 16:58:44 +0100 Subject: [PATCH 31/93] Move more common functionality into utils and bench_results --- .../engine-benchmarks/bench_download.py | 28 ++++-------------- .../bench_tool/bench_results.py | 29 +++++++++++++++++++ .../engine-benchmarks/bench_tool/utils.py | 14 +++++++++ .../engine-benchmarks/website_regen.py | 29 ++++--------------- 4 files changed, 53 insertions(+), 47 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index 0a00a4a68ec3..ffd200ac7217 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -47,9 +47,9 @@ import sys -from bench_tool.bench_results import get_bench_runs, get_bench_report +from bench_tool.bench_results import get_bench_runs, fetch_job_reports from bench_tool.remote_cache import ReadonlyRemoteCache -from bench_tool.utils import gather_all_bench_labels +from bench_tool.utils import gather_all_bench_labels, sort_job_reports if not (sys.version_info.major >= 3 and sys.version_info.minor >= 7): print("ERROR: python version lower than 3.7") @@ -67,7 +67,7 @@ from os import path from typing import List, Dict, Optional, Set -from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_DATE_FORMAT, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, \ +from bench_tool import DATE_FORMAT, GENERATED_SITE_DIR, GH_ARTIFACT_RETENTION_PERIOD, TEMPLATES_DIR, \ JINJA_TEMPLATE, JobRun, JobReport, \ TemplateBenchData, JinjaData, Source from bench_tool.gh import ensure_gh_installed @@ -232,18 +232,7 @@ def _parse_bench_source(_bench_source: str) -> Source: f" until {until} for branch {branch}") exit(1) - job_reports: List[JobReport] = [] - - async def _process_report(_bench_run: JobRun): - _job_report = await get_bench_report(_bench_run, temp_dir, remote_cache) - if _job_report: - job_reports.append(_job_report) - - tasks = [] - for bench_run in bench_runs: - tasks.append(_process_report(bench_run)) - await asyncio.gather(*tasks) - + job_reports = await fetch_job_reports(bench_runs, remote_cache) logging.debug(f"Got {len(job_reports)} job reports for branch {branch}") if len(job_reports) == 0: print(f"There were 0 job_reports in the specified time interval, " @@ -252,14 +241,7 @@ async def _process_report(_bench_run: 
JobRun): exit(1) logging.debug("Sorting job_reports by commit date") - - def _get_timestamp(job_report: JobReport) -> datetime: - return datetime.strptime( - job_report.bench_run.head_commit.timestamp, - GH_DATE_FORMAT - ) - - job_reports.sort(key=lambda report: _get_timestamp(report)) + sort_job_reports(job_reports) if create_csv: write_bench_reports_to_csv(job_reports, csv_output) diff --git a/tools/performance/engine-benchmarks/bench_tool/bench_results.py b/tools/performance/engine-benchmarks/bench_tool/bench_results.py index 959038d055ad..da88a2f2cc47 100644 --- a/tools/performance/engine-benchmarks/bench_tool/bench_results.py +++ b/tools/performance/engine-benchmarks/bench_tool/bench_results.py @@ -12,6 +12,7 @@ from bench_tool import JobRun, DATE_FORMAT, ENSO_REPO, JobReport, Commit, Author from bench_tool.gh import invoke_gh_api from bench_tool.remote_cache import RemoteCache +from bench_tool.utils import WithTempDir ARTIFACT_ID = "Runtime Benchmark Report" @@ -66,6 +67,34 @@ async def get_and_parse_run(page: int, parsed_bench_runs) -> None: return parsed_bench_runs +async def fetch_job_reports( + bench_runs: List[JobRun], + remote_cache: RemoteCache +) -> List[JobReport]: + """ + Fetches all benchmark reports for the given benchmark runs. Benchmark runs are basically + just IDs of artifacts, and the reports are the actual benchmark results. These results are + either on the GH as artifacts, or are fetched from the cache if the artifact is expired. + All the runs are fetched in parallel. + :param bench_runs: + :param remote_cache: + :return: + """ + job_reports: List[JobReport] = [] + + async def _process_report(_bench_run: JobRun): + with WithTempDir("bench_download") as temp_dir: + _job_report = await get_bench_report(_bench_run, temp_dir, remote_cache) + if _job_report: + job_reports.append(_job_report) + + tasks = [] + for bench_run in bench_runs: + tasks.append(_process_report(bench_run)) + await asyncio.gather(*tasks) + return job_reports + + async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: RemoteCache) -> Optional[JobReport]: """ Extracts some data from the given bench_run, which was fetched via the GH API, diff --git a/tools/performance/engine-benchmarks/bench_tool/utils.py b/tools/performance/engine-benchmarks/bench_tool/utils.py index cd0388e4540e..63eca1fbb9a8 100644 --- a/tools/performance/engine-benchmarks/bench_tool/utils.py +++ b/tools/performance/engine-benchmarks/bench_tool/utils.py @@ -35,3 +35,17 @@ def gather_all_bench_labels(job_reports: List[JobReport]) -> Set[str]: def parse_commit_timestamp(commit: Commit) -> datetime: """ Parses the timestamp from the commit based on the GH's formatting. """ return datetime.strptime(commit.timestamp, GH_DATE_FORMAT) + + +def sort_job_reports( + job_reports: List[JobReport] +) -> None: + """ + Sorts the job reports in place by the commit date. + :param job_reports: + :return: + """ + def _get_timestamp(job_report: JobReport) -> datetime: + return parse_commit_timestamp(job_report.bench_run.head_commit) + + job_reports.sort(key=lambda report: _get_timestamp(report)) diff --git a/tools/performance/engine-benchmarks/website_regen.py b/tools/performance/engine-benchmarks/website_regen.py index a48f99fb017d..f2889140ecb8 100644 --- a/tools/performance/engine-benchmarks/website_regen.py +++ b/tools/performance/engine-benchmarks/website_regen.py @@ -4,18 +4,17 @@ This script regenerate the benchmark results website, hosted as GH web pages on the https://github.com/enso-org/engine-benchmark-results repo. 
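+Unlike bench_download.py, which downloads results only for a chosen time
+interval, this script is meant to regenerate the site from the whole history
+of the benchmark results.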
""" -import asyncio import logging from argparse import ArgumentParser from datetime import datetime, timedelta from pathlib import Path from typing import List, Dict, Set -from bench_tool import Source, JobRun, JobReport, GH_DATE_FORMAT, TemplateBenchData, JinjaData -from bench_tool.bench_results import get_bench_runs, get_bench_report +from bench_tool import Source, JobRun, JobReport, TemplateBenchData, JinjaData +from bench_tool.bench_results import get_bench_runs, fetch_job_reports from bench_tool.remote_cache import SyncRemoteCache from bench_tool.template_render import create_template_data, render_html -from bench_tool.utils import WithTempDir, gather_all_bench_labels +from bench_tool.utils import gather_all_bench_labels, sort_job_reports # The inception date of the benchmarks, i.e., the date of the first benchmark run. ENGINE_SINCE = datetime.fromisoformat("2022-12-01") @@ -55,31 +54,13 @@ async def generate_bench_website( ) assert len(bench_runs) > 0, "No benchmark runs found" - job_reports: List[JobReport] = [] - - async def _process_report(_bench_run: JobRun): - with WithTempDir("website-regen") as temp_dir: - _job_report = await get_bench_report(_bench_run, temp_dir, remote_cache) - if _job_report: - job_reports.append(_job_report) - - tasks = [] - for bench_run in bench_runs: - tasks.append(_process_report(bench_run)) - await asyncio.gather(*tasks) - + job_reports = await fetch_job_reports(bench_runs, remote_cache) _logger.debug(f"Gathered {len(job_reports)} job reports") assert len(job_reports) > 0, "No job reports found" _logger.debug("Sorting job_reports by commit date") + sort_job_reports(job_reports) - def _get_timestamp(job_report: JobReport) -> datetime: - return datetime.strptime( - job_report.bench_run.head_commit.timestamp, - GH_DATE_FORMAT - ) - - job_reports.sort(key=lambda report: _get_timestamp(report)) all_bench_labels: Set[str] = gather_all_bench_labels(job_reports) _logger.debug(f"Found {len(all_bench_labels)} unique benchmark labels") From 555666b86919b28573d5f949c5791b851bd57ecf Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:40:14 +0100 Subject: [PATCH 32/93] Add git.pull --- .../performance/engine-benchmarks/bench_tool/git.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py index 227da1f1d908..a4519efd2544 100644 --- a/tools/performance/engine-benchmarks/bench_tool/git.py +++ b/tools/performance/engine-benchmarks/bench_tool/git.py @@ -28,6 +28,16 @@ async def clone(repo: str, dest: Path) -> None: assert dest.exists() +async def pull(repo: Path) -> None: + _logger.debug("Pulling %s", repo) + # Avoid unnecessary merge commits by using `--ff-only` + args = ["pull", "--ff-only"] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + if ret != 0: + raise RuntimeError(f"Failed to pull {repo}") + + async def status(repo: Path) -> GitStatus: assert repo.exists() proc = await asyncio.create_subprocess_exec("git", "status", "--porcelain", cwd=repo, @@ -48,6 +58,8 @@ async def status(repo: Path) -> GitStatus: async def add(repo: Path, files: Set[str]) -> None: + _logger.debug("Adding %s to %s", files, repo) + assert len(files) > 0 args = ["add"] + list(files) proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) ret = await proc.wait() From 
96c1cc4e41d58e5968ab4a50cd94489929b084ae Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:41:17 +0100 Subject: [PATCH 33/93] RemoteCache does not have initialize method --- tools/performance/engine-benchmarks/bench_download.py | 1 - .../engine-benchmarks/bench_tool/remote_cache.py | 11 ----------- 2 files changed, 12 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index ffd200ac7217..834f747f2e39 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -215,7 +215,6 @@ def _parse_bench_source(_bench_source: str) -> Source: f"and the older artifacts will be fetched from the cache.") remote_cache = ReadonlyRemoteCache() - await remote_cache.initialize() bench_labels: Optional[Set[str]] = None """ Set of all gathered benchmark labels from all the job reports """ diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index 7d0ad77603c7..41416012acfc 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -20,13 +20,6 @@ class RemoteCache(abc.ABC): - @abc.abstractmethod - async def initialize(self) -> None: - """ - Initializes the remote cache. - :return: - """ - raise NotImplementedError @abc.abstractmethod async def fetch(self, bench_id: str) -> Optional[JobReport]: @@ -64,10 +57,6 @@ class ReadonlyRemoteCache(RemoteCache): def __init__(self): self._fetched_items: Dict[str, JobReport] = {} - async def initialize(self) -> None: - # Nop - pass - async def fetch(self, bench_id: str) -> Optional[JobReport]: """ Fetches a job report for the given bench ID from the remote cache """ if bench_id in self._fetched_items: From 1e7c7616a896ca806d47169adf2d49e07c9ff36a Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:44:37 +0100 Subject: [PATCH 34/93] Move generate_bench_website to utils --- .../engine-benchmarks/bench_tool/utils.py | 66 +++++++++++++++++- .../engine-benchmarks/website_regen.py | 69 +++---------------- 2 files changed, 72 insertions(+), 63 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/utils.py b/tools/performance/engine-benchmarks/bench_tool/utils.py index 63eca1fbb9a8..c5b99667bb24 100644 --- a/tools/performance/engine-benchmarks/bench_tool/utils.py +++ b/tools/performance/engine-benchmarks/bench_tool/utils.py @@ -1,9 +1,16 @@ +import logging import shutil import tempfile -from datetime import datetime -from typing import List, Set +from datetime import datetime, timedelta +from pathlib import Path +from typing import List, Set, Dict -from bench_tool import JobReport, GH_DATE_FORMAT, Commit +from bench_tool import JobReport, GH_DATE_FORMAT, Commit, Source, JobRun, TemplateBenchData, JinjaData, BRANCH_DEVELOP +from bench_tool.bench_results import get_bench_runs, fetch_job_reports +from bench_tool.remote_cache import SyncRemoteCache +from bench_tool.template_render import create_template_data, render_html + +_logger = logging.getLogger(__name__) class WithTempDir: @@ -49,3 +56,56 @@ def _get_timestamp(job_report: JobReport) -> datetime: return parse_commit_timestamp(job_report.bench_run.head_commit) job_reports.sort(key=lambda report: _get_timestamp(report)) + + +async def generate_bench_website( + bench_source: Source, + remote_cache: SyncRemoteCache, + since: datetime, + until: datetime, + 
generated_html: Path +) -> None: + """ + Generates single `index.html` website with the benchmark results. + + :param bench_source: Source of the benchmarks, either engine or stdlib + :param remote_cache: Remote cache used for fetching the job reports. + :param since: Date since when the benchmarks should be considered + :param until: Date until when the benchmarks should be considered + :param generated_html: Path to the generated HTML file + :return: + """ + bench_runs: List[JobRun] = [] + for workflow_id in bench_source.workflow_ids(): + bench_runs.extend( + await get_bench_runs(since, until, BRANCH_DEVELOP, workflow_id) + ) + assert len(bench_runs) > 0, "No benchmark runs found" + + job_reports = await fetch_job_reports(bench_runs, remote_cache) + _logger.debug(f"Gathered {len(job_reports)} job reports") + assert len(job_reports) > 0, "No job reports found" + + _logger.debug("Sorting job_reports by commit date") + sort_job_reports(job_reports) + + all_bench_labels: Set[str] = gather_all_bench_labels(job_reports) + _logger.debug(f"Found {len(all_bench_labels)} unique benchmark labels") + + job_reports_per_branch: Dict[str, List[JobReport]] = { + BRANCH_DEVELOP: job_reports + } + template_bench_datas: List[TemplateBenchData] =\ + create_template_data(job_reports_per_branch, all_bench_labels) + template_bench_datas.sort(key=lambda data: data.id) + + jinja_data = JinjaData( + since=since, + display_since=max(until - timedelta(days=30), since), + until=until, + bench_datas=template_bench_datas, + bench_source=bench_source, + branches=[BRANCH_DEVELOP], + ) + _logger.debug(f"Rendering HTML to {generated_html}") + render_html(jinja_data, generated_html) diff --git a/tools/performance/engine-benchmarks/website_regen.py b/tools/performance/engine-benchmarks/website_regen.py index f2889140ecb8..f87a54f293a5 100644 --- a/tools/performance/engine-benchmarks/website_regen.py +++ b/tools/performance/engine-benchmarks/website_regen.py @@ -4,17 +4,16 @@ This script regenerate the benchmark results website, hosted as GH web pages on the https://github.com/enso-org/engine-benchmark-results repo. """ +import asyncio import logging from argparse import ArgumentParser -from datetime import datetime, timedelta +from datetime import datetime from pathlib import Path -from typing import List, Dict, Set +from typing import Optional -from bench_tool import Source, JobRun, JobReport, TemplateBenchData, JinjaData -from bench_tool.bench_results import get_bench_runs, fetch_job_reports +from bench_tool import Source, git from bench_tool.remote_cache import SyncRemoteCache -from bench_tool.template_render import create_template_data, render_html -from bench_tool.utils import gather_all_bench_labels, sort_job_reports +from bench_tool.utils import generate_bench_website # The inception date of the benchmarks, i.e., the date of the first benchmark run. ENGINE_SINCE = datetime.fromisoformat("2022-12-01") @@ -31,60 +30,7 @@ async def init_remote_cache() -> SyncRemoteCache: return remote_cache -async def generate_bench_website( - bench_source: Source, - remote_cache: SyncRemoteCache, - since: datetime, - generated_html: Path -) -> None: - """ - Generates single `index.html` website with the benchmark results. - - :param bench_source: Source of the benchmarks, either engine or stdlib - :param remote_cache: Remote cache used for fetching the job reports. 
- :param since: Date since when the benchmarks should be considered - :param generated_html: Path to the generated HTML file - :return: - """ - bench_runs: List[JobRun] = [] - now = datetime.now() - for workflow_id in bench_source.workflow_ids(): - bench_runs.extend( - await get_bench_runs(since, now, BRANCH_DEVELOP, workflow_id) - ) - assert len(bench_runs) > 0, "No benchmark runs found" - - job_reports = await fetch_job_reports(bench_runs, remote_cache) - _logger.debug(f"Gathered {len(job_reports)} job reports") - assert len(job_reports) > 0, "No job reports found" - - _logger.debug("Sorting job_reports by commit date") - sort_job_reports(job_reports) - - all_bench_labels: Set[str] = gather_all_bench_labels(job_reports) - _logger.debug(f"Found {len(all_bench_labels)} unique benchmark labels") - - job_reports_per_branch: Dict[str, List[JobReport]] = { - BRANCH_DEVELOP: job_reports - } - template_bench_datas: List[TemplateBenchData] =\ - create_template_data(job_reports_per_branch, all_bench_labels) - template_bench_datas.sort(key=lambda data: data.id) - - jinja_data = JinjaData( - since=since, - display_since=max(now - timedelta(days=30), since), - until=now, - bench_datas=template_bench_datas, - bench_source=bench_source, - branches=[BRANCH_DEVELOP], - ) - _logger.debug(f"Rendering HTML to {generated_html}") - render_html(jinja_data, generated_html) - pass - - -if __name__ == '__main__': +async def main(): arg_parser = ArgumentParser(description="Regenerate the benchmark results website") arg_parser.add_argument("-v", "--verbose", action="store_true") arg_parser.add_argument("-n", "--dry-run", action="store_true") @@ -93,3 +39,6 @@ async def generate_bench_website( dry_run: bool = args.dry_run logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) + +if __name__ == "__main__": + asyncio.run(main()) From 51e1b09afbbf03bf188535d8560da97f81ffd160 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:45:52 +0100 Subject: [PATCH 35/93] SyncRemoteCache can be initialized with local repo --- .../engine-benchmarks/bench_tool/__init__.py | 2 + .../bench_tool/remote_cache.py | 56 +++++++++++-------- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py index b08c0060c71a..33df62ba937a 100644 --- a/tools/performance/engine-benchmarks/bench_tool/__init__.py +++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py @@ -13,6 +13,7 @@ def pkg_dir() -> Path: ENSO_REPO = "enso-org/enso" BENCH_REPO = "enso-org/engine-benchmark-results" +BRANCH_DEVELOP = "develop" DATE_FORMAT = "%Y-%m-%d" GH_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ" ENGINE_BENCH_WORKFLOW_ID = 29450898 @@ -57,6 +58,7 @@ def workflow_ids(self) -> List[int]: else: raise ValueError(f"Unknown source {self}") + @dataclass class Author: name: str diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index 41416012acfc..939d75cb6f17 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -40,14 +40,6 @@ async def put(self, bench_id: str, job_report: JobReport) -> None: """ raise NotImplementedError - @abc.abstractmethod - async def sync(self) -> None: - """ - Synchronizes the remote cache with the local state. 
- :return: - """ - raise NotImplementedError - class ReadonlyRemoteCache(RemoteCache): """ @@ -82,10 +74,6 @@ async def put(self, bench_id: str, job_report: JobReport) -> None: assert bench_id not in self._fetched_items self._fetched_items[bench_id] = job_report - async def sync(self) -> None: - # Nop - pass - def _get_remote_path(self, bench_id: str) -> str: assert _is_benchrun_id(bench_id) return os.path.join(CACHE_REMOTE_DIR, bench_id + ".json") @@ -96,16 +84,32 @@ class SyncRemoteCache(RemoteCache): Fetches and pushes the artifacts to the remote cache. Needs a write permissions to the repo. """ - def __init__(self): - self._repo_root_dir = Path(tempfile.mkdtemp(prefix="bench_tool_remote_cache")) + def __init__(self, local_root_dir: Optional[Path] = None): + if local_root_dir is not None: + assert local_root_dir.exists() + assert local_root_dir.is_dir() + assert local_root_dir.joinpath(".git").exists() + self._repo_root_dir = local_root_dir + self._should_clone = False + else: + self._repo_root_dir = Path(tempfile.mkdtemp(prefix="bench_tool_remote_cache")) + self._should_clone = True + assert self._repo_root_dir.exists() + assert self._repo_root_dir.is_dir() self._cache_dir = self._repo_root_dir.joinpath(CACHE_REMOTE_DIR) def repo_root_dir(self) -> Path: return self._repo_root_dir async def initialize(self) -> None: - # Checkout the repo - await git.clone(BENCH_REPO, self._repo_root_dir) + """ + Make sure the repo is up-to-date + :return: + """ + if self._should_clone: + await git.clone(BENCH_REPO, self._repo_root_dir) + else: + await git.pull(self._repo_root_dir) assert self._repo_root_dir.exists() assert self._cache_dir.exists() @@ -125,15 +129,23 @@ async def put(self, bench_id: str, job_report: JobReport) -> None: json.dump(job_report, f) async def sync(self) -> None: + """ + Synchronizes the local repo state with upstream. That means, pushes if some untracked or + modified files are in the local directory. 
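+        Untracked and modified files are staged with ``git add``, committed, and
+        pushed upstream. A minimal usage sketch (the methods used below are the
+        ones defined on this class)::
+
+            cache = SyncRemoteCache()
+            await cache.initialize()  # clone, or pull when built from a local repo
+            await cache.put(bench_id, job_report)
+            await cache.sync()  # commit and push any new reports
+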
+ :return: + """ status = await git.status(self._repo_root_dir) - assert len(status.modified) == 0, "The RemoteCache should not modify any files, only add new ones" - assert len(status.added) == 0, f"Only untracked files expected in {self._repo_root_dir}" - if len(status.untracked) > 0: - _logger.info("Untracked files found in the remote cache: %s", status.untracked) - await git.add(self._repo_root_dir, status.untracked) + is_repo_dirty = len(status.modified) > 0 or len(status.added) > 0 + if is_repo_dirty: + _logger.info("Untracked or modified files found in the repo: %s", self._repo_root_dir) + if len(status.modified) > 0: + _logger.debug("Modified files: %s", status.modified) + await git.add(self._repo_root_dir, status.modified) + if len(status.untracked) > 0: + _logger.debug("Untracked files: %s", status.untracked) + await git.add(self._repo_root_dir, status.untracked) await git.commit(self._repo_root_dir, f"Add {len(status.untracked)} new reports") await git.push(self._repo_root_dir) - shutil.rmtree(self._repo_root_dir, ignore_errors=True) def _is_benchrun_id(name: str) -> bool: From ffe2fab38242ebb2aa967b42c1cd54645b163775 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:46:28 +0100 Subject: [PATCH 36/93] SyncRemoteCache has html files fields --- .../engine-benchmarks/bench_tool/remote_cache.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index 939d75cb6f17..b7af8392f344 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -17,6 +17,8 @@ _logger = logging.getLogger(__name__) CACHE_REMOTE_DIR = "cache" +ENGINE_INDEX_HTML = "engine-benchs.html" +STDLIB_INDEX_HTML = "stdlib-benchs.html" class RemoteCache(abc.ABC): @@ -101,6 +103,15 @@ def __init__(self, local_root_dir: Optional[Path] = None): def repo_root_dir(self) -> Path: return self._repo_root_dir + def cache_dir(self) -> Path: + return self._cache_dir + + def engine_index_html(self) -> Path: + return self._repo_root_dir.joinpath(ENGINE_INDEX_HTML) + + def stdlib_index_html(self) -> Path: + return self._repo_root_dir.joinpath(STDLIB_INDEX_HTML) + async def initialize(self) -> None: """ Make sure the repo is up-to-date From 4ffdadf3dd7b35b18148deb1c37b283f0fcbdb1f Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:46:56 +0100 Subject: [PATCH 37/93] Add test_sync_remote_cache_from_local_repo --- .../bench_tool/test_remote_cache.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py index ae0122acb435..ff8d344e1f70 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py @@ -1,4 +1,5 @@ import unittest +from pathlib import Path from . 
import JobReport, JobRun, Commit, Author from .remote_cache import ReadonlyRemoteCache, SyncRemoteCache @@ -55,14 +56,31 @@ async def test_put_job_report_into_cache(self): self.assertEquals(bench_id, job_report.bench_run.id) -# WARNING: This case can take very long class TestSyncRemoteCache(unittest.IsolatedAsyncioTestCase): - async def test_init_sync_remote_cache(self): + LOCAL_REPO_ROOT = Path("/home/pavel/dev/engine-benchmark-results") + + async def test_init_sync_remote_cache_from_local_repo(self): + if not self.LOCAL_REPO_ROOT.exists(): + self.skipTest(f"Local repo {self.LOCAL_REPO_ROOT} does not exist") + remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT) + await remote_cache.initialize() + root_dir = remote_cache.repo_root_dir() + self.assertTrue(root_dir.exists()) + self.assertTrue(root_dir.is_dir()) + cache_dir = remote_cache.cache_dir() + self.assertTrue(cache_dir.exists()) + self.assertTrue(cache_dir.is_dir()) + self.assertTrue(remote_cache.engine_index_html().exists()) + self.assertTrue(remote_cache.stdlib_index_html().exists()) + + async def test_clone_sync_remote_cache(self): remote_cache = SyncRemoteCache() await remote_cache.initialize() root_dir = remote_cache.repo_root_dir() self.assertTrue(root_dir.exists()) self.assertTrue(root_dir.is_dir()) - cache_dir = root_dir.joinpath("cache") + cache_dir = remote_cache.cache_dir() self.assertTrue(cache_dir.exists()) self.assertTrue(cache_dir.is_dir()) + self.assertTrue(remote_cache.engine_index_html().exists()) + self.assertTrue(remote_cache.stdlib_index_html().exists()) From faa9b410f9a37ca7e164fd5cd879bcb4b71c9e0d Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:47:45 +0100 Subject: [PATCH 38/93] Finish implementation of website_regen --- .../engine-benchmarks/website_regen.py | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/tools/performance/engine-benchmarks/website_regen.py b/tools/performance/engine-benchmarks/website_regen.py index f87a54f293a5..a4795f322e0e 100644 --- a/tools/performance/engine-benchmarks/website_regen.py +++ b/tools/performance/engine-benchmarks/website_regen.py @@ -11,33 +11,50 @@ from pathlib import Path from typing import Optional -from bench_tool import Source, git +from bench_tool import Source from bench_tool.remote_cache import SyncRemoteCache from bench_tool.utils import generate_bench_website # The inception date of the benchmarks, i.e., the date of the first benchmark run. 
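+# (The stdlib date matches the introduction of the new 'Benchmark Engine'
+# workflow on 2023-08-22; see NEW_ENGINE_BENCH_WORKFLOW_ID in bench_tool.)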
ENGINE_SINCE = datetime.fromisoformat("2022-12-01") STDLIB_SINCE = datetime.fromisoformat("2023-08-22") -BRANCH_DEVELOP = "develop" - _logger = logging.getLogger("website_regen") -async def init_remote_cache() -> SyncRemoteCache: - remote_cache = SyncRemoteCache() - await remote_cache.initialize() - return remote_cache - - async def main(): arg_parser = ArgumentParser(description="Regenerate the benchmark results website") arg_parser.add_argument("-v", "--verbose", action="store_true") arg_parser.add_argument("-n", "--dry-run", action="store_true") + arg_parser.add_argument("--local-repo", + type=str, + help="Path to the local clone of the engine-benchmark-results repo") args = arg_parser.parse_args() verbose: bool = args.verbose - dry_run: bool = args.dry_run + local_repo: Optional[Path] = Path(args.local_repo) if args.local_repo else None logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) + remote_cache = SyncRemoteCache(local_repo) + _logger.info("Initializing the bench results repo, this might take some time") + await remote_cache.initialize() + _logger.info("Bench results repo initialized") + + now = datetime.now() + engine_html_task = generate_bench_website( + Source.ENGINE, + remote_cache, + ENGINE_SINCE, + now, + remote_cache.engine_index_html() + ) + stdlib_html_task = generate_bench_website( + Source.STDLIB, + remote_cache, + STDLIB_SINCE, + now, + remote_cache.stdlib_index_html() + ) + await asyncio.gather(engine_html_task, stdlib_html_task) + await remote_cache.sync() if __name__ == "__main__": From 98f0175ebdd4965cf4835ddc39845468ec2f5a17 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:51:45 +0100 Subject: [PATCH 39/93] generate_website is in its own module --- .../engine-benchmarks/bench_tool/test_gh.py | 1 - .../engine-benchmarks/bench_tool/utils.py | 61 +---------------- .../engine-benchmarks/bench_tool/website.py | 65 +++++++++++++++++++ 3 files changed, 68 insertions(+), 59 deletions(-) create mode 100644 tools/performance/engine-benchmarks/bench_tool/website.py diff --git a/tools/performance/engine-benchmarks/bench_tool/test_gh.py b/tools/performance/engine-benchmarks/bench_tool/test_gh.py index fe3edb85c7d0..51de905959dc 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_gh.py @@ -4,7 +4,6 @@ from . 
import gh -# Create a unit test class TestGH(unittest.IsolatedAsyncioTestCase): async def test_ensure_gh_installed(self): self.assertIsNone(gh.ensure_gh_installed()) diff --git a/tools/performance/engine-benchmarks/bench_tool/utils.py b/tools/performance/engine-benchmarks/bench_tool/utils.py index c5b99667bb24..f271e7d3a06b 100644 --- a/tools/performance/engine-benchmarks/bench_tool/utils.py +++ b/tools/performance/engine-benchmarks/bench_tool/utils.py @@ -1,14 +1,10 @@ import logging import shutil import tempfile -from datetime import datetime, timedelta -from pathlib import Path -from typing import List, Set, Dict +from datetime import datetime +from typing import List, Set -from bench_tool import JobReport, GH_DATE_FORMAT, Commit, Source, JobRun, TemplateBenchData, JinjaData, BRANCH_DEVELOP -from bench_tool.bench_results import get_bench_runs, fetch_job_reports -from bench_tool.remote_cache import SyncRemoteCache -from bench_tool.template_render import create_template_data, render_html +from bench_tool import JobReport, GH_DATE_FORMAT, Commit _logger = logging.getLogger(__name__) @@ -58,54 +54,3 @@ def _get_timestamp(job_report: JobReport) -> datetime: job_reports.sort(key=lambda report: _get_timestamp(report)) -async def generate_bench_website( - bench_source: Source, - remote_cache: SyncRemoteCache, - since: datetime, - until: datetime, - generated_html: Path -) -> None: - """ - Generates single `index.html` website with the benchmark results. - - :param bench_source: Source of the benchmarks, either engine or stdlib - :param remote_cache: Remote cache used for fetching the job reports. - :param since: Date since when the benchmarks should be considered - :param until: Date until when the benchmarks should be considered - :param generated_html: Path to the generated HTML file - :return: - """ - bench_runs: List[JobRun] = [] - for workflow_id in bench_source.workflow_ids(): - bench_runs.extend( - await get_bench_runs(since, until, BRANCH_DEVELOP, workflow_id) - ) - assert len(bench_runs) > 0, "No benchmark runs found" - - job_reports = await fetch_job_reports(bench_runs, remote_cache) - _logger.debug(f"Gathered {len(job_reports)} job reports") - assert len(job_reports) > 0, "No job reports found" - - _logger.debug("Sorting job_reports by commit date") - sort_job_reports(job_reports) - - all_bench_labels: Set[str] = gather_all_bench_labels(job_reports) - _logger.debug(f"Found {len(all_bench_labels)} unique benchmark labels") - - job_reports_per_branch: Dict[str, List[JobReport]] = { - BRANCH_DEVELOP: job_reports - } - template_bench_datas: List[TemplateBenchData] =\ - create_template_data(job_reports_per_branch, all_bench_labels) - template_bench_datas.sort(key=lambda data: data.id) - - jinja_data = JinjaData( - since=since, - display_since=max(until - timedelta(days=30), since), - until=until, - bench_datas=template_bench_datas, - bench_source=bench_source, - branches=[BRANCH_DEVELOP], - ) - _logger.debug(f"Rendering HTML to {generated_html}") - render_html(jinja_data, generated_html) diff --git a/tools/performance/engine-benchmarks/bench_tool/website.py b/tools/performance/engine-benchmarks/bench_tool/website.py new file mode 100644 index 000000000000..babca05203a3 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/website.py @@ -0,0 +1,65 @@ +import logging +from datetime import datetime, timedelta +from pathlib import Path +from typing import List, Dict, Set + +from bench_tool import JobRun, BRANCH_DEVELOP, Source, JobReport, TemplateBenchData, JinjaData +from 
bench_tool.bench_results import get_bench_runs, fetch_job_reports +from bench_tool.remote_cache import SyncRemoteCache +from bench_tool.template_render import create_template_data, render_html +from bench_tool.utils import sort_job_reports, gather_all_bench_labels + +_logger = logging.getLogger(__name__) + + +async def generate_bench_website( + bench_source: Source, + remote_cache: SyncRemoteCache, + since: datetime, + until: datetime, + generated_html: Path +) -> None: + """ + Generates single `index.html` website with the benchmark results. + + :param bench_source: Source of the benchmarks, either engine or stdlib + :param remote_cache: Remote cache used for fetching the job reports. + :param since: Date since when the benchmarks should be considered + :param until: Date until when the benchmarks should be considered + :param generated_html: Path to the generated HTML file + :return: + """ + bench_runs: List[JobRun] = [] + for workflow_id in bench_source.workflow_ids(): + bench_runs.extend( + await get_bench_runs(since, until, BRANCH_DEVELOP, workflow_id) + ) + assert len(bench_runs) > 0, "No benchmark runs found" + + job_reports = await fetch_job_reports(bench_runs, remote_cache) + _logger.debug(f"Gathered {len(job_reports)} job reports") + assert len(job_reports) > 0, "No job reports found" + + _logger.debug("Sorting job_reports by commit date") + sort_job_reports(job_reports) + + all_bench_labels: Set[str] = gather_all_bench_labels(job_reports) + _logger.debug(f"Found {len(all_bench_labels)} unique benchmark labels") + + job_reports_per_branch: Dict[str, List[JobReport]] = { + BRANCH_DEVELOP: job_reports + } + template_bench_datas: List[TemplateBenchData] = \ + create_template_data(job_reports_per_branch, all_bench_labels) + template_bench_datas.sort(key=lambda data: data.id) + + jinja_data = JinjaData( + since=since, + display_since=max(until - timedelta(days=30), since), + until=until, + bench_datas=template_bench_datas, + bench_source=bench_source, + branches=[BRANCH_DEVELOP], + ) + _logger.debug(f"Rendering HTML to {generated_html}") + render_html(jinja_data, generated_html) From f8e1848ff7d0f3a817ed9829e91b42f9d8c7aade Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:53:45 +0100 Subject: [PATCH 40/93] Fix tests --- .../engine-benchmarks/bench_tool/test_bench_results.py | 1 - .../engine-benchmarks/bench_tool/test_remote_cache.py | 3 --- 2 files changed, 4 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py index 085f97adf528..71c16dcb3cf1 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py @@ -37,7 +37,6 @@ async def test_get_bench_report(self): self.assertEqual(1, len(bench_runs)) bench_run = bench_runs[0] remote_cache = ReadonlyRemoteCache() - await remote_cache.initialize() with WithTempDir("test_get_bench_report") as temp_dir: bench_report = await get_bench_report(bench_run, temp_dir, remote_cache) self.assertIsNotNone(bench_report) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py index ff8d344e1f70..65e600b10fdc 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py @@ -30,7 +30,6 @@ class 
TestReadonlyRemoteCache(unittest.IsolatedAsyncioTestCase): async def test_fetch_some_cache(self): remote_cache = ReadonlyRemoteCache() - await remote_cache.initialize() # This ID is definitelly in the cache bench_id = "3686412302" job_report = await remote_cache.fetch(bench_id) @@ -41,14 +40,12 @@ async def test_fetch_some_cache(self): async def test_non_existing_cache_should_not_fail(self): remote_cache = ReadonlyRemoteCache() - await remote_cache.initialize() bench_id = "FOOOO BAR" job_report = await remote_cache.fetch(bench_id) self.assertIsNone(job_report) async def test_put_job_report_into_cache(self): remote_cache = ReadonlyRemoteCache() - await remote_cache.initialize() bench_id = sample_job_report.bench_run.id await remote_cache.put(bench_id, sample_job_report) job_report = await remote_cache.fetch(bench_id) From 220ed1c8bcba4e99d8062a6abba1d30fad03209d Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 19 Feb 2024 18:54:06 +0100 Subject: [PATCH 41/93] Add test_website_regen --- .../bench_tool/test_website_regen.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tools/performance/engine-benchmarks/bench_tool/test_website_regen.py diff --git a/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py new file mode 100644 index 000000000000..567533d4d5d2 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/test_website_regen.py @@ -0,0 +1,31 @@ +import unittest +from pathlib import Path +from datetime import datetime + +from bench_tool import Source +from bench_tool.remote_cache import SyncRemoteCache +from bench_tool.utils import WithTempDir +from bench_tool.website import generate_bench_website + + +class TestWebsiteRegen(unittest.IsolatedAsyncioTestCase): + LOCAL_REPO_ROOT = Path("/home/pavel/dev/engine-benchmark-results") + + async def test_engine_website_regen(self): + if not self.LOCAL_REPO_ROOT.exists(): + self.skipTest(f"Local repo {self.LOCAL_REPO_ROOT} does not exist") + remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT) + # Pull the repo if necessary + await remote_cache.initialize() + since = datetime.fromisoformat("2023-02-01") + until = datetime.fromisoformat("2023-02-25") + with WithTempDir("test_engine_website_regen") as temp_dir: + temp_dir_path = Path(temp_dir) + html_out = temp_dir_path.joinpath("engine-benchs.html") + await generate_bench_website(Source.ENGINE, remote_cache, since, until, html_out) + self.assertTrue(html_out.exists()) + self.assertGreater( + html_out.stat().st_size, 100 * 1024, + "The generated HTML file should have size bigger than 100 KB" + ) + pass From 4f7f1642a698ea6ac3c3f7cd7df45d6335a772d0 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 12:52:53 +0100 Subject: [PATCH 42/93] Remove deprecated methods --- .../engine-benchmarks/bench_tool/test_bench_results.py | 4 ++-- tools/performance/engine-benchmarks/bench_tool/test_git.py | 2 +- .../engine-benchmarks/bench_tool/test_remote_cache.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py index 71c16dcb3cf1..30b06dccb230 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py @@ -40,6 +40,6 @@ async def test_get_bench_report(self): with WithTempDir("test_get_bench_report") as temp_dir: 
bench_report = await get_bench_report(bench_run, temp_dir, remote_cache)
             self.assertIsNotNone(bench_report)
-            self.assertEquals(bench_run, bench_report.bench_run)
-            self.assertEquals(55, len(bench_report.label_score_dict))
+            self.assertEqual(bench_run, bench_report.bench_run)
+            self.assertEqual(55, len(bench_report.label_score_dict))
 
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_git.py b/tools/performance/engine-benchmarks/bench_tool/test_git.py
index 2be622e1f462..61a635786a0f 100644
--- a/tools/performance/engine-benchmarks/bench_tool/test_git.py
+++ b/tools/performance/engine-benchmarks/bench_tool/test_git.py
@@ -52,7 +52,7 @@ async def test_add_more_files(self):
         self.repo_root.joinpath("README.md").write_text("Hello")
         self.repo_root.joinpath("pom.xml").write_text("")
         status = await git.status(self.repo_root)
-        self.assertEquals(2, len(status.untracked))
+        self.assertEqual(2, len(status.untracked))
         await git.add(self.repo_root, {"README.md", "pom.xml"})
         status = await git.status(self.repo_root)
         self.assertEqual(2, len(status.added))
diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
index 65e600b10fdc..566533ea77b9 100644
--- a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
+++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py
@@ -34,9 +34,9 @@ async def test_fetch_some_cache(self):
         bench_id = "3686412302"
         job_report = await remote_cache.fetch(bench_id)
         self.assertIsNotNone(job_report)
-        self.assertEquals(1, job_report.bench_run.run_attempt)
-        self.assertEquals(bench_id, job_report.bench_run.id)
-        self.assertEquals("Jaroslav Tulach", job_report.bench_run.head_commit.author.name)
+        self.assertEqual(1, job_report.bench_run.run_attempt)
+        self.assertEqual(bench_id, job_report.bench_run.id)
+        self.assertEqual("Jaroslav Tulach", job_report.bench_run.head_commit.author.name)
 
     async def test_non_existing_cache_should_not_fail(self):
         remote_cache = ReadonlyRemoteCache()

From 4c9f463446dc6aea422eb50730b2eb8b433f5411 Mon Sep 17 00:00:00 2001
From: Pavel Marek
Date: Thu, 22 Feb 2024 12:53:05 +0100
Subject: [PATCH 43/93] Mention how to run tests in the README

---
 tools/performance/engine-benchmarks/README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/performance/engine-benchmarks/README.md b/tools/performance/engine-benchmarks/README.md
index 13ab9778fe8b..e1d94eb63ba2 100644
--- a/tools/performance/engine-benchmarks/README.md
+++ b/tools/performance/engine-benchmarks/README.md
@@ -7,6 +7,9 @@ project for analysing the downloaded data.
 Note that for convenience, there is `bench_tool` directory that is a Python
 package. The `bench_download.py` script uses this package.
 
+To run all the Python tests for that package, run `python -m unittest` in this
+directory.
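+To run only a single test module, name it explicitly, e.g.
+`python -m unittest bench_tool.test_git`.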
+ Dependencies for `bench_download.py`: - python >= 3.7 From 0de278cfe3b5e6d59228cfa4786d2b1c69ffc7d5 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 12:54:54 +0100 Subject: [PATCH 44/93] Skip remote repo cloning test --- .../engine-benchmarks/bench_tool/test_remote_cache.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py index 566533ea77b9..80fa1a2f827b 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py @@ -71,6 +71,7 @@ async def test_init_sync_remote_cache_from_local_repo(self): self.assertTrue(remote_cache.stdlib_index_html().exists()) async def test_clone_sync_remote_cache(self): + self.skipTest("TODO: Takes too long") remote_cache = SyncRemoteCache() await remote_cache.initialize() root_dir = remote_cache.repo_root_dir() From 0868f9a215d61a412cd85fbeccaab23d52957904 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 13:49:00 +0100 Subject: [PATCH 45/93] Move some common functionality into utils --- .../bench_tool/bench_results.py | 23 ++----------- .../bench_tool/remote_cache.py | 34 ++++--------------- .../engine-benchmarks/bench_tool/utils.py | 27 +++++++++++++-- 3 files changed, 34 insertions(+), 50 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/bench_results.py b/tools/performance/engine-benchmarks/bench_tool/bench_results.py index da88a2f2cc47..5d2a79a3002b 100644 --- a/tools/performance/engine-benchmarks/bench_tool/bench_results.py +++ b/tools/performance/engine-benchmarks/bench_tool/bench_results.py @@ -9,10 +9,10 @@ from typing import List, Dict, Optional, Any from xml.etree import ElementTree as ET -from bench_tool import JobRun, DATE_FORMAT, ENSO_REPO, JobReport, Commit, Author +from bench_tool import JobRun, DATE_FORMAT, ENSO_REPO, JobReport from bench_tool.gh import invoke_gh_api from bench_tool.remote_cache import RemoteCache -from bench_tool.utils import WithTempDir +from bench_tool.utils import WithTempDir, parse_bench_run_from_json ARTIFACT_ID = "Runtime Benchmark Report" @@ -49,7 +49,7 @@ async def get_and_parse_run(page: int, parsed_bench_runs) -> None: _query_fields["page"] = str(page) res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", _query_fields) bench_runs_json = res["workflow_runs"] - _parsed_bench_runs = [_parse_bench_run_from_json(bench_run_json) + _parsed_bench_runs = [parse_bench_run_from_json(bench_run_json) for bench_run_json in bench_runs_json] parsed_bench_runs.extend(_parsed_bench_runs) @@ -182,20 +182,3 @@ def _parse_bench_report_from_xml(bench_report_xml_path: str, bench_run: JobRun) bench_run=bench_run ) - -def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun: - return JobRun( - id=str(obj["id"]), - html_url=obj["html_url"], - run_attempt=int(obj["run_attempt"]), - event=obj["event"], - display_title=obj["display_title"], - head_commit=Commit( - id=obj["head_commit"]["id"], - message=obj["head_commit"]["message"], - timestamp=obj["head_commit"]["timestamp"], - author=Author( - name=obj["head_commit"]["author"]["name"] - ) - ) - ) diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index b7af8392f344..32add0eb622f 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ 
b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -7,12 +7,12 @@ import logging import os import re -import shutil import tempfile from pathlib import Path -from typing import Dict, Optional, Any +from typing import Dict, Optional -from . import gh, JobReport, BENCH_REPO, JobRun, Commit, Author, git +from . import gh, JobReport, BENCH_REPO, git +from .utils import parse_bench_report_from_json _logger = logging.getLogger(__name__) @@ -64,7 +64,7 @@ async def fetch(self, bench_id: str) -> Optional[JobReport]: if content is None: _logger.warning("Cache not found for %s", bench_id) return None - bench_report = _parse_bench_report_from_json( + bench_report = parse_bench_report_from_json( json.loads(content) ) assert bench_id not in self._fetched_items @@ -129,7 +129,7 @@ async def fetch(self, bench_id: str) -> Optional[JobReport]: path = self._cache_dir.joinpath(bench_id + ".json") if path.exists(): with path.open() as f: - return _parse_bench_report_from_json(json.load(f)) + return parse_bench_report_from_json(json.load(f)) return None async def put(self, bench_id: str, job_report: JobReport) -> None: @@ -163,26 +163,4 @@ def _is_benchrun_id(name: str) -> bool: return re.match(r"\d{9}", name) is not None -def _parse_bench_report_from_json(obj: Dict[Any, Any]) -> JobReport: - return JobReport( - bench_run=_parse_bench_run_from_json(obj["bench_run"]), - label_score_dict=obj["label_score_dict"] - ) - - -def _parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun: - return JobRun( - id=str(obj["id"]), - html_url=obj["html_url"], - run_attempt=int(obj["run_attempt"]), - event=obj["event"], - display_title=obj["display_title"], - head_commit=Commit( - id=obj["head_commit"]["id"], - message=obj["head_commit"]["message"], - timestamp=obj["head_commit"]["timestamp"], - author=Author( - name=obj["head_commit"]["author"]["name"] - ) - ) - ) + diff --git a/tools/performance/engine-benchmarks/bench_tool/utils.py b/tools/performance/engine-benchmarks/bench_tool/utils.py index f271e7d3a06b..26d8d5b6e75f 100644 --- a/tools/performance/engine-benchmarks/bench_tool/utils.py +++ b/tools/performance/engine-benchmarks/bench_tool/utils.py @@ -2,9 +2,9 @@ import shutil import tempfile from datetime import datetime -from typing import List, Set +from typing import List, Set, Dict, Any -from bench_tool import JobReport, GH_DATE_FORMAT, Commit +from bench_tool import JobReport, GH_DATE_FORMAT, Commit, JobRun, Author _logger = logging.getLogger(__name__) @@ -54,3 +54,26 @@ def _get_timestamp(job_report: JobReport) -> datetime: job_reports.sort(key=lambda report: _get_timestamp(report)) +def parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun: + return JobRun( + id=str(obj["id"]), + html_url=obj["html_url"], + run_attempt=int(obj["run_attempt"]), + event=obj["event"], + display_title=obj["display_title"], + head_commit=Commit( + id=obj["head_commit"]["id"], + message=obj["head_commit"]["message"], + timestamp=obj["head_commit"]["timestamp"], + author=Author( + name=obj["head_commit"]["author"]["name"] + ) + ) + ) + + +def parse_bench_report_from_json(obj: Dict[Any, Any]) -> JobReport: + return JobReport( + bench_run=parse_bench_run_from_json(obj["bench_run"]), + label_score_dict=obj["label_score_dict"] + ) From 7493c2bec410a292e53199445af900c4fc7f5005 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 14:28:29 +0100 Subject: [PATCH 46/93] Test fetching stdlib job report --- .../engine-benchmarks/bench_tool/test_gh.py | 13 +++++++- .../bench_tool/test_remote_cache.py | 30 
+++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_gh.py b/tools/performance/engine-benchmarks/bench_tool/test_gh.py index 51de905959dc..1882390fd601 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_gh.py @@ -1,6 +1,6 @@ import unittest -from bench_tool import ENSO_REPO +from bench_tool import ENSO_REPO, Source from . import gh @@ -21,3 +21,14 @@ async def test_fetch_non_existing_file(self): async def test_wrong_gh_query_should_not_fail(self): res = await gh.invoke_gh_api("non_existing_repo", "/non_existing_endpoint") self.assertIsNone(res) + + async def test_get_stdlib_bench_run(self): + # This bench run ID does not contain the "Runtime Benchmark Report" artifact name, + # but it is a successful run. There should be a special handling for this case + # https://github.com/enso-org/enso/actions/runs/7909011591 + bench_run_id = "7909011591" + obj = await gh.invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run_id}/artifacts") + artifacts = obj["artifacts"] + stdlib_artifact_name = Source.STDLIB.artifact_names()[0] + self.assertEqual(1, len(artifacts)) + self.assertEqual(stdlib_artifact_name, artifacts[0]["name"]) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py index 80fa1a2f827b..a18f04c783b2 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py @@ -2,6 +2,7 @@ from pathlib import Path from . import JobReport, JobRun, Commit, Author +from .bench_results import fetch_job_reports from .remote_cache import ReadonlyRemoteCache, SyncRemoteCache @@ -26,6 +27,20 @@ ) ) +stdlib_bench_run = JobRun( + id='7879611014', + display_title='Benchmark Standard Libraries', + html_url='https://github.com/enso-org/enso/actions/runs/7879611014', + run_attempt=1, + event='schedule', + head_commit=Commit( + id='eb59b475f68146f03fc3cef1092ee56eaaa1600a', + author=Author(name='Radosław Waśko'), + timestamp='2024-02-12T19:04:13Z', + message='Write support for S3 (#8921)\n\n- Closes #8809' + ) +) + class TestReadonlyRemoteCache(unittest.IsolatedAsyncioTestCase): async def test_fetch_some_cache(self): @@ -52,6 +67,12 @@ async def test_put_job_report_into_cache(self): self.assertIsNotNone(job_report) self.assertEquals(bench_id, job_report.bench_run.id) + async def test_fetch_stdlib_report(self): + remote_cache = ReadonlyRemoteCache() + job_reports = await fetch_job_reports([stdlib_bench_run], remote_cache) + self.assertIsNotNone(job_reports) + self.assertEqual(1, len(job_reports)) + class TestSyncRemoteCache(unittest.IsolatedAsyncioTestCase): LOCAL_REPO_ROOT = Path("/home/pavel/dev/engine-benchmark-results") @@ -82,3 +103,12 @@ async def test_clone_sync_remote_cache(self): self.assertTrue(cache_dir.is_dir()) self.assertTrue(remote_cache.engine_index_html().exists()) self.assertTrue(remote_cache.stdlib_index_html().exists()) + + async def test_fetch_stdlib_report(self): + if not self.LOCAL_REPO_ROOT.exists(): + self.skipTest(f"Local repo {self.LOCAL_REPO_ROOT} does not exist") + remote_cache = SyncRemoteCache(self.LOCAL_REPO_ROOT) + await remote_cache.initialize() + job_reports = await fetch_job_reports([stdlib_bench_run], remote_cache) + self.assertIsNotNone(job_reports) + self.assertEqual(1, len(job_reports)) From 9f737d12e63812fd085efefa065c168b84312072 Mon 
Sep 17 00:00:00 2001
From: Pavel Marek
Date: Thu, 22 Feb 2024 14:28:57 +0100
Subject: [PATCH 47/93] Search for the bench report by all known artifact names

---
 .../engine-benchmarks/bench_tool/__init__.py  |  8 ++++++++
 .../bench_tool/bench_results.py               | 20 ++++++++++++++-----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py
index 33df62ba937a..abce5de15f19 100644
--- a/tools/performance/engine-benchmarks/bench_tool/__init__.py
+++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py
@@ -58,6 +58,14 @@ def workflow_ids(self) -> List[int]:
         else:
             raise ValueError(f"Unknown source {self}")
 
+    def artifact_names(self) -> List[str]:
+        if self == Source.ENGINE:
+            return ["Runtime Benchmark Report"]
+        elif self == Source.STDLIB:
+            return ["Enso JMH Benchmark Report"]
+        else:
+            raise ValueError(f"Unknown source {self}")
+
 
 @dataclass
 class Author:
diff --git a/tools/performance/engine-benchmarks/bench_tool/bench_results.py b/tools/performance/engine-benchmarks/bench_tool/bench_results.py
index 5d2a79a3002b..78f1a611a7a3 100644
--- a/tools/performance/engine-benchmarks/bench_tool/bench_results.py
+++ b/tools/performance/engine-benchmarks/bench_tool/bench_results.py
@@ -9,7 +9,7 @@
 from typing import List, Dict, Optional, Any
 from xml.etree import ElementTree as ET
 
-from bench_tool import JobRun, DATE_FORMAT, ENSO_REPO, JobReport
+from bench_tool import JobRun, DATE_FORMAT, ENSO_REPO, JobReport, Source
 from bench_tool.gh import invoke_gh_api
 from bench_tool.remote_cache import RemoteCache
 from bench_tool.utils import WithTempDir
@@ -95,6 +95,10 @@ async def _process_report(_bench_run: JobRun):
     return job_reports
 
 
+def _known_artifact_names() -> List[str]:
+    return Source.STDLIB.artifact_names() + Source.ENGINE.artifact_names()
+
+
 async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: RemoteCache) -> Optional[JobReport]:
     """
     Extracts some data from the given bench_run, which was fetched via the GH API,
@@ -112,11 +116,17 @@ async def get_bench_report(bench_run: JobRun, temp_dir: str, remote_cache: Remot
     obj: Dict[str, Any] = await invoke_gh_api(ENSO_REPO, f"/actions/runs/{bench_run.id}/artifacts")
     artifacts = obj["artifacts"]
     artifacts_by_names = {artifact["name"]: artifact for artifact in artifacts}
-    if ARTIFACT_ID not in artifacts_by_names:
-        _logger.warning("Bench run %s does not contain the artifact named %s, but it is a successful run.",
-                        bench_run.id, ARTIFACT_ID)
+    # At this point, we don't know the source of the benchmark - either it is from
+    # Engine, or from stdlib. Thus, we don't know exactly which artifact name we
+    # are looking for. But we know there must be exactly one of the known artifact names.
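+    # The known names come from Source.artifact_names: "Runtime Benchmark Report"
+    # for the engine benchmarks and "Enso JMH Benchmark Report" for the stdlib ones.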
+ bench_report_artifact = None + for known_name in _known_artifact_names(): + if known_name in artifacts_by_names: + bench_report_artifact = artifacts_by_names[known_name] + if bench_report_artifact is None: + _logger.warning(f"Bench run {bench_run.id} does not contain any of the known artifact names: " + f"{_known_artifact_names()}, but it is a successful run.") return None - bench_report_artifact = artifacts_by_names[ARTIFACT_ID] assert bench_report_artifact, "Benchmark Report artifact not found" artifact_id = str(bench_report_artifact["id"]) created_at = bench_report_artifact["created_at"] From 52e6c3138207bfbfa0afbef7d2ce30743d29278f Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 14:29:18 +0100 Subject: [PATCH 48/93] website_regen has dry_run option --- tools/performance/engine-benchmarks/website_regen.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/performance/engine-benchmarks/website_regen.py b/tools/performance/engine-benchmarks/website_regen.py index a4795f322e0e..be408fa6f7eb 100644 --- a/tools/performance/engine-benchmarks/website_regen.py +++ b/tools/performance/engine-benchmarks/website_regen.py @@ -13,7 +13,7 @@ from bench_tool import Source from bench_tool.remote_cache import SyncRemoteCache -from bench_tool.utils import generate_bench_website +from bench_tool.website import generate_bench_website # The inception date of the benchmarks, i.e., the date of the first benchmark run. ENGINE_SINCE = datetime.fromisoformat("2022-12-01") @@ -30,9 +30,11 @@ async def main(): type=str, help="Path to the local clone of the engine-benchmark-results repo") args = arg_parser.parse_args() + dry_run: bool = args.dry_run verbose: bool = args.verbose local_repo: Optional[Path] = Path(args.local_repo) if args.local_repo else None logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) + _logger.debug(f"Args: dry_run={dry_run}, verbose={verbose}, local_repo={local_repo}") remote_cache = SyncRemoteCache(local_repo) _logger.info("Initializing the bench results repo, this might take some time") await remote_cache.initialize() @@ -54,7 +56,10 @@ async def main(): remote_cache.stdlib_index_html() ) await asyncio.gather(engine_html_task, stdlib_html_task) - await remote_cache.sync() + if dry_run: + _logger.info("Dry-run, not syncing the remote cache") + else: + await remote_cache.sync() if __name__ == "__main__": From 6793a4fd530beef4b4441bb9a9ebd79fe4cfa3a5 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 14:29:47 +0100 Subject: [PATCH 49/93] Remove deprecated method --- .../engine-benchmarks/bench_tool/test_remote_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py index a18f04c783b2..18e046c12700 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_remote_cache.py @@ -65,7 +65,7 @@ async def test_put_job_report_into_cache(self): await remote_cache.put(bench_id, sample_job_report) job_report = await remote_cache.fetch(bench_id) self.assertIsNotNone(job_report) - self.assertEquals(bench_id, job_report.bench_run.id) + self.assertEqual(bench_id, job_report.bench_run.id) async def test_fetch_stdlib_report(self): remote_cache = ReadonlyRemoteCache() From 313c90a2b1750c3ec2290d136010ae5264e23f84 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 14:53:22 +0100 
Subject: [PATCH 50/93] JobRun and JobReport have json encoding --- .../engine-benchmarks/bench_tool/__init__.py | 50 ++++++++++++++++++- .../bench_tool/bench_results.py | 4 +- .../bench_tool/remote_cache.py | 7 ++- .../bench_tool/test_bench_results.py | 37 +++++++++++++- .../engine-benchmarks/bench_tool/utils.py | 29 +---------- 5 files changed, 91 insertions(+), 36 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py index abce5de15f19..c62504599ca3 100644 --- a/tools/performance/engine-benchmarks/bench_tool/__init__.py +++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py @@ -3,7 +3,7 @@ from datetime import timedelta, datetime from enum import Enum from pathlib import Path -from typing import List, Dict +from typing import List, Dict, Any def pkg_dir() -> Path: @@ -94,6 +94,41 @@ class JobRun: event: str head_commit: Commit + @staticmethod + def from_dict(obj: Dict[Any, Any]) -> "JobRun": + return JobRun( + id=str(obj["id"]), + html_url=obj["html_url"], + run_attempt=int(obj["run_attempt"]), + event=obj["event"], + display_title=obj["display_title"], + head_commit=Commit( + id=obj["head_commit"]["id"], + message=obj["head_commit"]["message"], + timestamp=obj["head_commit"]["timestamp"], + author=Author( + name=obj["head_commit"]["author"]["name"] + ) + ) + ) + + def to_dict(self) -> Dict[Any, Any]: + return { + "id": self.id, + "html_url": self.html_url, + "run_attempt": self.run_attempt, + "event": self.event, + "display_title": self.display_title, + "head_commit": { + "id": self.head_commit.id, + "message": self.head_commit.message, + "timestamp": self.head_commit.timestamp, + "author": { + "name": self.head_commit.author.name + } + } + } + @dataclass class JobReport: @@ -105,6 +140,19 @@ class JobReport: """ A mapping of benchmark labels to their scores """ bench_run: JobRun + @staticmethod + def from_dict(obj: Dict[Any, Any]) -> "JobReport": + return JobReport( + bench_run=JobRun.from_dict(obj["bench_run"]), + label_score_dict=obj["label_score_dict"] + ) + + def to_dict(self) -> Dict[Any, Any]: + return { + "bench_run": self.bench_run.to_dict(), + "label_score_dict": self.label_score_dict + } + @dataclass class BenchmarkData: diff --git a/tools/performance/engine-benchmarks/bench_tool/bench_results.py b/tools/performance/engine-benchmarks/bench_tool/bench_results.py index 78f1a611a7a3..5337203334c1 100644 --- a/tools/performance/engine-benchmarks/bench_tool/bench_results.py +++ b/tools/performance/engine-benchmarks/bench_tool/bench_results.py @@ -12,7 +12,7 @@ from bench_tool import JobRun, DATE_FORMAT, ENSO_REPO, JobReport, Source from bench_tool.gh import invoke_gh_api from bench_tool.remote_cache import RemoteCache -from bench_tool.utils import WithTempDir, parse_bench_run_from_json +from bench_tool.utils import WithTempDir ARTIFACT_ID = "Runtime Benchmark Report" @@ -49,7 +49,7 @@ async def get_and_parse_run(page: int, parsed_bench_runs) -> None: _query_fields["page"] = str(page) res = await invoke_gh_api(ENSO_REPO, f"/actions/workflows/{workflow_id}/runs", _query_fields) bench_runs_json = res["workflow_runs"] - _parsed_bench_runs = [parse_bench_run_from_json(bench_run_json) + _parsed_bench_runs = [JobRun.from_dict(bench_run_json) for bench_run_json in bench_runs_json] parsed_bench_runs.extend(_parsed_bench_runs) diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index 
32add0eb622f..10fde836368e 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -12,7 +12,6 @@ from typing import Dict, Optional from . import gh, JobReport, BENCH_REPO, git -from .utils import parse_bench_report_from_json _logger = logging.getLogger(__name__) @@ -64,7 +63,7 @@ async def fetch(self, bench_id: str) -> Optional[JobReport]: if content is None: _logger.warning("Cache not found for %s", bench_id) return None - bench_report = parse_bench_report_from_json( + bench_report = JobReport.from_dict( json.loads(content) ) assert bench_id not in self._fetched_items @@ -129,7 +128,7 @@ async def fetch(self, bench_id: str) -> Optional[JobReport]: path = self._cache_dir.joinpath(bench_id + ".json") if path.exists(): with path.open() as f: - return parse_bench_report_from_json(json.load(f)) + return JobReport.from_dict(json.load(f)) return None async def put(self, bench_id: str, job_report: JobReport) -> None: @@ -137,7 +136,7 @@ async def put(self, bench_id: str, job_report: JobReport) -> None: path = self._cache_dir.joinpath(bench_id + ".json") assert not path.exists() with path.open("w") as f: - json.dump(job_report, f) + json.dump(job_report.to_dict(), f) async def sync(self) -> None: """ diff --git a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py index 30b06dccb230..0c45ba70ae05 100644 --- a/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py +++ b/tools/performance/engine-benchmarks/bench_tool/test_bench_results.py @@ -1,7 +1,9 @@ +import json import unittest from datetime import datetime -from bench_tool import ENGINE_BENCH_WORKFLOW_ID +from bench_tool import ENGINE_BENCH_WORKFLOW_ID, JobReport, JobRun, Commit, \ + Author from .bench_results import get_bench_report, get_bench_runs from .remote_cache import ReadonlyRemoteCache from .utils import parse_commit_timestamp, WithTempDir @@ -10,8 +12,39 @@ # We know for sure that this workflow run is on the GH. BENCH_RUN_ID = "4888453297" +sample_job_report = JobReport( + label_score_dict={ + "test_label": 1.0 + }, + bench_run=JobRun( + id="123456789", + display_title="Test", + html_url="https://github.com/enso-org/enso/actions/runs/123456789", + run_attempt=1, + event="push", + head_commit=Commit( + id="a67297aebf6a094d1ad0b0d88cf7438dbf8bd8fe", + message="Test commit", + timestamp="2021-06-01T12:00:00Z", + author=Author( + name="Pavel Marek" + ) + ) + ) +) + + +class TestBenchResults(unittest.IsolatedAsyncioTestCase): + def test_job_report_is_serializable(self): + s = json.dumps(sample_job_report.to_dict()) + self.assertIsNotNone(s) + self.assertGreater(len(s), 0) + + def test_job_report_is_deserializable(self): + d = sample_job_report.to_dict() + job_report = JobReport.from_dict(d) + self.assertEqual(sample_job_report, job_report) -class MyTestCase(unittest.IsolatedAsyncioTestCase): async def test_get_bench_run(self): """ Bench run does not need remote cache - it fetches just some metadata about GH artifacts. 
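For illustration, the `to_dict`/`from_dict` pair introduced in this patch makes a report round-trippable through plain JSON. The sketch below is not part of the patch itself; the field values are invented, and only the dataclass shapes (`JobReport`, `JobRun`, `Commit`, `Author`) come from `bench_tool`:

    import json
    from bench_tool import JobReport, JobRun, Commit, Author

    # Build a small report by hand (all values here are made up).
    report = JobReport(
        label_score_dict={"org.enso.example.SomeBench": 42.0},
        bench_run=JobRun(
            id="123",
            display_title="Benchmark Engine",
            html_url="https://github.com/enso-org/enso/actions/runs/123",
            run_attempt=1,
            event="schedule",
            head_commit=Commit(
                id="0000000000000000000000000000000000000000",
                author=Author(name="Jane Doe"),
                timestamp="2024-02-22T00:00:00Z",
                message="Example commit",
            ),
        ),
    )

    # Serialize to JSON text and parse back; dataclass equality checks the round trip.
    decoded = JobReport.from_dict(json.loads(json.dumps(report.to_dict())))
    assert decoded == report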
diff --git a/tools/performance/engine-benchmarks/bench_tool/utils.py b/tools/performance/engine-benchmarks/bench_tool/utils.py index 26d8d5b6e75f..0a04f0784a87 100644 --- a/tools/performance/engine-benchmarks/bench_tool/utils.py +++ b/tools/performance/engine-benchmarks/bench_tool/utils.py @@ -2,9 +2,9 @@ import shutil import tempfile from datetime import datetime -from typing import List, Set, Dict, Any +from typing import List, Set -from bench_tool import JobReport, GH_DATE_FORMAT, Commit, JobRun, Author +from bench_tool import JobReport, GH_DATE_FORMAT, Commit _logger = logging.getLogger(__name__) @@ -52,28 +52,3 @@ def _get_timestamp(job_report: JobReport) -> datetime: return parse_commit_timestamp(job_report.bench_run.head_commit) job_reports.sort(key=lambda report: _get_timestamp(report)) - - -def parse_bench_run_from_json(obj: Dict[Any, Any]) -> JobRun: - return JobRun( - id=str(obj["id"]), - html_url=obj["html_url"], - run_attempt=int(obj["run_attempt"]), - event=obj["event"], - display_title=obj["display_title"], - head_commit=Commit( - id=obj["head_commit"]["id"], - message=obj["head_commit"]["message"], - timestamp=obj["head_commit"]["timestamp"], - author=Author( - name=obj["head_commit"]["author"]["name"] - ) - ) - ) - - -def parse_bench_report_from_json(obj: Dict[Any, Any]) -> JobReport: - return JobReport( - bench_run=parse_bench_run_from_json(obj["bench_run"]), - label_score_dict=obj["label_score_dict"] - ) From 0a7f4d2f96fe171c71de3507f7d5af389a75e5c9 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 15:12:56 +0100 Subject: [PATCH 51/93] Json files are saved in pretty form --- .../engine-benchmarks/bench_tool/remote_cache.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index 10fde836368e..fa753fb82908 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -136,7 +136,12 @@ async def put(self, bench_id: str, job_report: JobReport) -> None: path = self._cache_dir.joinpath(bench_id + ".json") assert not path.exists() with path.open("w") as f: - json.dump(job_report.to_dict(), f) + json.dump( + job_report.to_dict(), + f, + ensure_ascii=True, + indent=2 + ) async def sync(self) -> None: """ From 2633e5c940e105c9461f0ae33a8a9dc85e447cd8 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 17:53:57 +0100 Subject: [PATCH 52/93] Add Benchmarks upload workflow file --- .github/workflows/bench-upload.yml | 41 +++++++++++++++++++ .../bench_tool/requirements.txt | 3 ++ 2 files changed, 44 insertions(+) create mode 100644 .github/workflows/bench-upload.yml create mode 100644 tools/performance/engine-benchmarks/bench_tool/requirements.txt diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml new file mode 100644 index 000000000000..a2673662cf8f --- /dev/null +++ b/.github/workflows/bench-upload.yml @@ -0,0 +1,41 @@ +# This file is manually managed. It is used to upload benchmarks to to the +# https://github.com/enso-org/engine-benchmark-results repository. + +name: Benchmarks upload +on: + schedule: + - cron: 0 0 * * * + workflow_dispatch: + inputs: + dry-run: + description: If set, no results will be uploaded. 
+ required: true + type: boolean + default: false +jobs: + upload-benchmarks: + name: Upload benchmarks + runs-on: ubuntu-latest + steps: + - name: Checkout enso repository + uses: actions/checkout@v4 + repository: enso-org/enso + with: + path: enso + - name: Checkout engine-benchmark-results repository + uses: actions/checkout@v4 + repository: enso-org/engine-benchmark-results + with: + path: engine-benchmark-results + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y python3 python3-jinja2 python3-numpy + sudo apt-get install -y gh + - name: Upload benchmarks + run: | + cd enso/tools/performance/engine-benchmarks/bench_tool + python3 website_regen.py \ + -v \ + ${{ if github.event.inputs.dry-run}} --dry-run ${{ endif }} \ + --local-repo engine-benchmark-results diff --git a/tools/performance/engine-benchmarks/bench_tool/requirements.txt b/tools/performance/engine-benchmarks/bench_tool/requirements.txt new file mode 100644 index 000000000000..d9ac381d2764 --- /dev/null +++ b/tools/performance/engine-benchmarks/bench_tool/requirements.txt @@ -0,0 +1,3 @@ + +Jinja2 == 3.1.2 +numpy == 1.24.2 From de89366d10da2f930799e9daea0e75ec6995d40f Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 18:32:36 +0100 Subject: [PATCH 53/93] Fix input name in workflow file --- .github/workflows/bench-upload.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index a2673662cf8f..2f6edd21731a 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -7,7 +7,7 @@ on: - cron: 0 0 * * * workflow_dispatch: inputs: - dry-run: + just-check: description: If set, no results will be uploaded. required: true type: boolean @@ -37,5 +37,5 @@ jobs: cd enso/tools/performance/engine-benchmarks/bench_tool python3 website_regen.py \ -v \ - ${{ if github.event.inputs.dry-run}} --dry-run ${{ endif }} \ + ${{ if github.event.inputs.just-check}} --dry-run ${{ endif }} \ --local-repo engine-benchmark-results From 7dc9ed703369d097c7247c6039e8cb1237e736a2 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 18:33:54 +0100 Subject: [PATCH 54/93] [WIP] temporarily enable the workflow on all pull requests --- .github/workflows/bench-upload.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 2f6edd21731a..bcf2e81cffa3 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -3,6 +3,8 @@ name: Benchmarks upload on: + pull_request: + branches: ["*"] schedule: - cron: 0 0 * * * workflow_dispatch: From 7195ffbd0207f1c97dbf3f8d65fc5f3916884cae Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 18:35:20 +0100 Subject: [PATCH 55/93] Fix repository arg --- .github/workflows/bench-upload.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index bcf2e81cffa3..5f4392eb30b2 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -21,13 +21,13 @@ jobs: steps: - name: Checkout enso repository uses: actions/checkout@v4 - repository: enso-org/enso with: + repository: enso-org/enso path: enso - name: Checkout engine-benchmark-results repository uses: actions/checkout@v4 - repository: enso-org/engine-benchmark-results with: + repository: enso-org/engine-benchmark-results path: engine-benchmark-results - name: 
Install dependencies run: | From 46ef8efec1bc324ef385762dd675c53cc25b86e5 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 18:37:24 +0100 Subject: [PATCH 56/93] Add dry-run step --- .github/workflows/bench-upload.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 5f4392eb30b2..8a4bc5bfc16f 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -34,10 +34,17 @@ jobs: sudo apt-get update sudo apt-get install -y python3 python3-jinja2 python3-numpy sudo apt-get install -y gh + - name: Upload benchmarks (dry-run) + if: ${{ github.event.inputs.just-check }} + run: | + cd enso/tools/performance/engine-benchmarks/bench_tool + python3 website_regen.py \ + -v \ + --dry-run \ + --local-repo engine-benchmark-results - name: Upload benchmarks run: | cd enso/tools/performance/engine-benchmarks/bench_tool python3 website_regen.py \ -v \ - ${{ if github.event.inputs.just-check}} --dry-run ${{ endif }} \ --local-repo engine-benchmark-results From b650ac6e0590cc21c18b84a847132117d4e714da Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 18:39:49 +0100 Subject: [PATCH 57/93] Fix cd --- .github/workflows/bench-upload.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 8a4bc5bfc16f..fa81949a422b 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -37,14 +37,16 @@ jobs: - name: Upload benchmarks (dry-run) if: ${{ github.event.inputs.just-check }} run: | - cd enso/tools/performance/engine-benchmarks/bench_tool + cd enso/tools/performance/engine-benchmarks python3 website_regen.py \ -v \ --dry-run \ --local-repo engine-benchmark-results - name: Upload benchmarks + # TODO: Remove the --dry-run arg run: | - cd enso/tools/performance/engine-benchmarks/bench_tool + cd enso/tools/performance/engine-benchmarks python3 website_regen.py \ -v \ + --dry-run \ --local-repo engine-benchmark-results From c53f8549acadb0a9ac3e668c2585c1605acef948 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 18:43:37 +0100 Subject: [PATCH 58/93] Update deps --- .github/workflows/bench-upload.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index fa81949a422b..1dd8dbeadac4 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -32,7 +32,11 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install -y python3 python3-jinja2 python3-numpy + sudo apt-get install -y \ + python3 \ + python3-jinja2 \ + python3-numpy \ + python3-pandas sudo apt-get install -y gh - name: Upload benchmarks (dry-run) if: ${{ github.event.inputs.just-check }} From 4d7c694a4416303d7c0d05d60d1739fddf5c7b8e Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 18:47:54 +0100 Subject: [PATCH 59/93] Fix path to engine-benchmark-results repo --- .github/workflows/bench-upload.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 1dd8dbeadac4..cc5071afa2a6 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -53,4 +53,4 @@ jobs: python3 website_regen.py \ -v \ --dry-run \ - --local-repo engine-benchmark-results + --local-repo ${{ 
github.workspace }}/engine-benchmark-results From b4e52bb16752e6c51434a7b34f3d39f219105159 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 18:54:50 +0100 Subject: [PATCH 60/93] Use GH_TOKEN secret --- .github/workflows/bench-upload.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index cc5071afa2a6..b9ff5f519245 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -54,3 +54,5 @@ jobs: -v \ --dry-run \ --local-repo ${{ github.workspace }}/engine-benchmark-results + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 1278ad3db3afc97c3824fe940d6a69776341af68 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 19:01:31 +0100 Subject: [PATCH 61/93] Use GH_TOKEN secret even in dry-run --- .github/workflows/bench-upload.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index b9ff5f519245..404ab57df463 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -44,15 +44,15 @@ jobs: cd enso/tools/performance/engine-benchmarks python3 website_regen.py \ -v \ - --dry-run \ - --local-repo engine-benchmark-results + --dry-run \ + --local-repo ${{ github.workspace }}/engine-benchmark-results + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload benchmarks run: | cd enso/tools/performance/engine-benchmarks python3 website_regen.py \ -v \ --local-repo ${{ github.workspace }}/engine-benchmark-results env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} From d4b3259f896ac62bc4388690258bccd91d7466c5 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 19:06:56 +0100 Subject: [PATCH 62/93] Better error message in git.commit --- tools/performance/engine-benchmarks/bench_tool/git.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py index a4519efd2544..d8bf8c6fb2b5 100644 --- a/tools/performance/engine-benchmarks/bench_tool/git.py +++ b/tools/performance/engine-benchmarks/bench_tool/git.py @@ -68,14 +68,16 @@ async def add(repo: Path, files: Set[str]) -> None: async def commit(repo: Path, msg: str) -> None: - _logger.debug("Committing %s with message %s", repo, msg) + _logger.debug("Committing %s with message '%s'", repo, msg) stat = await status(repo) assert len(stat.added) > 0 args = ["commit", "-m", msg] proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) ret = await proc.wait() if ret != 0: - raise RuntimeError(f"Failed to commit {repo}") + out, err = await proc.communicate() + all_out = out.decode() + err.decode() + raise RuntimeError(f"Failed to commit {repo}.
Output: {all_out}") async def push(repo: Path) -> None: From dc1f2ddbaf62625dbdd9ae20f5dc7690da662da1 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 19:11:26 +0100 Subject: [PATCH 63/93] Add setup git step --- .github/workflows/bench-upload.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 404ab57df463..abc335ad83ab 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -38,6 +38,10 @@ jobs: python3-numpy \ python3-pandas sudo apt-get install -y gh + - name: Set up git + run: | + git config --global user.email "pavel.marek@enso.org" + git config --global user.name "enso-bench-bot" - name: Upload benchmarks (dry-run) if: ${{ github.event.inputs.just-check }} run: | From 9c23c859832b07a2a086b6429911ec45bceec267 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Thu, 22 Feb 2024 19:16:56 +0100 Subject: [PATCH 64/93] Add even more error messages --- .../engine-benchmarks/bench_tool/git.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py index d8bf8c6fb2b5..2b53d6097518 100644 --- a/tools/performance/engine-benchmarks/bench_tool/git.py +++ b/tools/performance/engine-benchmarks/bench_tool/git.py @@ -35,7 +35,9 @@ async def pull(repo: Path) -> None: proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) ret = await proc.wait() if ret != 0: - raise RuntimeError(f"Failed to pull {repo}") + stdout, stderr = await proc.communicate() + out = stdout.decode() + stderr.decode() + raise RuntimeError(f"Failed to pull {repo}: {out}") async def status(repo: Path) -> GitStatus: @@ -64,7 +66,9 @@ async def add(repo: Path, files: Set[str]) -> None: proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) ret = await proc.wait() if ret != 0: - raise RuntimeError(f"Failed to add {files} to {repo}") + out, err = await proc.communicate() + all_out = out.decode() + err.decode() + raise RuntimeError(f"Failed to add {files} to {repo}. Output: {all_out}") async def commit(repo: Path, msg: str) -> None: @@ -86,7 +90,9 @@ async def push(repo: Path) -> None: proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) ret = await proc.wait() if ret != 0: - raise RuntimeError(f"Failed to push {repo}") + out, err = await proc.communicate() + all_out = out.decode() + err.decode() + raise RuntimeError(f"Failed to push {repo}. Output: {all_out}") async def init(repo: Path) -> None: @@ -96,4 +102,6 @@ async def init(repo: Path) -> None: proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) ret = await proc.wait() if ret != 0: - raise RuntimeError(f"Failed to init {repo}") + out, err = await proc.communicate() + all_out = out.decode() + err.decode() + raise RuntimeError(f"Failed to init {repo}. 
Output: {all_out}") From d640a40abaa091341477ecd2314dc69ea249e1d9 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 23 Feb 2024 11:51:12 +0100 Subject: [PATCH 65/93] Do not do --dry-run on the CI --- .github/workflows/bench-upload.yml | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index abc335ad83ab..e76c56b8e3ac 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -7,13 +7,6 @@ on: branches: ["*"] schedule: - cron: 0 0 * * * - workflow_dispatch: - inputs: - just-check: - description: If set, no results will be uploaded. - required: true - type: boolean - default: false jobs: upload-benchmarks: name: Upload benchmarks @@ -42,16 +35,6 @@ jobs: run: | git config --global user.email "pavel.marek@enso.org" git config --global user.name "enso-bench-bot" - - name: Upload benchmarks (dry-run) - if: ${{ github.event.inputs.just-check }} - run: | - cd enso/tools/performance/engine-benchmarks - python3 website_regen.py \ - -v \ - --dry-run \ - --local-repo ${{ github.workspace }}/engine-benchmark-results - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload benchmarks run: | cd enso/tools/performance/engine-benchmarks From 6e4c9f0077822d471da223f56f67afb466bae8d1 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 23 Feb 2024 11:54:47 +0100 Subject: [PATCH 66/93] Run the job every day at 5:30 AM --- .github/workflows/bench-upload.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index e76c56b8e3ac..31ee6dbcec0d 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -6,7 +6,9 @@ on: pull_request: branches: ["*"] schedule: - - cron: 0 0 * * * + # Run every day at 5:30 AM. At midnight, benchmarks run, and we want this job to be + # scheduled after all the benchmarks are finished, so that we have the newest results. 
+ - cron: 30 5 * * * jobs: upload-benchmarks: name: Upload benchmarks From 99286f4186db91a988404e3a439a953c47cd6935 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 23 Feb 2024 11:56:38 +0100 Subject: [PATCH 67/93] Set the git email to actions-at-github.org --- .github/workflows/bench-upload.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 31ee6dbcec0d..bcb829c3a226 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -35,7 +35,7 @@ jobs: sudo apt-get install -y gh - name: Set up git run: | - git config --global user.email "pavel.marek@enso.org" + git config --global user.email "actions@github.org" git config --global user.name "enso-bench-bot" - name: Upload benchmarks run: | From e1ee00d854dcd3f11aeab7d8b903db182b03679f Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Fri, 23 Feb 2024 11:58:09 +0100 Subject: [PATCH 68/93] Use the correct token for bench uploadeds --- .github/workflows/bench-upload.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index bcb829c3a226..ffc71b04348b 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -44,4 +44,4 @@ jobs: -v \ --local-repo ${{ github.workspace }}/engine-benchmark-results env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} From 56c04cbc968e7fd08f69371e33f0197310cd9419 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 26 Feb 2024 12:57:51 +0100 Subject: [PATCH 69/93] Set the remote url to enable push to engine-benchmark-results repo --- .github/workflows/bench-upload.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index ffc71b04348b..3402c361710a 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -37,6 +37,8 @@ jobs: run: | git config --global user.email "actions@github.org" git config --global user.name "enso-bench-bot" + cd ${{ github.workspace }}/engine-benchmark-results + git remote set-url origin https://x-access-token:${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }}@github.com/enso-org/engine-benchmark-results - name: Upload benchmarks run: | cd enso/tools/performance/engine-benchmarks From 6ecbbff215b822903a41398b583d05d07bb60700 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 26 Feb 2024 16:39:47 +0100 Subject: [PATCH 70/93] Add optional fields param to gh.invoke_api --- tools/performance/engine-benchmarks/bench_tool/gh.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py index 1bfe827edb78..3a19803c3b9b 100644 --- a/tools/performance/engine-benchmarks/bench_tool/gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/gh.py @@ -27,6 +27,7 @@ async def invoke_gh_api( repo: str, endpoint: str, query_params: Dict[str, str] = {}, + fields: Dict[str, str] = {}, result_as_json: bool = True, method: str = "GET" ) -> Optional[Union[Dict[str, Any], bytes]]: @@ -35,12 +36,16 @@ async def invoke_gh_api( :param repo: Repository name in the form `owner/repo` :param endpoint: Endpoint of the query. Must start with `/`. :param query_params: Additional query parameters. + :param fields: Additional fields to be added to the query. add static + string parameters to the request payload. 
:param result_as_json: If result should be parsed as JSON. If false, the raw bytes are returned. :param method: HTTP method to use, 'GET' by default. :return: None if the query fails """ assert endpoint.startswith("/") + if len(fields) > 0 and method != "POST": + raise ValueError("Fields can be used only with POST method") urlencode(query_params) cmd = [ "gh", @@ -48,6 +53,8 @@ async def invoke_gh_api( "--method", method, f"/repos/{repo}{endpoint}" + "?" + urlencode(query_params) ] + for k, v in fields.items(): + cmd.append(f"-f {k}='{v}'") _logger.debug("Invoking gh API with `%s`", " ".join(cmd)) proc = await asyncio.create_subprocess_exec("gh", *cmd[1:], stdout=subprocess.PIPE, @@ -58,6 +65,8 @@ async def invoke_gh_api( _logger.error("Command `%s` FAILED with errcode %d", " ".join(cmd), proc.returncode) + _logger.error(" stdout: %s", out.decode()) + _logger.error(" stderr: %s", err.decode()) return None if result_as_json: return json.loads(out.decode()) From 51e9cce8a9a886290e9c976fd3c3699375c4412b Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 26 Feb 2024 16:40:11 +0100 Subject: [PATCH 71/93] Add git.head_commit function --- tools/performance/engine-benchmarks/bench_tool/git.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py index 2b53d6097518..95a5e7289603 100644 --- a/tools/performance/engine-benchmarks/bench_tool/git.py +++ b/tools/performance/engine-benchmarks/bench_tool/git.py @@ -105,3 +105,14 @@ async def init(repo: Path) -> None: out, err = await proc.communicate() all_out = out.decode() + err.decode() raise RuntimeError(f"Failed to init {repo}. Output: {all_out}") + + +async def head_commit(repo: Path) -> str: + args = ["rev-parse", "HEAD"] + proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = await proc.wait() + out, err = await proc.communicate() + if ret != 0: + raise RuntimeError(f"Failed to get HEAD commit of {repo}: {err.decode()}") + else: + return out.decode().strip() From ce319dc6a69035a904c3eaa9d4d55516377ebd55 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Mon, 26 Feb 2024 16:40:24 +0100 Subject: [PATCH 72/93] Push via GH API, and not via `git push` --- .../engine-benchmarks/bench_tool/remote_cache.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index fa753fb82908..091fb7db9aad 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -160,7 +160,17 @@ async def sync(self) -> None: _logger.debug("Untracked files: %s", status.untracked) await git.add(self._repo_root_dir, status.untracked) await git.commit(self._repo_root_dir, f"Add {len(status.untracked)} new reports") - await git.push(self._repo_root_dir) + head_commit_sha = await git.head_commit(self._repo_root_dir) + # Push the changes to the remote. 
Do not use `git push`, as that + # does not use authentication via GH_TOKEN + ret = await gh.invoke_gh_api(BENCH_REPO, + "/merges", + fields={ + "base": "main", + "head": head_commit_sha + }, + method="POST") + _logger.debug(f"Successfully merged the changes: {ret.__dict__}") def _is_benchrun_id(name: str) -> bool: From 01faa53ed6eeb87d9d497dfbd9d6b755cb8a7e29 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Tue, 27 Feb 2024 09:29:27 +0100 Subject: [PATCH 73/93] [WIP] Temporarily disable the Upload benchmarks job --- .github/workflows/bench-upload.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 3402c361710a..87a321523fd7 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -39,11 +39,11 @@ jobs: git config --global user.name "enso-bench-bot" cd ${{ github.workspace }}/engine-benchmark-results git remote set-url origin https://x-access-token:${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }}@github.com/enso-org/engine-benchmark-results - - name: Upload benchmarks - run: | - cd enso/tools/performance/engine-benchmarks - python3 website_regen.py \ - -v \ - --local-repo ${{ github.workspace }}/engine-benchmark-results - env: - GITHUB_TOKEN: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} + #- name: Upload benchmarks + #run: | + #cd enso/tools/performance/engine-benchmarks + #python3 website_regen.py \ + # -v \ + # --local-repo ${{ github.workspace }}/engine-benchmark-results + #env: + # GITHUB_TOKEN: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} From d6aa6ca85bd2dbe419559f7defe0efee66719242 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Tue, 27 Feb 2024 09:31:00 +0100 Subject: [PATCH 74/93] [WIP] Do plain `git push` in the action --- .github/workflows/bench-upload.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 87a321523fd7..8a8f9873ac61 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -39,6 +39,10 @@ jobs: git config --global user.name "enso-bench-bot" cd ${{ github.workspace }}/engine-benchmark-results git remote set-url origin https://x-access-token:${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }}@github.com/enso-org/engine-benchmark-results + echo "Hello world" > hello.txt + git add hello.txt + git commit -m "Add hello.txt" + git push origin #- name: Upload benchmarks #run: | #cd enso/tools/performance/engine-benchmarks From 0506edb35bf082adefcbde4314fb8d14af5c994e Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Tue, 27 Feb 2024 09:47:26 +0100 Subject: [PATCH 75/93] Set token in checkout action --- .github/workflows/bench-upload.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 8a8f9873ac61..64fb898c91c2 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -24,6 +24,7 @@ jobs: with: repository: enso-org/engine-benchmark-results path: engine-benchmark-results + token: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} - name: Install dependencies run: | sudo apt-get update From 3f8b2900fd66e2405fae5ea913a11d184af60d49 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Tue, 27 Feb 2024 09:56:10 +0100 Subject: [PATCH 76/93] Unset extra authentication header --- .github/workflows/bench-upload.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bench-upload.yml 
b/.github/workflows/bench-upload.yml index 64fb898c91c2..c31721655670 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -36,9 +36,10 @@ jobs: sudo apt-get install -y gh - name: Set up git run: | + cd ${{ github.workspace }}/engine-benchmark-results git config --global user.email "actions@github.org" git config --global user.name "enso-bench-bot" - cd ${{ github.workspace }}/engine-benchmark-results + git config --unset-all http.https://github.com/.extraheader git remote set-url origin https://x-access-token:${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }}@github.com/enso-org/engine-benchmark-results echo "Hello world" > hello.txt git add hello.txt From 7af7146cb1d0a15dbea5179295d21865fe0f6962 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Tue, 27 Feb 2024 10:35:19 +0100 Subject: [PATCH 77/93] Fix field body in gh.invoke_gh_api --- tools/performance/engine-benchmarks/bench_tool/gh.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py index 3a19803c3b9b..d521beb3e57f 100644 --- a/tools/performance/engine-benchmarks/bench_tool/gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/gh.py @@ -54,7 +54,8 @@ async def invoke_gh_api( f"/repos/{repo}{endpoint}" + "?" + urlencode(query_params) ] for k, v in fields.items(): - cmd.append(f"-f {k}='{v}'") + cmd.append("-f") + cmd.append(f"{k}='{v}'") _logger.debug("Invoking gh API with `%s`", " ".join(cmd)) proc = await asyncio.create_subprocess_exec("gh", *cmd[1:], stdout=subprocess.PIPE, From c08148813b157e7c8cc84d0c1407f8534c2a1be4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20W=2E=20Urba=C5=84czyk?= Date: Wed, 28 Feb 2024 01:23:18 +0100 Subject: [PATCH 78/93] experiment --- .github/workflows/bench-upload.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index c31721655670..2d8e1012c97c 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -37,14 +37,13 @@ jobs: - name: Set up git run: | cd ${{ github.workspace }}/engine-benchmark-results - git config --global user.email "actions@github.org" - git config --global user.name "enso-bench-bot" - git config --unset-all http.https://github.com/.extraheader - git remote set-url origin https://x-access-token:${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }}@github.com/enso-org/engine-benchmark-results + git config --global user.email "ci@enso.org" + git config --global user.name "Enso CI Bot" echo "Hello world" > hello.txt git add hello.txt git commit -m "Add hello.txt" git push origin + #- name: Upload benchmarks #run: | #cd enso/tools/performance/engine-benchmarks From 37473f930dad11c097b1900ab556bd4c6c98d922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20W=2E=20Urba=C5=84czyk?= Date: Wed, 28 Feb 2024 01:48:13 +0100 Subject: [PATCH 79/93] experiment --- .github/workflows/bench-upload.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 2d8e1012c97c..049b58cba326 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -42,7 +42,11 @@ jobs: echo "Hello world" > hello.txt git add hello.txt git commit -m "Add hello.txt" - git push origin + - name: Push changes + uses: ad-m/github-push-action@master + with: + directory: engine-benchmark-results + github_token: ${{ 
secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} #- name: Upload benchmarks #run: | From f52c0587c708dccdc5edddc1ab5b29b639be0840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20W=2E=20Urba=C5=84czyk?= Date: Wed, 28 Feb 2024 01:50:48 +0100 Subject: [PATCH 80/93] experiment --- .github/workflows/bench-upload.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 049b58cba326..79a55a9b7926 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -47,6 +47,7 @@ jobs: with: directory: engine-benchmark-results github_token: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} + repository: enso-org/engine-benchmark-results #- name: Upload benchmarks #run: | From 05d90830f8467176ff26a50287ee38063e69d02b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20W=2E=20Urba=C5=84czyk?= Date: Wed, 28 Feb 2024 02:05:42 +0100 Subject: [PATCH 81/93] experiment --- .github/workflows/bench-upload.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 79a55a9b7926..bd04dee95293 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -42,12 +42,7 @@ jobs: echo "Hello world" > hello.txt git add hello.txt git commit -m "Add hello.txt" - - name: Push changes - uses: ad-m/github-push-action@master - with: - directory: engine-benchmark-results - github_token: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} - repository: enso-org/engine-benchmark-results + git push #- name: Upload benchmarks #run: | From aa1d2836bfcef9331be101c9b93e377a8669f02c Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 09:46:16 +0100 Subject: [PATCH 82/93] Use workflow_run trigger instead of schedule --- .github/workflows/bench-upload.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index bd04dee95293..64d6a6d10a75 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -5,10 +5,10 @@ name: Benchmarks upload on: pull_request: branches: ["*"] - schedule: - # Run every day at 5:30 AM. At midnight, benchmarks run, and we want this job to be - # scheduled after all the benchmarks are finished, so that we have the newest results. 
- - cron: 30 5 * * * + workflow_run: + workflows: ["Benchmark Engine", "Benchmark Standard Libraries"] + types: + - completed jobs: upload-benchmarks: name: Upload benchmarks From 970e3da132d35c0ad999b394907cbb1eb49d9f5d Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 09:48:35 +0100 Subject: [PATCH 83/93] Fix "Set up git" step --- .github/workflows/bench-upload.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index 64d6a6d10a75..b959711c70b3 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -36,13 +36,8 @@ jobs: sudo apt-get install -y gh - name: Set up git run: | - cd ${{ github.workspace }}/engine-benchmark-results git config --global user.email "ci@enso.org" git config --global user.name "Enso CI Bot" - echo "Hello world" > hello.txt - git add hello.txt - git commit -m "Add hello.txt" - git push #- name: Upload benchmarks #run: | From 010843d2072004d358e3bdd199b0cdef9112dc54 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 09:49:29 +0100 Subject: [PATCH 84/93] Enable the Upload benchmarks step --- .github/workflows/bench-upload.yml | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index b959711c70b3..c02db8b83cbd 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -38,12 +38,11 @@ jobs: run: | git config --global user.email "ci@enso.org" git config --global user.name "Enso CI Bot" - - #- name: Upload benchmarks - #run: | - #cd enso/tools/performance/engine-benchmarks - #python3 website_regen.py \ - # -v \ - # --local-repo ${{ github.workspace }}/engine-benchmark-results - #env: - # GITHUB_TOKEN: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} + - name: Upload benchmarks + run: | + cd enso/tools/performance/engine-benchmarks + python3 website_regen.py \ + -v \ + --local-repo ${{ github.workspace }}/engine-benchmark-results + env: + GITHUB_TOKEN: ${{ secrets.ENSO_BENCHMARK_RESULTS_TOKEN }} From 7da61c724f0d721ec91150ae66e77dd088347937 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 10:57:11 +0100 Subject: [PATCH 85/93] Add anchor to the HTML template --- .../engine-benchmarks/templates/template_jinja.html | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/templates/template_jinja.html b/tools/performance/engine-benchmarks/templates/template_jinja.html index 97311c0d8af8..ac8667fd8e0c 100644 --- a/tools/performance/engine-benchmarks/templates/template_jinja.html +++ b/tools/performance/engine-benchmarks/templates/template_jinja.html @@ -334,7 +334,10 @@
     Applied filters
     {% for bench_data in bench_datas %}
-    {{ bench_data.id }}
+    <a href="#{{ bench_data.id }}">
+      {{ bench_data.id }}
+    </a>
From 9128d335470b455f9bcfebc967abd0cf7721d03e Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 11:01:00 +0100 Subject: [PATCH 86/93] Revert "Push via GH API, and not via `git push`" This reverts commit ce319dc6a69035a904c3eaa9d4d55516377ebd55. --- .../engine-benchmarks/bench_tool/remote_cache.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index 091fb7db9aad..fa753fb82908 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -160,17 +160,7 @@ async def sync(self) -> None: _logger.debug("Untracked files: %s", status.untracked) await git.add(self._repo_root_dir, status.untracked) await git.commit(self._repo_root_dir, f"Add {len(status.untracked)} new reports") - head_commit_sha = await git.head_commit(self._repo_root_dir) - # Push the changes to the remote. Do not use `git push`, as that - # does not use authentication via GH_TOKEN - ret = await gh.invoke_gh_api(BENCH_REPO, - "/merges", - fields={ - "base": "main", - "head": head_commit_sha - }, - method="POST") - _logger.debug(f"Successfully merged the changes: {ret.__dict__}") + await git.push(self._repo_root_dir) def _is_benchrun_id(name: str) -> bool: From d7283ae7bb16510a0fd2cbeabbd143122bd916f2 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 11:19:06 +0100 Subject: [PATCH 87/93] Implement backoff in gh api --- .../engine-benchmarks/bench_tool/gh.py | 51 ++++++++++++++----- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/gh.py b/tools/performance/engine-benchmarks/bench_tool/gh.py index d521beb3e57f..d8899e15b22d 100644 --- a/tools/performance/engine-benchmarks/bench_tool/gh.py +++ b/tools/performance/engine-benchmarks/bench_tool/gh.py @@ -9,10 +9,13 @@ _logger = logging.getLogger(__name__) +MAX_BACKOFF_SECONDS = 120 + def ensure_gh_installed() -> None: try: - out = subprocess.run(["gh", "--version"], check=True, capture_output=True) + out = subprocess.run(["gh", "--version"], check=True, + capture_output=True) if out.returncode != 0: print("`gh` command not found - GH CLI utility is not installed. " "See https://cli.github.com/", file=sys.stderr) @@ -24,12 +27,13 @@ def ensure_gh_installed() -> None: async def invoke_gh_api( - repo: str, - endpoint: str, - query_params: Dict[str, str] = {}, - fields: Dict[str, str] = {}, - result_as_json: bool = True, - method: str = "GET" + repo: str, + endpoint: str, + query_params: Dict[str, str] = {}, + fields: Dict[str, str] = {}, + result_as_json: bool = True, + method: str = "GET", + backoff: int = 0, ) -> Optional[Union[Dict[str, Any], bytes]]: """ Invokes the GitHub API using the `gh` command line tool. @@ -41,6 +45,9 @@ async def invoke_gh_api( :param result_as_json: If result should be parsed as JSON. If false, the raw bytes are returned. :param method: HTTP method to use, 'GET' by default. + :param backoff: Number of seconds to wait before retrying the request. + If higher than 0, it means that the request has already been retried, + try to do it again, with a higher backoff. 
:return: None if the query fails """ assert endpoint.startswith("/") @@ -56,6 +63,12 @@ async def invoke_gh_api( for k, v in fields.items(): cmd.append("-f") cmd.append(f"{k}='{v}'") + if 0 < backoff <= MAX_BACKOFF_SECONDS: + _logger.debug(f"Backing off for {backoff} seconds") + await asyncio.sleep(backoff) + elif backoff > MAX_BACKOFF_SECONDS: + _logger.error(f"Backoff of {backoff} seconds is too high, giving up.") + return None _logger.debug("Invoking gh API with `%s`", " ".join(cmd)) proc = await asyncio.create_subprocess_exec("gh", *cmd[1:], stdout=subprocess.PIPE, @@ -63,12 +76,21 @@ async def invoke_gh_api( out, err = await proc.communicate() _logger.debug("Finished gh API `%s`", " ".join(cmd)) if proc.returncode != 0: - _logger.error("Command `%s` FAILED with errcode %d", - " ".join(cmd), - proc.returncode) - _logger.error(" stdout: %s", out.decode()) - _logger.error(" stderr: %s", err.decode()) - return None + # Special handling of rate limit exceeded - just try to make the + # request one more time after some backoff. + if "You have exceeded a secondary rate limit" in err.decode(): + new_backoff = 10 if backoff == 0 else backoff * 2 + _logger.warning(f"Trying to retry the request with a new backoff " + f"of {new_backoff} seconds.") + return await invoke_gh_api(repo, endpoint, query_params, fields, + result_as_json, method, new_backoff) + else: + _logger.error("Command `%s` FAILED with errcode %d", + " ".join(cmd), + proc.returncode) + _logger.error(" stdout: %s", out.decode()) + _logger.error(" stderr: %s", err.decode()) + return None if result_as_json: return json.loads(out.decode()) else: @@ -76,7 +98,8 @@ async def invoke_gh_api( async def fetch_file(repo: str, file_path: str) -> Optional[str]: - ret = await invoke_gh_api(repo, f"/contents/{file_path}", result_as_json=True) + ret = await invoke_gh_api(repo, f"/contents/{file_path}", + result_as_json=True) if ret is None: _logger.warning("File %s not found in %s", file_path, repo) return None From 8753c7cf81659d91bad639b05dd8b48d9370a0a7 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 11:32:21 +0100 Subject: [PATCH 88/93] Add time when the website was generated --- tools/performance/engine-benchmarks/bench_download.py | 1 + tools/performance/engine-benchmarks/bench_tool/__init__.py | 2 ++ tools/performance/engine-benchmarks/bench_tool/website.py | 1 + .../performance/engine-benchmarks/templates/template_jinja.html | 2 +- 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_download.py b/tools/performance/engine-benchmarks/bench_download.py index 834f747f2e39..9aa6b3c081d4 100755 --- a/tools/performance/engine-benchmarks/bench_download.py +++ b/tools/performance/engine-benchmarks/bench_download.py @@ -275,6 +275,7 @@ def _parse_bench_source(_bench_source: str) -> Source: bench_datas=template_bench_datas, bench_source=bench_source, branches=branches, + timestamp=datetime.now() ) # Render Jinja template with jinja_data diff --git a/tools/performance/engine-benchmarks/bench_tool/__init__.py b/tools/performance/engine-benchmarks/bench_tool/__init__.py index c62504599ca3..f3318a98c0c7 100644 --- a/tools/performance/engine-benchmarks/bench_tool/__init__.py +++ b/tools/performance/engine-benchmarks/bench_tool/__init__.py @@ -213,6 +213,8 @@ class JinjaData: until: datetime display_since: datetime """ The date from which all the datapoints are first displayed """ + timestamp: datetime + """ The time when the website was generated """ diff --git 
a/tools/performance/engine-benchmarks/bench_tool/website.py b/tools/performance/engine-benchmarks/bench_tool/website.py index babca05203a3..57f6f6da29d3 100644 --- a/tools/performance/engine-benchmarks/bench_tool/website.py +++ b/tools/performance/engine-benchmarks/bench_tool/website.py @@ -60,6 +60,7 @@ async def generate_bench_website( bench_datas=template_bench_datas, bench_source=bench_source, branches=[BRANCH_DEVELOP], + timestamp=datetime.now() ) _logger.debug(f"Rendering HTML to {generated_html}") render_html(jinja_data, generated_html) diff --git a/tools/performance/engine-benchmarks/templates/template_jinja.html b/tools/performance/engine-benchmarks/templates/template_jinja.html index ac8667fd8e0c..137aab00df0e 100644 --- a/tools/performance/engine-benchmarks/templates/template_jinja.html +++ b/tools/performance/engine-benchmarks/templates/template_jinja.html @@ -280,7 +280,7 @@
-          Generated by the bench_download.py script.
+          Generated by the bench_download.py script in {{ timestamp }}.
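As a side note on how `timestamp` reaches the page: every `JinjaData` field becomes a template variable when the HTML is rendered. The helper below is only a hypothetical mirror of the tool's `render_html` (whose real body is not shown in this series), written to make the data flow concrete:

    from dataclasses import asdict
    from pathlib import Path
    import jinja2

    def render_html_sketch(jinja_data, template_file: Path, out_file: Path) -> None:
        # Expose each JinjaData field, including the new `timestamp`,
        # as a variable of the Jinja template, then write the page out.
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(str(template_file.parent)))
        html = env.get_template(template_file.name).render(asdict(jinja_data))
        out_file.write_text(html)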
From 3e765d1653e75f26fd3ae7813cbcdbd385e62929 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 12:22:19 +0100 Subject: [PATCH 89/93] Relax the assertion in git.commit --- tools/performance/engine-benchmarks/bench_tool/git.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py index 95a5e7289603..2d172463a228 100644 --- a/tools/performance/engine-benchmarks/bench_tool/git.py +++ b/tools/performance/engine-benchmarks/bench_tool/git.py @@ -74,7 +74,7 @@ async def add(repo: Path, files: Set[str]) -> None: async def commit(repo: Path, msg: str) -> None: _logger.debug("Committing %s with message '%s'", repo, msg) stat = await status(repo) - assert len(stat.added) > 0 + assert len(stat.added) > 0 or len(stat.modified) > 0 args = ["commit", "-m", msg] proc = await asyncio.create_subprocess_exec("git", *args, cwd=repo, stdout=subprocess.PIPE, stderr=subprocess.PIPE) ret = await proc.wait() From 45d68fcbb9a5186efea038f8120726521c76ce71 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 12:26:42 +0100 Subject: [PATCH 90/93] Add more reasonable commit message --- .../engine-benchmarks/bench_tool/remote_cache.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py index fa753fb82908..b6627fb639cd 100644 --- a/tools/performance/engine-benchmarks/bench_tool/remote_cache.py +++ b/tools/performance/engine-benchmarks/bench_tool/remote_cache.py @@ -153,13 +153,17 @@ async def sync(self) -> None: is_repo_dirty = len(status.modified) > 0 or len(status.added) > 0 if is_repo_dirty: _logger.info("Untracked or modified files found in the repo: %s", self._repo_root_dir) + commit_msg = "Regenerate websites" if len(status.modified) > 0: _logger.debug("Modified files: %s", status.modified) await git.add(self._repo_root_dir, status.modified) if len(status.untracked) > 0: _logger.debug("Untracked files: %s", status.untracked) await git.add(self._repo_root_dir, status.untracked) - await git.commit(self._repo_root_dir, f"Add {len(status.untracked)} new reports") + commit_msg += f" - Add {len(status.untracked)} new reports." + else: + commit_msg += "." 
+ await git.commit(self._repo_root_dir, commit_msg) await git.push(self._repo_root_dir) From 76802d39da486f3ea62470deb16c10fc5a730b66 Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 12:56:38 +0100 Subject: [PATCH 91/93] Fix git.status Strip lines of the stdout --- tools/performance/engine-benchmarks/bench_tool/git.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/performance/engine-benchmarks/bench_tool/git.py b/tools/performance/engine-benchmarks/bench_tool/git.py index 2d172463a228..8e3529f05cd3 100644 --- a/tools/performance/engine-benchmarks/bench_tool/git.py +++ b/tools/performance/engine-benchmarks/bench_tool/git.py @@ -50,9 +50,10 @@ async def status(repo: Path) -> GitStatus: modified: Set[str] = set() added: Set[str] = set() for line in lines: + line = line.strip() if line.startswith("??"): untracked.add(line.split()[1]) - elif line.startswith(" M"): + elif line.startswith("M "): modified.add(line.split()[1]) elif line.startswith("A "): added.add(line.split()[1]) From 9cefc5ff5eea216336d3b66b4ea9808592b7bfda Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 16:51:08 +0100 Subject: [PATCH 92/93] Remove the `on: pull_request` trigger --- .github/workflows/bench-upload.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/bench-upload.yml b/.github/workflows/bench-upload.yml index c02db8b83cbd..69d8d44f504f 100644 --- a/.github/workflows/bench-upload.yml +++ b/.github/workflows/bench-upload.yml @@ -3,8 +3,6 @@ name: Benchmarks upload on: - pull_request: - branches: ["*"] workflow_run: workflows: ["Benchmark Engine", "Benchmark Standard Libraries"] types: From 846e02e8bd800c08e2bd04d370ce7625cb5a7d1a Mon Sep 17 00:00:00 2001 From: Pavel Marek Date: Wed, 28 Feb 2024 17:44:58 +0100 Subject: [PATCH 93/93] fmt --- tools/performance/engine-benchmarks/README.md | 4 ++-- .../engine-benchmarks/templates/template_jinja.html | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/performance/engine-benchmarks/README.md b/tools/performance/engine-benchmarks/README.md index e1d94eb63ba2..3b7bba729079 100644 --- a/tools/performance/engine-benchmarks/README.md +++ b/tools/performance/engine-benchmarks/README.md @@ -4,8 +4,8 @@ This directory contains a python script `bench_download.py` for downloading Engine and stdlib benchmark results from GitHub, and `Engine_Benchs` Enso project for analysing the downloaded data. -Note that for convenience, there is `bench_tool` directory that is a Python package. -The `bench_download.py` script uses this package. +Note that for convenience, there is `bench_tool` directory that is a Python +package. The `bench_download.py` script uses this package. To run all the Python tests for that package, run `python -m unittest` in this directory. diff --git a/tools/performance/engine-benchmarks/templates/template_jinja.html b/tools/performance/engine-benchmarks/templates/template_jinja.html index 137aab00df0e..838f0968c368 100644 --- a/tools/performance/engine-benchmarks/templates/template_jinja.html +++ b/tools/performance/engine-benchmarks/templates/template_jinja.html @@ -280,7 +280,8 @@
-          Generated by the bench_download.py script in {{ timestamp }}.
+          Generated by the bench_download.py script in
+          {{ timestamp }}.
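To make the PATCH 91 fix concrete: `git status --porcelain` prefixes every entry with a two-character code (`??` for untracked, `M` or `A` in the leading columns for modified or added files), and stripping each line first lets a single `"M "` check match both a staged `M  file` and an unstaged ` M file` entry. Below is a self-contained sketch of that parsing; the returned triple only mirrors the `GitStatus` shape used in `bench_tool.git` and is not the module's actual API:

    from typing import Set, Tuple

    def parse_porcelain(stdout: str) -> Tuple[Set[str], Set[str], Set[str]]:
        """Sketch of the line handling in bench_tool.git.status after PATCH 91."""
        untracked: Set[str] = set()
        modified: Set[str] = set()
        added: Set[str] = set()
        for line in stdout.splitlines():
            line = line.strip()  # normalizes both " M file" and "M  file"
            if line.startswith("??"):
                untracked.add(line.split()[1])
            elif line.startswith("M "):
                modified.add(line.split()[1])
            elif line.startswith("A "):
                added.add(line.split()[1])
        return untracked, modified, added

    sample = " M cache/123.json\n?? cache/456.json\nA  index.html\n"
    assert parse_porcelain(sample) == (
        {"cache/456.json"},
        {"cache/123.json"},
        {"index.html"},
    )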