From 2623a5764ed9284e3128dadd19006dacea3a2cbc Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 24 Feb 2025 23:46:44 +0100 Subject: [PATCH] Add make_backports script --- tools/make_backports.py | 435 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 435 insertions(+) create mode 100755 tools/make_backports.py diff --git a/tools/make_backports.py b/tools/make_backports.py new file mode 100755 index 00000000000..5ad4276b1f3 --- /dev/null +++ b/tools/make_backports.py @@ -0,0 +1,435 @@ +#!/usr/bin/env python3 +import argparse +import functools +import re +import subprocess +import sys +from collections import namedtuple +from copy import deepcopy +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Self + +CHANGELOG = Path(__file__).parents[1] / "docs/project/changelog.md" +NEEDS_BACKPORTS_CACHE = Path(__file__).parent / "needs_backport_prs_cached.txt" + + +def run( + args: list[str | Path], check: bool = True, **kwargs: Any +) -> subprocess.CompletedProcess[Any]: + result = subprocess.run(args, check=False, text=True, **kwargs) + if check and result.returncode: + print(f"Command failed with exit status {result.returncode}") + print("Command was:", " ".join(str(x) for x in args)) + sys.exit(result.returncode) + return result + + +def fetch_needs_backport_pr_numbers() -> tuple[int, ...]: + """Use gh cli to collect the set of PRs that are labeled as needs_backport. + + Then cache them to disk. This is the implementation for --fetch-backport-prs. + """ + result = run( + ["gh", "pr", "list", "--label", "needs backport", "--state", "closed"], + capture_output=True, + ) + lines = [line.split("\t", 1)[0] for line in result.stdout.splitlines()] + NEEDS_BACKPORTS_CACHE.write_text("\n".join(lines) + "\n") + + +@functools.cache +def get_needs_backport_pr_numbers() -> tuple[int, ...]: + """Read the set of backports we need to make from disk.""" + if not NEEDS_BACKPORTS_CACHE.exists(): + print( + f"error: {NEEDS_BACKPORTS_CACHE} does not exist. Run with --fetch-backport-prs first", + file=sys.stdout, + ) + sys.exit(1) + lines = NEEDS_BACKPORTS_CACHE.read_text().splitlines() + return tuple(int(line) for line in lines) + + +# we use history_idx to sort by age. +CommitInfo = namedtuple( + "CommitInfo", ["pr_number", "shorthash", "shortlog", "history_idx"] +) + + +class CommitHistory: + """Store the history of the github PRs with a map from pr_number to CommitInfo""" + + commits: dict[int, CommitInfo] + + def __init__(self): + result = run(["git", "log", "--oneline", "main"], capture_output=True) + lines = result.stdout.splitlines() + commits = {} + PR_NUMBER_RE = re.compile(r"\(#[0-9]+\)$") + for history_idx, line in enumerate(lines): + if not (m := PR_NUMBER_RE.search(line)): + continue + pr_number = int(m.group(0)[2:-1]) + shorthash, shortlog = line.split(" ", 1) + commits[pr_number] = CommitInfo(pr_number, shorthash, shortlog, history_idx) + + self.commits = commits + + def lookup_pr(self, pr_number: int) -> CommitInfo: + return self.commits[pr_number] + + +@functools.cache +def get_commits() -> list[CommitInfo]: + """Return the CommitInfo of the PRs we want to backport""" + pr_numbers = get_needs_backport_pr_numbers() + commit_history = CommitHistory() + commits = [commit_history.lookup_pr(x) for x in pr_numbers] + return sorted(commits, key=lambda c: -c.history_idx) + + +# Changelog parsing + + +@dataclass +class ChangelogEntry: + """A changelog entry, represented as a list of strings. + + An entry is started by a line beginning with `-`. It ends when there is a + line starting with `#` (begins a new section) or `-` (begins a new entry). + + This is nearly the same thing as it's content. + """ + + content: list[str] = field(default_factory=list) + + def get_text(self) -> str: + if self.content: + return "\n".join(self.content) + "\n" + return "" + + def __bool__(self) -> bool: + return bool(self.content) + + def append(self, line: str) -> None: + self.content.append(line) + + +@dataclass +class ChangelogSubSection: + """A changelog subsection + + Introduced by ### or ##. Ends when there is another line with ### or ##. + + header: + Consists of all the lines starting with and the subsection start "###" + line and including all content lines that do not start with -. Generally + this will be ### plus one or more empty lines. + + entries: + The list of entries. + + cur_entry: + Parser state. + """ + + header: list[str] = field(default_factory=list) + entries: list[ChangelogEntry] = field(default_factory=list) + cur_entry: ChangelogEntry = field(default_factory=ChangelogEntry) + + def get_text(self) -> str: + """Unparse the subsection""" + header = "" + if self.header: + header = "\n".join(self.header) + "\n" + res = header + "".join(x.get_text() for x in self.entries) + # Special case: if the last entry already ends in a blank line, we don't + # add another one. This keeps the spacing more consistent with the + # backported entries. + if not res.endswith("\n\n"): + res += "\n" + return res + + def __bool__(self) -> bool: + return bool(self.header or self.entries or self.cur_entry) + + def append(self, line: str) -> None: + """Main parsing logic.""" + if line.startswith("-"): + self.finish_entry() + if self.cur_entry or line.startswith("-"): + self.cur_entry.append(line) + else: + self.header.append(line) + + def finish_entry(self) -> None: + """If cur_entry is nonempty, add it to entries. Then empty out cur_entry""" + if self.cur_entry: + self.entries.append(self.cur_entry) + self.cur_entry = ChangelogEntry() + + +PrChangelogIndex = namedtuple("PrChangelogIndex", ["subsection", "entry", "is_unique"]) + + +@dataclass +class ChangelogSection: + """A changelog subsection + + Introduced by ##. Ends when there is a ##. + + header: + Other than the unreleased section we don't actually bother parsing out + the changelog. So for the "prelude" and "rest" sections, this is + actually all the content. + + For the unreleased and patch_release sections, this is only the content + up to the first entry or subsection. So that should include just the `## + Unreleased` line and a blank line or two. + + subsections: + The list of subsections. + + cur_subsection: + Parser state. + + pr_index: + For the unreleased section, we populate this with information about + where the release note for each PR is. Populated by create_pr_index(). + """ + + header: list[str] = field(default_factory=list) + subsections: list[ChangelogSubSection] = field(default_factory=list) + cur_subsection: ChangelogSubSection = field(default_factory=ChangelogSubSection) + pr_index: dict[int, PrChangelogIndex] = field(default_factory=dict) + + def get_text(self) -> str: + """Unparse the section""" + header = "" + if self.header: + header = "\n".join(self.header) + "\n" + return header + "".join(x.get_text() for x in self.subsections) + + def append(self, line: str) -> None: + """Main parsing logic.""" + if line.startswith("### "): + self.finish_subsection() + if self.cur_subsection or line.startswith(("-", "### ")): + self.cur_subsection.append(line) + else: + self.header.append(line) + + def append_lines(self, lines: list[str]) -> None: + for line in lines: + self.append(line) + + def finish_subsection(self) -> None: + """If cur_subsection is nonempty, add it to entries. Then empty out cur_entry""" + if self.cur_subsection: + self.cur_subsection.finish_entry() + self.subsections.append(self.cur_subsection) + self.cur_subsection = ChangelogSubSection() + + def create_pr_index(self) -> None: + PR_NUMBER_RE = re.compile(r"{pr}`[0-9]+`") + for subsection_idx, subsection in enumerate(self.subsections): + for entry_idx, entry in enumerate(subsection.entries): + pr_strs = PR_NUMBER_RE.findall(entry.get_text()) + is_unique = len(pr_strs) == 1 + for pr_str in pr_strs: + pr = int(pr_str[5:-1]) + self.pr_index[pr] = PrChangelogIndex( + subsection_idx, entry_idx, is_unique + ) + + def delete_entry(self, pr_changelog_index: PrChangelogIndex) -> None: + subsection = self.subsections[pr_changelog_index.subsection] + del subsection.entries[pr_changelog_index.entry] + if not subsection.entries: + del self.subsections[pr_changelog_index.subsection] + + +@dataclass +class Changelog: + """Class for keeping track of an item in inventory.""" + + file: Path + prelude: ChangelogSection = field(default_factory=ChangelogSection) + unreleased: ChangelogSection = field(default_factory=ChangelogSection) + patch_release: ChangelogSection = field(default_factory=ChangelogSection) + rest: ChangelogSection = field(default_factory=ChangelogSection) + + def parse(self) -> Self: + changelog = self.file.read_text().splitlines() + + it = iter(changelog) + for line in it: + if line.startswith("## Unreleased"): + self.unreleased.header.append(line) + break + # We don't care what's in the prelude so it all goes in the header + self.prelude.header.append(line) + # Parse unreleased section + for line in it: + if line.startswith("## "): + self.unreleased.finish_subsection() + self.rest.header.append(line) + break + self.unreleased.append(line) + + # We don't care what's in the rest so it all goes in the header + self.rest.header.extend(it) + return self + + def get_text(self, include_unreleased=True): + # For the backports changelog we want to drop the unreleased section + # entirely. + unreleased = self.unreleased.get_text() if include_unreleased else "" + return ( + self.prelude.get_text() + + unreleased + + self.patch_release.get_text() + + self.rest.get_text() + ) + + def write_text(self, include_unreleased=True): + self.file.write_text(self.get_text(include_unreleased=include_unreleased)) + + def set_patch_release_notes(self, version: str, commits: list[CommitInfo]) -> None: + """Given a list of PRs, check if they have a changelog entry in + "Unreleased". + + If so add the entry to the patch_release section. Don't remove the entry + from the unreleased section, just duplicate it. + """ + self.patch_release = ChangelogSection() + self.patch_release.append_lines( + [f"## Version {version}", "", "_Insert Date Here_", ""] + ) + backport_subsections = {} + for commit in commits: + pr_number = commit.pr_number + if pr_index := self.unreleased.pr_index.get(pr_number, None): + subsection = self.unreleased.subsections[pr_index.subsection] + header = "\n".join(subsection.header) + if header not in backport_subsections: + new_subsection = deepcopy(subsection) + new_subsection.entries = [] + backport_subsections[header] = new_subsection + entry = subsection.entries[pr_index.entry] + backport_subsections[header].entries.append(entry) + for subsection in self.unreleased.subsections: + header = "\n".join(subsection.header) + if backport_subsection := backport_subsections.get(header, None): + self.patch_release.subsections.append(backport_subsection) + + def remove_release_notes_from_unreleased_section( + self, commits: list[CommitInfo] + ) -> None: + # Have to do this in two passes: + # 1. collect up entries to delete + indices_to_delete = [] + for commit in commits: + pr_number = commit.pr_number + if pr_index := self.unreleased.pr_index.get(pr_number, None): + indices_to_delete.append(pr_index) + + # 2. Sort by reverse order of appearance and then delete. + for idx in sorted( + indices_to_delete, key=lambda idx: (-idx.subsection, -idx.entry) + ): + self.unreleased.delete_entry(idx) + + +def show_missing_changelogs() -> None: + changelog = Changelog(CHANGELOG).parse() + commits = get_commits() + for commit in commits: + pr_number = commit.pr_number + if pr_number not in changelog.unreleased.pr_index: + print(pr_number, commit.shortlog) + + +def make_changelog_branch(version: str) -> None: + changelog = Changelog(CHANGELOG).parse() + changelog.unreleased.create_pr_index() + run(["git", "switch", "main"]) + run(["git", "switch", "-C", f"changelog-for-{version}-tmp"]) + commits = get_commits() + changelog.set_patch_release_notes(version, commits) + changelog.remove_release_notes_from_unreleased_section(commits) + changelog.write_text() + run(["git", "add", CHANGELOG]) + run(["git", "commit", "-m", f"Update changelog for v{version}"]) + + +def make_backport_branch(version: str) -> None: + changelog = Changelog(CHANGELOG).parse() + changelog.unreleased.create_pr_index() + run(["git", "switch", "stable"]) + run(["git", "submodule", "update"]) + run(["git", "switch", "-C", f"backports-for-{version}-tmp"]) + commits = get_commits() + for n, cur_commit in enumerate(commits): + result = run(["git", "cherry-pick", cur_commit.shorthash], check=False) + changelog.set_patch_release_notes(version, commits[: n + 1]) + changelog.write_text(include_unreleased=False) + run(["git", "add", "docs/project/changelog.md"]) + if result.returncode == 0: + run(["git", "commit", "--amend"]) + else: + run(["git", "cherry-pick", "--continue"]) + + commits = get_commits() + + +def remove_needs_backport_labels() -> None: + for pr_number in get_needs_backport_pr_numbers(): + run(["gh", "pr", "edit", str(pr_number), "--remove-label", "needs backport"]) + + +def parse_args(): + parser = argparse.ArgumentParser("Apply backports") + parser.add_argument("new_version") + parser.add_argument( + "--fetch-backport-prs", + action="store_true", + help="Fetch the list of PRs with the 'needs backport' label and cache to disk. Must be run first.", + ) + parser.add_argument( + "--missing-changelogs", + action="store_true", + help="List the PRs labeled as 'needs backport' that don't have a changelog", + ) + parser.add_argument( + "--changelog-branch", + action="store_true", + help="Make changelog-for-version branch", + ) + parser.add_argument( + "--backport-branch", + action="store_true", + help="Make backports-for-version branch", + ) + return parser.parse_args() + + +def main(): + args = parse_args() + if args.fetch_backport_prs: + fetch_needs_backport_pr_numbers() + return + if args.missing_changelogs: + show_missing_changelogs() + return + if args.changelog_branch: + make_changelog_branch(args.new_version) + return + if args.backport_branch: + make_backport_branch(args.new_version) + return + + +if __name__ == "__main__": + main()