From ff7527fb31c0304ca81b138a57dc08515521773a Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Mon, 23 Oct 2023 13:30:07 +0100 Subject: [PATCH 1/5] trafficjam: get more than 100 files in a pr --- Lib/gftools/gfgithub.py | 16 +++++++++++----- Lib/gftools/push/trafficjam.py | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Lib/gftools/gfgithub.py b/Lib/gftools/gfgithub.py index de4f31593..7c9d9454e 100644 --- a/Lib/gftools/gfgithub.py +++ b/Lib/gftools/gfgithub.py @@ -93,8 +93,14 @@ def create_issue(self, title: str, body: str): } ) - - - - - + def pr_files(self, pr_number: int): + res = [] + cur_page = 1 + url = self.rest_url(f"pulls/{pr_number}/files", per_page="100", page=str(cur_page)) + request = self._get(url) + while request: + res += request + cur_page += 1 + url = self.rest_url(f"pulls/{pr_number}/files", per_page="100", page=str(cur_page)) + request = self._get(url) + return res diff --git a/Lib/gftools/push/trafficjam.py b/Lib/gftools/push/trafficjam.py index 98e91f414..e86815376 100644 --- a/Lib/gftools/push/trafficjam.py +++ b/Lib/gftools/push/trafficjam.py @@ -9,6 +9,7 @@ from pathlib import Path from typing import Optional, Any from functools import cached_property +import math from gftools.push.items import Axis, Designer, Family, FamilyMeta from gftools.push.utils import google_path_to_repo_path, repo_path_to_google_path @@ -130,7 +131,9 @@ def from_string(string: str): # type: ignore[misc] content { ... on PullRequest { id + number files(first: 100) { + totalCount nodes { path } @@ -463,6 +466,19 @@ def from_traffic_jam(cls): # sort items by pr number board_items.sort(key=lambda k: k["content"]["url"]) + # get files for prs which have more than 100 changed files + for item in board_items: + changed_files = item["content"]["files"]["totalCount"] + if changed_files <= 100: + continue + pr_number = item['content']['number'] + pr_url = item["content"]["url"] + log.warn( + f"{pr_url} has {changed_files} changed files. Attempting to fetch them." + ) + files = g.pr_files(pr_number) + item["content"]["files"]["nodes"] = [{"path": f["filename"]} for f in files] + results = cls() for item in board_items: status = item.get("status", {}).get("name", None) From a461b8046894ea17839db1b852a5e2011c53990d Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Mon, 23 Oct 2023 13:42:05 +0100 Subject: [PATCH 2/5] gfgithub: sleep to avoid hitting rate limits --- Lib/gftools/gfgithub.py | 7 ++++++- tests/test_gfgithub.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tests/test_gfgithub.py diff --git a/Lib/gftools/gfgithub.py b/Lib/gftools/gfgithub.py index 7c9d9454e..36bf0e968 100644 --- a/Lib/gftools/gfgithub.py +++ b/Lib/gftools/gfgithub.py @@ -3,6 +3,7 @@ import requests import typing import urllib +import time GITHUB_GRAPHQL_API = 'https://api.github.com/graphql' @@ -93,7 +94,7 @@ def create_issue(self, title: str, body: str): } ) - def pr_files(self, pr_number: int): + def pr_files(self, pr_number: int, sleep=4): res = [] cur_page = 1 url = self.rest_url(f"pulls/{pr_number}/files", per_page="100", page=str(cur_page)) @@ -103,4 +104,8 @@ def pr_files(self, pr_number: int): cur_page += 1 url = self.rest_url(f"pulls/{pr_number}/files", per_page="100", page=str(cur_page)) request = self._get(url) + # sleep so we don't hit api rate limits. We should get at least 1k + # requests per hour so sleeping for 4 secs by default means we + # shouldn't hit any issues. + time.sleep(sleep) return res diff --git a/tests/test_gfgithub.py b/tests/test_gfgithub.py new file mode 100644 index 000000000..661af2069 --- /dev/null +++ b/tests/test_gfgithub.py @@ -0,0 +1,16 @@ +from gftools.gfgithub import GitHubClient +import pytest + + +@pytest.mark.parametrize( + "pr_number,file_count", + [ + (6874, 1), + (6779, 3), + (2987, 178), + (6787, 568), + ] +) +def test_pr_files(pr_number, file_count): + client = GitHubClient("google", "fonts") + assert len(client.pr_files(pr_number)) == file_count From 063c0da8c2b072a8db5f425528aa47cd14997218 Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Mon, 23 Oct 2023 13:42:21 +0100 Subject: [PATCH 3/5] black --- Lib/gftools/gfgithub.py | 93 +++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/Lib/gftools/gfgithub.py b/Lib/gftools/gfgithub.py index 36bf0e968..7c07b4686 100644 --- a/Lib/gftools/gfgithub.py +++ b/Lib/gftools/gfgithub.py @@ -6,20 +6,20 @@ import time -GITHUB_GRAPHQL_API = 'https://api.github.com/graphql' -GITHUB_V3_REST_API = 'https://api.github.com' +GITHUB_GRAPHQL_API = "https://api.github.com/graphql" +GITHUB_V3_REST_API = "https://api.github.com" class GitHubClient: def __init__(self, repo_owner, repo_name): - if not 'GH_TOKEN' in os.environ: + if not "GH_TOKEN" in os.environ: raise Exception("GH_TOKEN environment variable not set") - self.gh_token = os.environ['GH_TOKEN'] + self.gh_token = os.environ["GH_TOKEN"] self.repo_owner = repo_owner self.repo_name = repo_name - + def _post(self, url, payload: typing.Dict): - headers = {'Authorization': f'bearer {self.gh_token}'} + headers = {"Authorization": f"bearer {self.gh_token}"} response = requests.post(url, json=payload, headers=headers) if response.status_code == requests.codes.unprocessable: # has a helpful response.json with an 'errors' key. @@ -27,82 +27,83 @@ def _post(self, url, payload: typing.Dict): else: response.raise_for_status() json = response.json() - if 'errors' in json: - errors = pprint.pformat(json['errors'], indent=2) - raise Exception(f'GitHub POST query failed to url {url}:\n {errors}') + if "errors" in json: + errors = pprint.pformat(json["errors"], indent=2) + raise Exception(f"GitHub POST query failed to url {url}:\n {errors}") return json - + def _get(self, url): - headers = {'Authorization': f'bearer {self.gh_token}'} + headers = {"Authorization": f"bearer {self.gh_token}"} response = requests.get(url, headers=headers) response.raise_for_status() json = response.json() - if 'errors' in json: - errors = pprint.pformat(json['errors'], indent=2) - raise Exception(f'GitHub REST query failed:\n {errors}') + if "errors" in json: + errors = pprint.pformat(json["errors"], indent=2) + raise Exception(f"GitHub REST query failed:\n {errors}") return json - + def _run_graphql(self, query, variables): - payload = {'query': query, 'variables': variables} + payload = {"query": query, "variables": variables} return self._post(GITHUB_GRAPHQL_API, payload) - + def rest_url(self, path, **kwargs): - base_url = f'{GITHUB_V3_REST_API}/repos/{self.repo_owner}/{self.repo_name}/{path}' + base_url = ( + f"{GITHUB_V3_REST_API}/repos/{self.repo_owner}/{self.repo_name}/{path}" + ) if kwargs: - base_url += '?' + '&'.join(f'{k}={urllib.parse.quote(v)}' for k, v in kwargs.items()) + base_url += "?" + "&".join( + f"{k}={urllib.parse.quote(v)}" for k, v in kwargs.items() + ) return base_url def get_blob(self, file_sha): - url = self.rest_url(f'git/blobs/{file_sha}') + url = self.rest_url(f"git/blobs/{file_sha}") headers = { - 'Accept': 'application/vnd.github.v3.raw', - 'Authorization': f'bearer {self.gh_token}' + "Accept": "application/vnd.github.v3.raw", + "Authorization": f"bearer {self.gh_token}", } response = requests.get(url, headers=headers) response.raise_for_status() return response - + def open_prs(self, pr_head: str, pr_base_branch: str) -> typing.List: - return self._get(self.rest_url("pulls", state="open", head=pr_head, base=pr_base_branch)) - + return self._get( + self.rest_url("pulls", state="open", head=pr_head, base=pr_base_branch) + ) + def create_pr(self, title: str, body: str, head: str, base: str): return self._post( self.rest_url("pulls"), { - 'title': title, - 'body': body, - 'head': head, - 'base': base, - 'maintainer_can_modify': True - } + "title": title, + "body": body, + "head": head, + "base": base, + "maintainer_can_modify": True, + }, ) - + def create_issue_comment(self, issue_number: int, body: str): return self._post( - self.rest_url(f'issues/{issue_number}/comments'), - { - 'body': body - } + self.rest_url(f"issues/{issue_number}/comments"), {"body": body} ) - + def create_issue(self, title: str, body: str): - return self._post( - self.rest_url("issues"), - { - 'title': title, - 'body': body - } - ) + return self._post(self.rest_url("issues"), {"title": title, "body": body}) def pr_files(self, pr_number: int, sleep=4): res = [] cur_page = 1 - url = self.rest_url(f"pulls/{pr_number}/files", per_page="100", page=str(cur_page)) + url = self.rest_url( + f"pulls/{pr_number}/files", per_page="100", page=str(cur_page) + ) request = self._get(url) while request: res += request cur_page += 1 - url = self.rest_url(f"pulls/{pr_number}/files", per_page="100", page=str(cur_page)) + url = self.rest_url( + f"pulls/{pr_number}/files", per_page="100", page=str(cur_page) + ) request = self._get(url) # sleep so we don't hit api rate limits. We should get at least 1k # requests per hour so sleeping for 4 secs by default means we From 06d054d1fa3ee56479cda89962eaad527a652558 Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Mon, 23 Oct 2023 14:14:32 +0100 Subject: [PATCH 4/5] manage-traffic-jam: add * option to bump pushlists for all push items in a pr --- Lib/gftools/scripts/manage_traffic_jam.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/gftools/scripts/manage_traffic_jam.py b/Lib/gftools/scripts/manage_traffic_jam.py index 385a4ccde..9503d8cea 100644 --- a/Lib/gftools/scripts/manage_traffic_jam.py +++ b/Lib/gftools/scripts/manage_traffic_jam.py @@ -69,6 +69,13 @@ def user_input(self, item: PushItem): "Bump pushlist: [y/n], block: [b] skip pr: [s], inspect: [i], quit: [q]?: " ) + if "*" in user_input: + item.bump_pushlist() + for sub_item in self.push_items: + if sub_item.url != item.url: + continue + sub_item.push_list = item.push_list + self.skip_pr = item.url if "y" in user_input: item.bump_pushlist() if "b" in user_input: From a9b4177a00589f0a7aeea7f93e36f313dce95331 Mon Sep 17 00:00:00 2001 From: Marc Foley Date: Mon, 23 Oct 2023 14:38:51 +0100 Subject: [PATCH 5/5] rm redundant import --- Lib/gftools/push/trafficjam.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/gftools/push/trafficjam.py b/Lib/gftools/push/trafficjam.py index e86815376..c6bb8ac47 100644 --- a/Lib/gftools/push/trafficjam.py +++ b/Lib/gftools/push/trafficjam.py @@ -9,7 +9,6 @@ from pathlib import Path from typing import Optional, Any from functools import cached_property -import math from gftools.push.items import Axis, Designer, Family, FamilyMeta from gftools.push.utils import google_path_to_repo_path, repo_path_to_google_path