|
| 1 | +import fire |
1 | 2 | import os
|
2 | 3 | import logging
|
3 | 4 | from code_intelligence import github_app
|
4 | 5 | import typing
|
5 | 6 | import yaml
|
6 | 7 |
|
def get_issue_handle(installation_id, username, repository, number):
    """Return a handle to a single issue.

    A GitHub app is created from the environment, the installation identified
    by ``installation_id`` is looked up on it, and the issue is fetched through
    that installation.

    Args:
      installation_id: Id of the app installation used to reach the issue.
      username: Passed through as the first argument of ``install.issue``.
      repository: Passed through as the second argument of ``install.issue``.
      number: Passed through as the third argument of ``install.issue``.

    Returns:
      Whatever ``install.issue(username, repository, number)`` returns.
    """
    app = github_app.GitHubApp.create_from_env()
    return app.get_installation(installation_id).issue(username, repository, number)
12 | 13 |
|
def get_yaml(owner, repo, ghapp=None):
    """
    Load the parsed contents of .github/issue_label_bot.yaml for a repository.

    Args:
      owner: Owner of the repository.
      repo: Name of the repository.
      ghapp: Optional GitHub app used to reach the repository. When it is
        falsy a new one is created from the environment.

    Returns:
      The result of ``yaml.safe_load`` on the decoded file contents, or None
      if anything goes wrong while fetching the file.
    """
    # TODO(jlewi): Should we deprecate this code path and always pass
    # in the github app?
    app = ghapp if ghapp else github_app.GitHubApp.create_from_env()

    try:
        # Resolve the app installation for this repository.
        installation = app.get_installation(
            installation_id=app.get_installation_id(owner=owner, repo=repo))
        # The repository handle is what lets us read file contents.
        repo_handle = installation.repository(owner=owner, repository=repo)
        raw_config = repo_handle.file_contents('.github/issue_label_bot.yaml').decoded
    # TODO(jlewi): We should probably catching more narrow exceptions and
    # not swallowing all exceptions. The exceptions we should swallow are
    # the ones related to the configuration file not existing.
    except Exception as e:
        logging.info(f"Exception occured getting .github/issue_label_bot.yaml: {e}")
        return None
    else:
        # Parsing happens outside the try so YAML errors still propagate.
        return yaml.safe_load(raw_config)
40 | 41 |
|
def build_issue_doc(org:str, repo:str, title:str, text:typing.List[str]):
    """Assemble the document string used to classify an issue.

    The document is newline-joined and consists of the title, then a
    lower-cased "<org>_<repo>" line, then every entry of ``text`` in order.

    Args:
      org: The organization the issue belongs in
      repo: The repository.
      title: Issue title
      text: List of contents of the comments on the issue

    Returns:
      content: The document to classify
    """
    repo_tag = f"{org.lower()}_{repo.lower()}"
    return "\n".join([title, repo_tag, *text])
| 59 | + |
| 60 | +# TODO(https://github.com/kubeflow/code-intelligence/issues/126): This function should replace |
| 61 | +# get_issue_text |
def get_issue(url, gh_client):
    """Fetch the issue data using GraphQL.

    The comments, labels and unlabeled-event connections are paged through
    together, 100 items at a time, until none of them reports another page.

    Args:
      url: Url of the GitHub issue to fetch
      gh_client: GitHub GraphQL client. Must provide
        ``run_query(query, variables)`` returning the decoded response dict.

    Returns:
      dict
        {'title': str,
         'comments': List[str],
         'comment_authors': List[Optional[str]],
         'labels': List[str],
         'removed_labels': List[str]}

      comments is a list of comments. The first one will be the body of the issue.
      comment_authors is aligned with comments; an entry is None when the
        response carries no author for that item.
      labels: Labels currently on the issue (sorted)
      removed_labels: Labels that have been removed and are not currently on
        the issue (sorted)

    Raises:
      ValueError: If the response contains errors, or the url does not resolve
        to an issue.
    """

    # The "!" means the variable can't be null. We allow the cursors
    # to be null so that on the first call we fetch the first couple items.
    issue_query = """query getIssue($url: URI!, $labelCursor: String, $timelineCursor: String, $commentsCursor: String) {
  resource(url: $url) {
    __typename
    ... on Issue {
      author {
        __typename
        ... on User {
          login
        }
        ... on Bot {
          login
        }
      }
      id
      title
      body
      url
      state
      comments(first: 100, after: $commentsCursor) {
        totalCount
        edges {
          node {
            author {
              login
            }
            body
          }
        }
        pageInfo {
          hasNextPage
          endCursor
        }
      }
      timelineItems(first: 100, itemTypes: [UNLABELED_EVENT], after: $timelineCursor) {
        totalCount
        edges {
          node {
            __typename
            ... on UnlabeledEvent {
              createdAt
              label {
                name
              }
            }
          }
        }
        pageInfo {
          hasNextPage
          endCursor
        }
      }
      labels(first: 100, after: $labelCursor) {
        totalCount
        pageInfo {
          hasNextPage
          endCursor
        }
        edges {
          node {
            name
          }
        }
      }
    }
  }
}"""

    variables = {
        "url": url,
        "labelCursor": None,
        "commentsCursor": None,
        "timelineCursor": None,
    }

    # Maps each paged connection in the response to the query variable that
    # carries its cursor.
    cursor_variables = {
        "comments": "commentsCursor",
        "labels": "labelCursor",
        "timelineItems": "timelineCursor",
    }

    def login_of(author):
        # The author object can be null in the response; keep the authors
        # list aligned with the comments list by recording None.
        return author.get("login") if author else None

    result = {
        "title": None,
        "comments": [],
        "comment_authors": [],
        "labels": set(),
        "removed_labels": set(),
    }

    first_page = True
    has_more = True
    while has_more:
        issue_results = gh_client.run_query(issue_query, variables)

        if "errors" in issue_results:
            logging.error(f"There was a problem running the github query; {issue_results['errors']}")
            raise ValueError(f"There was a problem running the github query: {issue_results['errors']}")

        issue = issue_results["data"]["resource"]

        # A null resource, or one that is not an Issue (only __typename is
        # returned for other types), has none of the fields read below.
        if not issue or "title" not in issue:
            raise ValueError(f"{url} did not resolve to a GitHub issue")

        # The title and body are the same on every page; record them once.
        if first_page:
            result["title"] = issue["title"]
            result["comments"].append(issue["body"])
            result["comment_authors"].append(login_of(issue.get("author")))
            first_page = False

        for edge in issue["comments"]["edges"]:
            node = edge["node"]
            result["comments"].append(node["body"])
            result["comment_authors"].append(login_of(node.get("author")))

        for edge in issue["labels"]["edges"]:
            result["labels"].add(edge["node"]["name"])

        for edge in issue["timelineItems"]["edges"]:
            label = edge["node"].get("label")
            if label:
                result["removed_labels"].add(label["name"])

        has_more = False
        for field, cursor_name in cursor_variables.items():
            page_info = issue[field]["pageInfo"]
            if page_info["hasNextPage"]:
                has_more = True
            # An exhausted connection reports a null endCursor. Keep the last
            # real cursor so the next request gets an empty page for it
            # instead of restarting from the first item (which would append
            # duplicate comments).
            if page_info["endCursor"] is not None:
                variables[cursor_name] = page_info["endCursor"]

    # For removed_labels we only want labels that were permanently removed
    removed = result["removed_labels"] - result["labels"]

    # Sorted so the output order is deterministic.
    result["labels"] = sorted(result["labels"])
    result["removed_labels"] = sorted(removed)
    return result
0 commit comments