diff --git a/Label_Microservice/.build/prod/extensions_v1beta1_deployment_label-bot-worker.yaml b/Label_Microservice/.build/prod/extensions_v1beta1_deployment_label-bot-worker.yaml index 3fdebf672d..d75258d8eb 100644 --- a/Label_Microservice/.build/prod/extensions_v1beta1_deployment_label-bot-worker.yaml +++ b/Label_Microservice/.build/prod/extensions_v1beta1_deployment_label-bot-worker.yaml @@ -42,7 +42,7 @@ spec: value: "27079" - name: GITHUB_APP_PEM_KEY value: /var/secrets/github/issue-label-bot-github-app.private-key.pem - image: gcr.io/issue-label-bot-dev/bot-worker:011a589 + image: gcr.io/issue-label-bot-dev/bot-worker:6848ad6 name: app resources: requests: diff --git a/Label_Microservice/README.md b/Label_Microservice/README.md index cb3ebf2e2a..a84ba3ca05 100644 --- a/Label_Microservice/README.md +++ b/Label_Microservice/README.md @@ -62,7 +62,7 @@ The following describes the GCP projects and clusters where the two services are - **repository**: [machine-learning-apps/Issue-Label-Bot](https://github.com/machine-learning-apps/Issue-Label-Bot) - **GCP project**: github-probots - **cluster**: kf-ci-ml - - **namespace**: mlapp + - **namespace**: label-bot-prod - **yaml files**: [deployment](https://github.com/machine-learning-apps/Issue-Label-Bot/tree/master/deployment) 1. Repo-specific label microservice @@ -76,9 +76,9 @@ The following describes the GCP projects and clusters where the two services are 1. The flask app - **repository**: [machine-learning-apps/Issue-Label-Bot](https://github.com/machine-learning-apps/Issue-Label-Bot) - - **GCP project**: issue-label-bot-dev - - **cluster**: github-mlapp-test - - **namespace**: mlapp + - **GCP project**: github-probots + - **cluster**: kf-ci-ml + - **namespace**: label-bot-dev - **yaml files**: [deployment](https://github.com/machine-learning-apps/Issue-Label-Bot/tree/master/deployment) 1. 
Repo-specific label microservice @@ -88,6 +88,10 @@ The following describes the GCP projects and clusters where the two services are - **namespace**: default - **yaml files**: [Label\_Microservice/deployment](https://github.com/kubeflow/code-intelligence/tree/master/Label_Microservice/deployment) +1. GitHub bot - **kf-label-bot-dev** + + - see [kubeflow/code-intelligence#84](https://github.com/kubeflow/code-intelligence/issues/84) for information on the setup + - see [machine-learning-apps/Issue-Label-Bot#57](https://github.com/machine-learning-apps/Issue-Label-Bot/issues/57) ## Instructions diff --git a/Label_Microservice/deployment/overlays/prod/kustomization.yaml b/Label_Microservice/deployment/overlays/prod/kustomization.yaml index 5c0f2ce563..1b051a5037 100644 --- a/Label_Microservice/deployment/overlays/prod/kustomization.yaml +++ b/Label_Microservice/deployment/overlays/prod/kustomization.yaml @@ -10,4 +10,4 @@ resources: images: - name: gcr.io/issue-label-bot-dev/bot-worker newName: gcr.io/issue-label-bot-dev/bot-worker - newTag: 011a589 + newTag: 6848ad6 diff --git a/py/code_intelligence/embeddings.py b/py/code_intelligence/embeddings.py index 1b1b442c69..afd39e41c6 100644 --- a/py/code_intelligence/embeddings.py +++ b/py/code_intelligence/embeddings.py @@ -42,6 +42,7 @@ def get_issue_text(num, idx, owner, repo, skip_issue=True): dict {'title':str, 'body':str} """ + logging.warning("get_issue_text is deprecated; use github_util.get_issue") url = f'https://github.com/{owner}/{repo}/issues/{num}' status_code = requests.head(url).status_code if status_code != 200: @@ -73,60 +74,6 @@ def get_issue_text(num, idx, owner, repo, skip_issue=True): 'labels': labels, 'num': num} -# TODO(https://github.com/kubeflow/code-intelligence/issues/126): This function should replace -# get_issue_text -def get_issue(url, gh_client): - """Fetch the issue data using GraphQL - - Args: - url: Url of the GitHub isue to fetch - gh_client: GitHub GraphQl client.
- - Returns - ------ - dict - {'title':str, 'body':str} - """ - issue_query = """query getIssue($url: URI!) { - resource(url: $url) { - __typename - ... on Issue { - author { - __typename - ... on User { - login - } - ... on Bot { - login - } - } - id - title - body - url - state - labels(first: 30) { - totalCount - edges { - node { - name - } - } - } - } - } -}""" - - variables = { - "url": url, - } - issue_results = gh_client.run_query(issue_query, variables) - - if "errors" in issue_results: - logging.error(f"There was a problem running the github query; {issue_results['errors']}") - raise ValueError(f"There was a problem running the github query: {issue_results['errors']}") - return issue_results["data"]["resource"] - def get_all_issue_text(owner, repo, inf_wrapper, workers=64): """ Prepare embedding features of all issues in a given repository. @@ -191,9 +138,9 @@ def load_model_artifact(model_url, local_dir=None): if not local_dir: home = str(Path.home()) local_dir = os.path.join(home, "model_files") - + full_path = os.path.join(local_dir, 'model.pkl') - + if not full_path.exists(): logging.info('Loading model.') path.mkdir(exist_ok=True) diff --git a/py/code_intelligence/github_util.py b/py/code_intelligence/github_util.py index 2774ccbbe2..8d86050e65 100644 --- a/py/code_intelligence/github_util.py +++ b/py/code_intelligence/github_util.py @@ -1,3 +1,4 @@ +import fire import os import logging from code_intelligence import github_app @@ -5,53 +6,207 @@ import yaml def get_issue_handle(installation_id, username, repository, number): - "get an issue object." - ghapp = github_app.GitHubApp.create_from_env() - install = ghapp.get_installation(installation_id) - return install.issue(username, repository, number) + "get an issue object." 
+ ghapp = github_app.GitHubApp.create_from_env() + install = ghapp.get_installation(installation_id) + return install.issue(username, repository, number) def get_yaml(owner, repo, ghapp=None): - """ - Looks for the yaml file in a /.github directory. - - yaml file must be named issue_label_bot.yaml - """ - - if not ghapp: - # TODO(jlewi): Should we deprecate this code path and always pass - # in the github app? - ghapp = github_app.GitHubApp.create_from_env() - - try: - # get the app installation handle - inst_id = ghapp.get_installation_id(owner=owner, repo=repo) - inst = ghapp.get_installation(installation_id=inst_id) - # get the repo handle, which allows you got get the file contents - repo = inst.repository(owner=owner, repository=repo) - results = repo.file_contents('.github/issue_label_bot.yaml').decoded - # TODO(jlewi): We should probably catching more narrow exceptions and - # not swallowing all exceptions. The exceptions we should swallow are - # the ones related to the configuration file not existing. - except Exception as e: - logging.info(f"Exception occured getting .github/issue_label_bot.yaml: {e}") - return None - - return yaml.safe_load(results) + """ + Looks for the yaml file in a /.github directory. + + yaml file must be named issue_label_bot.yaml + """ + + if not ghapp: + # TODO(jlewi): Should we deprecate this code path and always pass + # in the github app? + ghapp = github_app.GitHubApp.create_from_env() + + try: + # get the app installation handle + inst_id = ghapp.get_installation_id(owner=owner, repo=repo) + inst = ghapp.get_installation(installation_id=inst_id) + # get the repo handle, which allows you got get the file contents + repo = inst.repository(owner=owner, repository=repo) + results = repo.file_contents('.github/issue_label_bot.yaml').decoded + # TODO(jlewi): We should probably catching more narrow exceptions and + # not swallowing all exceptions. 
The exceptions we should swallow are + # the ones related to the configuration file not existing. + except Exception as e: + logging.info(f"Exception occured getting .github/issue_label_bot.yaml: {e}") + return None + + return yaml.safe_load(results) def build_issue_doc(org:str, repo:str, title:str, text:typing.List[str]): - """Build a document string out of various github features. - - Args: - org: The organization the issue belongs in - repo: The repository. - title: Issue title - text: List of contents of the comments on the issue - - Returns: - content: The document to classify - """ - pieces = [title] - pieces.append(f"{org.lower()}_{repo.lower()}") - pieces.extend(text) - content = "\n".join(pieces) - return content + """Build a document string out of various github features. + + Args: + org: The organization the issue belongs in + repo: The repository. + title: Issue title + text: List of contents of the comments on the issue + + Returns: + content: The document to classify + """ + pieces = [title] + pieces.append(f"{org.lower()}_{repo.lower()}") + pieces.extend(text) + content = "\n".join(pieces) + return content + +# TODO(https://github.com/kubeflow/code-intelligence/issues/126): This function should replace +# get_issue_text +def get_issue(url, gh_client): + """Fetch the issue data using GraphQL. + + Args: + url: Url of the GitHub isue to fetch + gh_client: GitHub GraphQl client. + + Returns + ------ + dict + {'title':str, + 'comments':List[str] + 'labels': List[str] + 'removed_labels': List[str]} + + comments is a list of comments. The first one will be the body of the issue. + + labels: Labels currently on the issue + removed_labels: Labels that have been removed + """ + + # The "!" means the variable can't be null. We allow the cursors + # to be null so that on the first call we fetch the first couple items. 
+ issue_query = """query getIssue($url: URI!, $labelCursor: String, $timelineCursor: String, $commentsCursor: String) { + resource(url: $url) { + __typename + ... on Issue { + author { + __typename + ... on User { + login + } + ... on Bot { + login + } + } + id + title + body + url + state + comments(first: 100, after: $commentsCursor) { + totalCount + edges { + node { + author { + login + } + body + } + } + pageInfo { + hasNextPage + endCursor + } + } + timelineItems(first: 100, itemTypes: [UNLABELED_EVENT], after: $timelineCursor) { + totalCount + edges { + node { + __typename + ... on UnlabeledEvent { + createdAt + label { + name + } + } + } + } + pageInfo { + hasNextPage + endCursor + } + } + labels(first: 100, after: $labelCursor) { + totalCount + pageInfo { + hasNextPage + endCursor + } + edges { + node { + name + } + } + } + } + } +}""" + + variables = { + "url": url, + "labelCursor": None, + "commentsCursor": None, + "timelineCursor": None, + } + + has_more = True + + result = { + "title": None, + "comments": [], + "comment_authors": [], + "labels": set(), + "removed_labels": set(), + } + while has_more: + issue_results = gh_client.run_query(issue_query, variables) + + if "errors" in issue_results: + logging.error(f"There was a problem running the github query; {issue_results['errors']}") + raise ValueError(f"There was a problem running the github query: {issue_results['errors']}") + + issue = issue_results["data"]["resource"] + + # Only set the title once on the first call + if not result["title"]: + result["title"] = issue["title"] + + if not result["comments"]: + result["comments"].append(issue["body"]) + result["comment_authors"].append(issue["author"]["login"]) + + for e in issue["comments"]["edges"]: + node = e["node"] + result["comments"].append(node["body"]) + result["comment_authors"].append(node["author"]["login"]) + + for e in issue["labels"]["edges"]: + node = e["node"] + result["labels"].add(node["name"]) + + for e in
issue["timelineItems"]["edges"]: + node = e["node"] + result["removed_labels"].add(node["label"]["name"]) + + has_more = False + + for f in ["comments", "labels", "timelineItems"]: + has_more = has_more or issue[f].get("pageInfo").get("hasNextPage") + + variables["labelCursor"] = issue["labels"]["pageInfo"]["endCursor"] + variables["commentsCursor"] = issue["comments"]["pageInfo"]["endCursor"] + variables["timelineCursor"] = issue["timelineItems"]["pageInfo"]["endCursor"] + + # For removed_labels we only want labels that were permanently removed + result["removed_labels"] = result["removed_labels"] - result["labels"] + + result["labels"] = list(result["labels"]) + result["removed_labels"] = list(result["removed_labels"]) + return result diff --git a/py/code_intelligence/util.py b/py/code_intelligence/util.py index bea4553a54..0be2945898 100644 --- a/py/code_intelligence/util.py +++ b/py/code_intelligence/util.py @@ -37,7 +37,7 @@ def parse_issue_spec(issue): def parse_issue_url(issue): """Parse an issue in the form https://github.com/{owner}/{repo}/issues/{number} Args: - isue: An issue in the form {owner}/{repo}#{number} + issue: An issue in the form {owner}/{repo}#{number} Returns: owner, repo, number """ @@ -46,6 +46,20 @@ def parse_issue_url(issue): return None, None, None return m.group(1), m.group(2), int(m.group(3)) +# TODO(jlewi): Unittest +def build_issue_url(org, repo, number): + """Return a url in the form https://github.com/{owner}/{repo}/issues/{number} + + Args: + org: The organization that owns the issue + repo: The repo that owns the issue + number: The issue number + + Returns: + owner, repo, number + """ + return f"https://github.com/{org}/{repo}/issues/{number}" + pacific = pytz.timezone("US/Pacific") def now(): diff --git a/py/code_intelligence/util_test.py b/py/code_intelligence/util_test.py index 743655a4af..4456c7eb7d 100644 --- a/py/code_intelligence/util_test.py +++ b/py/code_intelligence/util_test.py @@ -23,6 +23,11 @@ def 
test_parse_issue_spec(): assert repo == c["expected"][1] assert number == c["expected"][2] +def test_build_issue_url(): + url = util.build_issue_url("kubeflow", "testing", "1234") + expected = "https://github.com/kubeflow/testing/issues/1234" + assert url == expected + if __name__ == "__main__": logging.basicConfig( level=logging.INFO, diff --git a/py/label_microservice/automl_model.py b/py/label_microservice/automl_model.py index 551486acce..01a1a2ad24 100644 --- a/py/label_microservice/automl_model.py +++ b/py/label_microservice/automl_model.py @@ -91,6 +91,6 @@ def _predict(): for l in labels_to_remove: del predictions[l] - logging.info(f"Labels below precision and recall {labels_to_remove}", + logging.info(f"Labels below AutoML threshold {labels_to_remove}", extra=context) return predictions diff --git a/py/label_microservice/cli.py b/py/label_microservice/cli.py index 0343458ed5..a733d98df0 100644 --- a/py/label_microservice/cli.py +++ b/py/label_microservice/cli.py @@ -6,12 +6,24 @@ import logging import json import fire +from code_intelligence import graphql +from code_intelligence import github_util from code_intelligence import util from google.cloud import pubsub import subprocess DEFAULT_TOPIC = "projects/issue-label-bot-dev/topics/TEST_event_queue" class Cli: + @staticmethod + def get_issue(url): + """Get the data for a specific issue. 
+ + Args: + url: URL of the issue + """ + gh_client = graphql.GraphQLClient() + result = github_util.get_issue(url, gh_client) + print(json.dumps(result, indent=4, sort_keys=True)) @staticmethod def label_issue(issue, pubsub_topic=DEFAULT_TOPIC): diff --git a/py/label_microservice/issue_label_predictor.py b/py/label_microservice/issue_label_predictor.py index 7a785f3173..8a07cdab52 100644 --- a/py/label_microservice/issue_label_predictor.py +++ b/py/label_microservice/issue_label_predictor.py @@ -1,10 +1,11 @@ import logging -import os -from code_intelligence import embeddings +from code_intelligence import github_app +from code_intelligence import graphql +from code_intelligence import github_util +from code_intelligence import util from label_microservice import automl_model from label_microservice import combined_model -from label_microservice import repo_specific_model from label_microservice import universal_kind_label_model as universal_model UNIVERSAL_MODEL_NAME = "universal" @@ -22,11 +23,10 @@ def _combined_model_name(org, repo=None): org: Name of the org. repo: (Optional) The name of the repo """ - if repo: return f"{org}/{repo}_combined" - else: - return f"{org}_combined" + + return f"{org}_combined" def _dict_has_keys(d, keys): for k in keys: @@ -49,6 +49,12 @@ def __init__(self): # A dictionary mapping keys to individual models. self._models = {} self._load_models() + self._gh_client = graphql.GraphQLClient() + + if not self._gh_client._headers: + logging.error("client._headers not set on GraphQLClient. This likely " + "means no GitHub credentials are loaded and requests to " + "GitHub API will likely fail") def _load_models(self): """Load the models.""" @@ -57,7 +63,7 @@ def _load_models(self): # TODO(jlewi): How should we get a list of all models for which we # have repo or org specific models. 
mlbot is doing this based on a config - # file; https://github.com/machine-learning-apps/Issue-Label-Bot/blob/26d8fb65be3b39de244c4be9e32b2838111dac10/flask_app/forward_utils.py#L5 + # file; https://github.com/machine-learning-apps/Issue-Label-Bot/blob/26d8fb65be3b39de244c4be9e32b2838111dac10/flask_app/forward_utils.py#L5 # pylint: disable=line-too-long for org in ["kubeflow"]: logging.info(f"Loading AutoML model for org: {org}; model: {KUBEFLOW_AUTOML_MODEL}") @@ -70,7 +76,7 @@ def _load_models(self): self._models[_combined_model_name(org)] = combined - def predict_labels_for_data(self, model_name, org, repo, title, body, + def predict_labels_for_data(self, model_name, org, repo, title, text, context=None): """Generate label predictions for the specified data. @@ -79,7 +85,8 @@ def predict_labels_for_data(self, model_name, org, repo, title, body, org: org repo: Repo name title: Title for the issue - body: body of the issue + text: A list of strings representing the body and any comments on the + issue. Returns dict: str -> float; dictionary mapping labels to their probability @@ -88,13 +95,28 @@ def predict_labels_for_data(self, model_name, org, repo, title, body, raise ValueError(f"No model named {model_name}") model = self._models[model_name] - logging.info(f"Generating predictions for title={title} text={body} using" - f"model: {model_name} class:{model.__class__}") - predictions = model.predict_issue_labels(org, repo, title, body, + logging.info(f"Generating predictions for title={title} text={text} using" + f"model: {model_name} class:{model.__class__}", extra=context) + predictions = model.predict_issue_labels(org, repo, title, text, context=context) return predictions + def graphql_client(self, org, repo): + """Return a GitHub GraphQL client for the specified org and repository. + + Args: + org: The org. + repo: The repo + """ + # TODO(jlewi): Should we cache these? 
+ ghapp = github_app.GitHubApp.create_from_env() + token_generator = github_app.GitHubAppTokenGenerator( + ghapp, f"{org}/{repo}") + gh_client = graphql.GraphQLClient(headers=token_generator.auth_headers) + + return gh_client + def predict_labels_for_issue(self, org, repo, issue_number, model_name=None): """Generate label predictions for a github issue. @@ -125,22 +147,24 @@ def predict_labels_for_issue(self, org, repo, issue_number, model_name=None): f"{org}/{repo}#{issue_number} using " f"model {model_name}") - data = embeddings.get_issue_text(issue_number, None, org, repo) + + url = util.build_issue_url(org, repo, issue_number) + data = github_util.get_issue(url, self.graphql_client(org, repo)) if not data.get("title"): logging.warning(f"Got empty title for {org}/{repo}#{issue_number}") - if not data.get("body"): - logging.warning(f"Got empty title for {org}/{repo}#{issue_number}") + if not data.get("comments"): + logging.warning(f"Got empty body and comments for {org}/{repo}#{issue_number}") - context={ + context = { "repo_owner": org, "repo_name": repo, "issue_num": issue_number, } predictions = self.predict_labels_for_data( - model_name, org, repo, data.get("title"), [data.get("body")], + model_name, org, repo, data.get("title"), data.get("comments"), context=context) return predictions @@ -152,8 +176,10 @@ def predict(self, data): The payload can either look like { + "repo_owner": + "repo_name": "title": "some issue title" - "text": "text for some issue + "text": ["This is the body of the issue", "First comment"] "model_name": Name of model to use ...
} @@ -172,10 +198,11 @@ def predict(self, data): """ text_keys = ["title", "text", "model_name"] issue_keys = ["repo_owner", "repo_name", "issue_num"] - if _dict_has_keys(data, text_keys): - return self.predict_labels_for_data(data["model_name"], data["title"], + if _dict_has_keys(data, text_keys): # pylint: disable=no-else-return + return self.predict_labels_for_data(data["model_name"], data["repo_owner"], + data["repo_name"], data["title"], data["text"]) - elif _dict_has_keys(data, issue_keys): + elif _dict_has_keys(data, issue_keys): # pylint: disable=no-else-return return self.predict_labels_for_issue(data["repo_owner"], data["repo_name"], data["issue_num"], @@ -187,4 +214,3 @@ def predict(self, data): want = f"[{text_str}] or [{issue_str}]" logging.error(f"Data is missing required keys; got {actual}; want {want}") raise ValueError(f"Data is missing required keys; got {actual}; want {want}") - diff --git a/py/label_microservice/worker.py b/py/label_microservice/worker.py index f3d1ae962b..be7febd7dc 100644 --- a/py/label_microservice/worker.py +++ b/py/label_microservice/worker.py @@ -9,6 +9,7 @@ from label_microservice.repo_config import RepoConfig from code_intelligence import github_app from code_intelligence import github_util +from code_intelligence import graphql from code_intelligence.pubsub_util import check_subscription_name_exists from code_intelligence.pubsub_util import create_subscription_if_not_exists from code_intelligence import util @@ -20,6 +21,16 @@ DEFAULT_APP_URL = "https://github.com/marketplace/issue-label-bot" +# Repo containing org wide config +ORG_CONFIG_REPO = ".github" + +# The GitHub logins of the label bots. These are used to tell +# whether we have already commented on an issue. +# TODO(jlewi): Can we get this programmatically so it is always in sync? +# with the bot being used? e.g.
we have to get credentials for the bot +# that we are acting as so we should be able to get the login +LABEL_BOT_LOGINS = ["kf-label-bot-dev", "issue-label-bot"] + class Worker: """ The worker class aims to do label prediction for issues from github repos. @@ -167,7 +178,7 @@ def callback(message): log_dict['predictions'] = predictions self.add_labels_to_issue(installation_id, repo_owner, repo_name, issue_num, predictions) - + # I think this log message is used for analysis. logging.info("Add labels to issue.", extra=log_dict) # TODO(jlewi): I observed cases where some of the initial inferences @@ -302,28 +313,90 @@ def add_labels_to_issue(self, installation_id, repo_owner, repo_name, # expiration? ghapp = github_app.GitHubApp.create_from_env() - # handle the yaml file + # Load IssueLabelBot config. Look for both organization configuration + # and repo specific configuration. + # TODO(jlewi): We should really cache these and use some form of + # expiration to pick up changes. + org_config = github_util.get_yaml(owner=repo_owner, + repo=ORG_CONFIG_REPO, ghapp=ghapp) + repo_config = github_util.get_yaml(owner=repo_owner, repo=repo_name, ghapp=ghapp) - predictions = self.apply_repo_config(repo_config, repo_owner, repo_name, + context = { + "repo_owner": repo_owner, + "repo_name": repo_name, + "issue_num": issue_num + } + config = {} + + if org_config: + config.update(org_config) + + if repo_config: + config.update(repo_config) + + predictions = self.apply_repo_config(config, repo_owner, repo_name, predictions, ghapp) + url = util.build_issue_url(repo_owner, repo_name, issue_num) + + token_generator = github_app.GitHubAppTokenGenerator( + ghapp, f"{repo_owner}/{repo_name}") + gh_client = graphql.GraphQLClient(headers=token_generator.auth_headers) + issue_data = github_util.get_issue(url, gh_client) + + predicted_labels = set(predictions.keys()) + + # Remove from label_names any labels which already been applied + # or which were explicitly removed. 
+ label_names = set(predicted_labels) - set(issue_data["labels"]) + label_names = label_names - set(issue_data["removed_labels"]) + + already_applied = predicted_labels.intersection(issue_data["labels"]) + removed = predicted_labels.intersection(issue_data["removed_labels"]) + + filtered_info = {} + filtered_info.update(context) + filtered_info["predicted_labels"] = list(predicted_labels) + filtered_info["already_applied"] = list(already_applied) + filtered_info["removed"] = list(removed) + + logging.info("Filtered predictions", extra=filtered_info) + label_names = list(label_names) + + # Check whether the bot has already commented on this issue. + already_commented = False + for a in LABEL_BOT_LOGINS: + if a in issue_data["comment_authors"]: + already_commented = True + break + + if already_commented: + logging.info("Label bot has already commented on issue.", + extra=context) + else: + logging.info("Label bot has not commented on issue.", + extra=context) + if not installation_id: logging.info("No GitHub App Installation Provided Fetching it") installation_id = ghapp.get_installation_id(repo_owner, repo_name) install = ghapp.get_installation(installation_id) + + # We are using the GitHub3 library to add comments. We should + # TODO(jlewi): We should Use GraphQL so we can use a single library. issue = install.issue(repo_owner, repo_name, issue_num) - label_names = predictions.keys() + message = None if label_names: # create message # Create a markdown table with probabilities. 
rows = ["| Label | Probability |", "| ------------- | ------------- |"] - for l, p in predictions.items(): - rows.append("| {} | {:.2f} |".format(l, p)) + for l in label_names: + rows.append("| {} | {:.2f} |".format(l, predictions[l])) lines = ["Issue-Label Bot is automatically applying the labels:", ""] @@ -340,17 +413,27 @@ def add_labels_to_issue(self, installation_id, repo_owner, repo_name, message = "\n".join(lines) # label the issue using the GitHub api issue.add_labels(*label_names) - logging.info(f'Add `{"`, `".join(label_names)}` to the issue # {issue_num}') + context["labels"] = label_names + logging.info(f'Add `{"`, `".join(label_names)}` to the issue # {issue_num}', extra=context) else: - message = """Issue Label Bot is not confident enough to auto-label this issue. - See [dashboard]({app_url}data/{repo_owner}/{repo_name}) for more details. - """.format(app_url=self.app_url, - repo_owner=repo_owner, - repo_name=repo_name) - logging.warning(f'Not confident enough to label this issue: # {issue_num}') + # We don't want a spam an issue with comments. So once label + # bot comments on an issue we will not chime in to report that + # we aren't commented. + if not already_commented: + # TODO(jlewi): Should we include top predictions for area and + # platform? Maybe we should include top predictions for + # all areas? The problem is the model only returns predictions + # above the threshold. + message = """Issue Label Bot is not confident enough to auto-label this issue. + See [dashboard]({app_url}data/{repo_owner}/{repo_name}) for more details. + """.format(app_url=self.app_url, + repo_owner=repo_owner, + repo_name=repo_name) + logging.warning(f'Not confident enough to label this issue: # {issue_num}', extra=context) # make a comment using the GitHub api - comment = issue.create_comment(message) + if message: + comment = issue.create_comment(message) class NoGCPCredentials(Exception): pass