From 18449a280771468a94e231a15e5ce00dd3ba30e8 Mon Sep 17 00:00:00 2001
From: Jeremy Lewi <jeremy+github@lewi.us>
Date: Fri, 1 May 2020 23:10:26 -0700
Subject: [PATCH] Use the AutoML Model and the universal model to generate
 predictions. (#134)

Create an AutoML Model class to generate predictions using AutoML.

* Create a new class/module automl_model to generate predictions
  using AutoML models

* Change the function signature of predict_labels to include org and repo
  because we want org and repo to be features because they can be highly
  informative.

* Also change predict_issue_labels to take in a list of strings for the
  body text because in follow on PRs we will start taking into account
  additional comments and not just the first one.

* Define github_util.build_issue_doc to construct a text document
  out of the various features.

Testing

* Dev instance successfully used AutoML model.
https://github.com/kubeflow/code-intelligence/issues/131#issuecomment-622616399

* Check in hydrated configs for prod.

* prod has also been updated and looks to be using the new model correctly.

Related issues:

  * Hopefully this model is an improvement.

Miscellaneous changes

Add logging and monitoring instructions.

Update automl notebook to use the new code to build an issue.
---
 .gitignore                                    |   2 +
 ...s_v1beta1_deployment_label-bot-worker.yaml |  59 ++++++++
 .../prod/~g_v1_service_label-bot-worker.yaml  |  20 +++
 Label_Microservice/Makefile                   |  10 ++
 .../overlays/dev/kustomization.yaml           |   4 +-
 .../overlays/prod/kustomization.yaml          |   2 +-
 .../deployment/requirements.worker.txt        |   1 +
 Label_Microservice/developer_guide.md         |  12 +-
 .../docs/logging_and_monitoring.md            |  19 +++
 Label_Microservice/notebooks/automl.ipynb     | 134 +++++++++++++++++-
 py/code_intelligence/github_util.py           |  25 +++-
 py/code_intelligence/github_util_test.py      |  29 ++++
 py/label_microservice/automl_model.py         |  96 +++++++++++++
 py/label_microservice/automl_model_test.py    |  54 +++++++
 py/label_microservice/cli.py                  |  21 +++
 py/label_microservice/combined_model.py       |   5 +-
 .../issue_label_predictor.py                  |  60 +++++---
 py/label_microservice/models.py               |  12 +-
 .../universal_kind_label_model.py             |  14 +-
 py/label_microservice/worker.py               |   2 -
 20 files changed, 544 insertions(+), 37 deletions(-)
 create mode 100644 Label_Microservice/.build/prod/extensions_v1beta1_deployment_label-bot-worker.yaml
 create mode 100644 Label_Microservice/.build/prod/~g_v1_service_label-bot-worker.yaml
 create mode 100644 Label_Microservice/Makefile
 create mode 100644 Label_Microservice/docs/logging_and_monitoring.md
 create mode 100644 py/code_intelligence/github_util_test.py
 create mode 100644 py/label_microservice/automl_model.py
 create mode 100644 py/label_microservice/automl_model_test.py

diff --git a/.gitignore b/.gitignore
index c003aab28f..99a8f27dbc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,8 @@
 !.gitignore
 !.dockerignore
 **/flask_session
+**/.cache
+**/.data
 build/**
 fairing/__pycache__/**
 **/__pycache__/**
diff --git a/Label_Microservice/.build/prod/extensions_v1beta1_deployment_label-bot-worker.yaml b/Label_Microservice/.build/prod/extensions_v1beta1_deployment_label-bot-worker.yaml
new file mode 100644
index 0000000000..3fdebf672d
--- /dev/null
+++ b/Label_Microservice/.build/prod/extensions_v1beta1_deployment_label-bot-worker.yaml
@@ -0,0 +1,59 @@
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  labels:
+    app: label-bot
+    environment: prod
+    service: label-bot
+  name: label-bot-worker
+  namespace: label-bot-prod
+spec:
+  replicas: 5
+  selector:
+    matchLabels:
+      app: label-bot
+      environment: prod
+      service: label-bot
+  template:
+    metadata:
+      labels:
+        app: label-bot
+        environment: prod
+        service: label-bot
+    spec:
+      containers:
+      - command:
+        - python3
+        - -m
+        - label_microservice.worker
+        - subscribe_from_env
+        env:
+        - name: PORT
+          value: "80"
+        - name: ISSUE_EMBEDDING_SERVICE
+          value: http://issue-embedding-server
+        - name: PROJECT
+          value: issue-label-bot-dev
+        - name: ISSUE_EVENT_TOPIC
+          value: event_queue
+        - name: ISSUE_EVENT_SUBSCRIPTION
+          value: label_bot_prod
+        - name: GITHUB_APP_ID
+          value: "27079"
+        - name: GITHUB_APP_PEM_KEY
+          value: /var/secrets/github/issue-label-bot-github-app.private-key.pem
+        image: gcr.io/issue-label-bot-dev/bot-worker:011a589
+        name: app
+        resources:
+          requests:
+            cpu: "4"
+            memory: 4Gi
+        volumeMounts:
+        - mountPath: /var/secrets/github
+          name: github-app
+      restartPolicy: Always
+      serviceAccountName: default-editor
+      volumes:
+      - name: github-app
+        secret:
+          secretName: github-app
diff --git a/Label_Microservice/.build/prod/~g_v1_service_label-bot-worker.yaml b/Label_Microservice/.build/prod/~g_v1_service_label-bot-worker.yaml
new file mode 100644
index 0000000000..ff3a0c1419
--- /dev/null
+++ b/Label_Microservice/.build/prod/~g_v1_service_label-bot-worker.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: label-bot
+    environment: prod
+    service: label-bot
+  name: label-bot-worker
+  namespace: label-bot-prod
+spec:
+  ports:
+  - name: http
+    port: 80
+    protocol: TCP
+    targetPort: 80
+  selector:
+    app: label-bot
+    environment: prod
+    service: label-bot
+  type: ClusterIP
diff --git a/Label_Microservice/Makefile b/Label_Microservice/Makefile
new file mode 100644
index 0000000000..613c028d07
--- /dev/null
+++ b/Label_Microservice/Makefile
@@ -0,0 +1,10 @@
+
+CONTEXT=issue-label-bot
+
+hydrate-prod:
+	rm -rf .build/prod
+	mkdir -p .build/prod
+	kustomize build -o .build/prod deployment/overlays/prod
+
+apply-prod: hydrate-prod
+	kubectl --context=$(CONTEXT) apply -f .build/prod
\ No newline at end of file
diff --git a/Label_Microservice/deployment/overlays/dev/kustomization.yaml b/Label_Microservice/deployment/overlays/dev/kustomization.yaml
index 418ab163ba..b1910aa697 100644
--- a/Label_Microservice/deployment/overlays/dev/kustomization.yaml
+++ b/Label_Microservice/deployment/overlays/dev/kustomization.yaml
@@ -3,9 +3,9 @@ kind: Kustomization
 bases:
 - ../../base
 images:
-- digest: sha256:cb2b2e604d4056b78ecd51d7113de04ebfa60e542310265b3871e7873417e34a
+- #digest: sha256:cb2b2e604d4056b78ecd51d7113de04ebfa60e542310265b3871e7873417e34a
   name: gcr.io/issue-label-bot-dev/bot-worker
-  newName: gcr.io/issue-label-bot-dev/bot-worker:3a82547
+  #newName: gcr.io/issue-label-bot-dev/bot-worker:3a82547
 commonLabels:
   environment: dev
 namespace: label-bot-dev
diff --git a/Label_Microservice/deployment/overlays/prod/kustomization.yaml b/Label_Microservice/deployment/overlays/prod/kustomization.yaml
index 19b13d3055..5c0f2ce563 100644
--- a/Label_Microservice/deployment/overlays/prod/kustomization.yaml
+++ b/Label_Microservice/deployment/overlays/prod/kustomization.yaml
@@ -10,4 +10,4 @@ resources:
 images:
 - name: gcr.io/issue-label-bot-dev/bot-worker
   newName: gcr.io/issue-label-bot-dev/bot-worker
-  newTag: 79cd85a-dirty
+  newTag: 011a589
diff --git a/Label_Microservice/deployment/requirements.worker.txt b/Label_Microservice/deployment/requirements.worker.txt
index 87a850067a..1ea4a012f5 100644
--- a/Label_Microservice/deployment/requirements.worker.txt
+++ b/Label_Microservice/deployment/requirements.worker.txt
@@ -11,6 +11,7 @@ google-api-core==1.14.2
 google-api-python-client==1.7.10
 google-auth==1.6.3
 google-auth-httplib2==0.0.3
+google-cloud-automl==0.10.0
 #google-cloud-bigquery==1.17.0
 google-cloud-core==1.0.3
 google-cloud-pubsub==0.45.0
diff --git a/Label_Microservice/developer_guide.md b/Label_Microservice/developer_guide.md
index 82ead87088..e8b59177fa 100644
--- a/Label_Microservice/developer_guide.md
+++ b/Label_Microservice/developer_guide.md
@@ -68,19 +68,29 @@ Setup a namespace for your development
 1. Send a prediction request using pubsub
 
    ```
-   python -m label_microservice.py --issue=kubeflow/kubeflow#4602
+   python -m label_microservice.cli label-issue --issue=kubeflow/kubeflow#4602 --pubsub_topic=projects/issue-label-bot-dev/topics/TEST_event_queue
    ```   
 
    * Look at the logs of the pod to see the prediction
    * Ensure that you don't have other pods using the same pubsub subscription; otherwise your item might not get handled by the pod you are looking at
 
 
+1. Get pod logs
+
+   ```
+   python -m label_microservice.cli pod-logs --pod=<pod name>
+   ```
+
+   * This pretty-prints the JSON logs, which makes them easier to read.
+
 1. Ensure your kubeconfig context sets the namespace to the namespace skaffold is deploying in; otherwise file sync and log streaming doesn't seem to work.
 
 ## Unresolved Issues
 
 * skaffold continuous mode (`skaffold dev` ) doesn't appear to detect changes in the python files and retrigger the build and deployment
 
+* skaffold doesn't appear to substitute the newly built image into the kustomize package
+
 
 ### Kaniko Image Caching
 
diff --git a/Label_Microservice/docs/logging_and_monitoring.md b/Label_Microservice/docs/logging_and_monitoring.md
new file mode 100644
index 0000000000..c7e0003031
--- /dev/null
+++ b/Label_Microservice/docs/logging_and_monitoring.md
@@ -0,0 +1,19 @@
+# Logging and Monitoring
+
+
+## Stackdriver logs
+
+* Label bot workers use structured json logs
+* You can search the logs in Stackdriver; some example filters are below
+* There is also a BigQuery sink for the stackdriver logs to facilitate analysis and querying
+
+
+Use a filter like the following to see messages for
+a specific issue
+
+```
+jsonPayload.repo_owner = "kubeflow"
+jsonPayload.repo_name = "code-intelligence"
+jsonPayload.issue_num = "132"
+resource.labels.namespace_name = "label-bot-prod"
+```
\ No newline at end of file
diff --git a/Label_Microservice/notebooks/automl.ipynb b/Label_Microservice/notebooks/automl.ipynb
index 7e720cbba9..61d5f7c8b8 100644
--- a/Label_Microservice/notebooks/automl.ipynb
+++ b/Label_Microservice/notebooks/automl.ipynb
@@ -8351,7 +8351,8 @@
     "        blob = bucket.blob(obj_path)\n",
     "        \n",
     "        # Include the owner and repo in the text body because it is predictive\n",
-    "        blob.upload_from_string(issue[\"title\"] + \"\\n\" + owner_repo + \"\\n\" + issue[\"body\"])\n",
+    "        doc = github_util.build_issue_doc(owner, repo, issue[\"title\"], [issue[\"body\"]])\n",
+    "        blob.upload_from_string(doc)\n",
     "        logging.info(f\"Created {target}\")\n",
     "\n",
     "    info.iloc[i][\"url\"] = target    \n",
@@ -8674,6 +8675,26 @@
     "model_name = result.name"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'projects/976279526634/locations/us-central1/models/TCN654213816573231104'"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model_name"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 39,
@@ -8790,6 +8811,117 @@
     "        )\n",
     "    )"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "google.protobuf.pyext._message.RepeatedCompositeContainer"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "response.payload.__class__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "automl.types"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from google.cloud.automl import types as automl_types"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predict_response = automl_types.PredictResponse()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predict_response.payload.append(annotation)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[classification {\n",
+       "  score: 0.8999999761581421\n",
+       "}\n",
+       "display_name: \"area-jupyter\"\n",
+       "]"
+      ]
+     },
+     "execution_count": 78,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predict_response.payload"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "google.cloud.automl_v1.types.AnnotationPayload"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "annotation_payload.__class__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "annotation = automl_types.AnnotationPayload()\n",
+    "annotation.display_name = \"area-jupyter\"\n",
+    "annotation.classification.score = .9"
+   ]
   }
  ],
  "metadata": {
diff --git a/py/code_intelligence/github_util.py b/py/code_intelligence/github_util.py
index 7511c214ca..2774ccbbe2 100644
--- a/py/code_intelligence/github_util.py
+++ b/py/code_intelligence/github_util.py
@@ -1,6 +1,7 @@
 import os
 import logging
 from code_intelligence import github_app
+import typing
 import yaml
 
 def get_issue_handle(installation_id, username, repository, number):
@@ -28,7 +29,29 @@ def get_yaml(owner, repo, ghapp=None):
         # get the repo handle, which allows you got get the file contents
         repo = inst.repository(owner=owner, repository=repo)
         results = repo.file_contents('.github/issue_label_bot.yaml').decoded
-    except:
+    # TODO(jlewi): We should probably be catching narrower exceptions and
+    # not swallowing all exceptions. The exceptions we should swallow are
+    # the ones related to the configuration file not existing.
+    except Exception as e:
+        logging.info(f"Exception occured getting .github/issue_label_bot.yaml: {e}")
         return None
 
     return yaml.safe_load(results)
+
+def build_issue_doc(org:str, repo:str, title:str, text:typing.List[str]):
+    """Build a document string out of various github features.
+
+    Args:
+     org: The organization the issue belongs in
+     repo: The repository.
+     title: Issue title
+     text: List of contents of the comments on the issue. A None entry
+       (e.g. an issue that has no body) is treated as an empty string.
+
+    Returns:
+     content: The document to classify
+    """
+    # Title first, then the lowercased org_repo token, then each comment.
+    pieces = [title, f"{org.lower()}_{repo.lower()}"]
+    pieces.extend("" if t is None else t for t in text)
+    return "\n".join(pieces)
diff --git a/py/code_intelligence/github_util_test.py b/py/code_intelligence/github_util_test.py
new file mode 100644
index 0000000000..3f15d7d826
--- /dev/null
+++ b/py/code_intelligence/github_util_test.py
@@ -0,0 +1,29 @@
+"""Unittest for github_util. """
+import logging
+import pytest
+
+from code_intelligence import github_util
+
+def test_build_issue_doc():
+  """Org and repo are lowercased and joined with the title and comments."""
+  expected = "\n".join(
+    ["issue title", "someorg_somerepo", "line1", "line2"])
+
+  actual = github_util.build_issue_doc("someOrg", "someRepo", "issue title",
+                                       ["line1", "line2"])
+
+  assert actual == expected
+
+if __name__ == "__main__":
+  logging.basicConfig(
+      level=logging.INFO,
+      format=('%(levelname)s|%(asctime)s'
+              '|%(pathname)s|%(lineno)d| %(message)s'),
+      datefmt='%Y-%m-%dT%H:%M:%S',
+  )
+  logging.getLogger().setLevel(logging.INFO)
+
+  pytest.main()
+
+
+
diff --git a/py/label_microservice/automl_model.py b/py/label_microservice/automl_model.py
new file mode 100644
index 0000000000..551486acce
--- /dev/null
+++ b/py/label_microservice/automl_model.py
@@ -0,0 +1,96 @@
+"""Use a model trained on automl.
+"""
+import hashlib
+import logging
+import numpy as np
+import os
+import requests
+import retrying
+import typing
+import yaml
+
+from code_intelligence import github_util
+from google.cloud import automl
+from label_microservice import models
+
+# TODO(jlewi): Do we want to hardcode this?
+CONFIDENCE_THRESHOLD = .5
+
+class AutoMLModel(models.IssueLabelModel):
+  """Use a model trained with GCP AutoML."""
+
+  def __init__(self, model_name = None, prediction_client=None):
+    self.config = None
+    # The fully qualified AutoML model name, e.g. a value like
+    # "projects/976279526634/locations/us-central1/models/TCN654213816573231104"
+    self.model_name = model_name
+
+    # Callers (e.g. tests) may inject a client; otherwise create one.
+    if not prediction_client:
+      prediction_client = automl.PredictionServiceClient()
+
+    self._prediction_client = prediction_client
+
+  def predict_issue_labels(self, org:str, repo:str, title:str,
+                           text:typing.List[str], context=None):
+    """Return a dictionary of label probabilities.
+
+    Args:
+      org: The organization the issue belongs in
+      repo: The repository.
+      title: Issue title
+      text: List of contents of the comments on the issue
+
+      context: (Optional) dictionary of information like the issue. Used
+        for logging
+    Return
+    ------
+    dict: Dictionary of label to probability of that label for the
+      issue str -> float. Only labels whose score is at least
+      CONFIDENCE_THRESHOLD are included.
+    """
+    if not context:
+      context = {}
+
+    content = github_util.build_issue_doc(org, repo, title, text)
+    text_snippet = automl.types.TextSnippet(content=content)
+    payload = automl.types.ExamplePayload(text_snippet=text_snippet)
+
+    # TODO(jlewi): Retry longer? Distinguish permanent vs. retryable errors
+    #@retrying.retry(stop_max_delay=60*1000)
+    def _predict():
+      response = self._prediction_client.predict(self.model_name, payload)
+      return response
+
+    response = _predict()
+
+    predictions = {}
+
+    for annotation_payload in response.payload:
+      # TODO(jlewi): Can we do this in a more principled way?
+      # AutoML doesn't allow "/" in the label names so during training
+      # we convert them from "/" to "-". So here we need to convert them
+      # back to "/"
+      # Only replace the first occurrence of the "-". In principle I think
+      # we might have sub areas as well and we should fix this.
+      label = annotation_payload.display_name.replace("-", "/", 1)
+      predictions[label] = annotation_payload.classification.score
+
+    # TODO(https://github.com/kubeflow/code-intelligence/issues/79):
+    # We should use some sort of context to pass along information
+    # about the issue so we can log what issue these predictions pertain
+    # to.
+    extra = {}
+    extra.update(context)
+    extra["predictions"] = predictions
+    logging.info(f"Unfiltered predictions: {predictions}", extra=extra)
+
+    # Drop the labels the model is not confident enough about.
+    labels_to_remove = [label for label, probability in predictions.items()
+                        if probability < CONFIDENCE_THRESHOLD]
+    for label in labels_to_remove:
+      del predictions[label]
+
+    logging.info(f"Labels below confidence threshold {CONFIDENCE_THRESHOLD}: "
+                 f"{labels_to_remove}", extra=context)
+    return predictions
diff --git a/py/label_microservice/automl_model_test.py b/py/label_microservice/automl_model_test.py
new file mode 100644
index 0000000000..38a4e511bb
--- /dev/null
+++ b/py/label_microservice/automl_model_test.py
@@ -0,0 +1,54 @@
+"""Unittest for repo_specific_model. """
+import logging
+from unittest import mock
+import pytest
+
+from google.cloud.automl import types as automl_types
+from label_microservice import automl_model
+from label_microservice import test_util
+
+def test_predict_labels():
+  """A unittest for predict labels.
+
+  This function mocks out AutoML.
+  """
+  mock_client = mock.MagicMock()
+
+  # Canned AutoML response: one label scoring above the confidence
+  # threshold and one scoring below it.
+  predict_response = automl_types.PredictResponse()
+  for display_name, score in [("area-jupyter", 1), ("area-operator", .4)]:
+    annotation = automl_types.AnnotationPayload()
+    annotation.display_name = display_name
+    annotation.classification.score = score
+    predict_response.payload.append(annotation)
+
+  mock_client.predict.return_value = predict_response
+
+  model = automl_model.AutoMLModel(model_name="some/model",
+                                   prediction_client=mock_client)
+
+  results = model.predict_issue_labels("kubeflow", "docs", "some title",
+                                       ["some text"])
+
+  mock_client.predict.assert_called_once()
+  # Only the label at or above the threshold should be returned, with the
+  # first "-" of the AutoML display name converted back to "/".
+  expected = {
+    # Use an integer and not float to avoid numeric issues in evaluation
+    "area/jupyter": 1,
+  }
+  test_util.assert_dict_equal(expected, results)
+
+if __name__ == "__main__":
+  logging.basicConfig(
+      level=logging.INFO,
+      format=('%(levelname)s|%(asctime)s'
+              '|%(pathname)s|%(lineno)d| %(message)s'),
+      datefmt='%Y-%m-%dT%H:%M:%S',
+  )
+  logging.getLogger().setLevel(logging.INFO)
+
+  pytest.main()
+
+
diff --git a/py/label_microservice/cli.py b/py/label_microservice/cli.py
index fcd8c4804d..0343458ed5 100644
--- a/py/label_microservice/cli.py
+++ b/py/label_microservice/cli.py
@@ -4,9 +4,11 @@
 to be picked up by the backends.
 """
 import logging
+import json
 import fire
 from code_intelligence import util
 from google.cloud import pubsub
+import subprocess
 
 DEFAULT_TOPIC = "projects/issue-label-bot-dev/topics/TEST_event_queue"
 class Cli:
@@ -37,6 +39,25 @@ def label_issue(issue, pubsub_topic=DEFAULT_TOPIC):
                       repo_name=repo_name,
                       issue_num=str(issue_num))
 
+  @staticmethod
+  def pod_logs(pod):
+    """Pretty print the json logs of a pod as "filename:line: message".
+
+    Args:
+      pod: Name of the pod
+    """
+    output = subprocess.check_output(["kubectl", "logs", pod])
+    # check_output returns bytes; decode so raw lines don't print as b'...'
+    for l in output.decode("utf-8", errors="replace").splitlines():
+      try:
+        entry = json.loads(l)
+      except json.JSONDecodeError:
+        entry = None
+      if not isinstance(entry, dict):
+        # Not a structured log entry (plain text or bare JSON value).
+        print(l)
+        continue
+      print(f"{entry.get('filename')}:{entry.get('line')}: {entry.get('message')}")
 if __name__ == "__main__":
   logging.basicConfig(level=logging.INFO,
                       format=('%(levelname)s|%(asctime)s'
diff --git a/py/label_microservice/combined_model.py b/py/label_microservice/combined_model.py
index 4811a11f3b..42baa6e88b 100644
--- a/py/label_microservice/combined_model.py
+++ b/py/label_microservice/combined_model.py
@@ -12,7 +12,8 @@ def __init__(self, models=None):
     # A list of models to generate predictions
     self._models = models
 
-  def predict_issue_labels(self, title:str , text:str, context=None):
+  def predict_issue_labels(self,  org:str, repo:str,
+                           title:str , text:str, context=None):
     """Return a dictionary of label probabilities.
 
     Args:
@@ -31,7 +32,7 @@ def predict_issue_labels(self, title:str , text:str, context=None):
     for i, m in enumerate(self._models):
       logging.info(f"Generating predictions with model {i}")
 
-      latest = m.predict_issue_labels(title, text, context=context)
+      latest = m.predict_issue_labels(org, repo, title, text, context=context)
 
       predictions = self._combine_predictions(predictions, latest)
 
diff --git a/py/label_microservice/issue_label_predictor.py b/py/label_microservice/issue_label_predictor.py
index 0f22f3ed4e..7a785f3173 100644
--- a/py/label_microservice/issue_label_predictor.py
+++ b/py/label_microservice/issue_label_predictor.py
@@ -2,15 +2,31 @@
 import os
 
 from code_intelligence import embeddings
+from label_microservice import automl_model
 from label_microservice import combined_model
 from label_microservice import repo_specific_model
 from label_microservice import universal_kind_label_model as universal_model
 
 UNIVERSAL_MODEL_NAME = "universal"
 
-def _combined_model_name(org, repo):
-  """Return the name of the combined model for a repo"""
-  return f"{org}/{repo}_combined"
+# TODO(jlewi): Let's not hardcode this.
+KUBEFLOW_AUTOML_MODEL = "projects/976279526634/locations/us-central1/models/TCN654213816573231104"
+
+def _combined_model_name(org, repo=None):
+  """Build the key under which a combined model is registered.
+
+  Args:
+    org: Name of the org.
+    repo: (Optional) The name of the repo. When empty or None the key
+      of the org wide model is returned.
+
+  Returns:
+    "{org}/{repo}_combined" for a repo specific model, otherwise
+    "{org}_combined".
+  """
+  # Repo specific models are namespaced under the org.
+  prefix = f"{org}/{repo}" if repo else f"{org}"
+  return f"{prefix}_combined"
 
 def _dict_has_keys(d, keys):
   for k in keys:
@@ -30,36 +46,38 @@ class IssueLabelPredictor:
 
   def __init__(self):
 
+    # A dictionary mapping keys to individual models.
     self._models = {}
     self._load_models()
 
   def _load_models(self):
+    """Load the models."""
     logging.info("Loading the universal model")
     self._models[UNIVERSAL_MODEL_NAME] = universal_model.UniversalKindLabelModel()
 
     # TODO(jlewi): How should we get a list of all models for which we
-    # have repo specific models. mlbot is doing this based on a config
+    # have repo or org specific models. mlbot is doing this based on a config
     # file; https://github.com/machine-learning-apps/Issue-Label-Bot/blob/26d8fb65be3b39de244c4be9e32b2838111dac10/flask_app/forward_utils.py#L5
-    for org_and_repo in [("kubeflow", "kubeflow")]:
-      org = org_and_repo[0]
-      repo = org_and_repo[1]
-      logging.info(f"Loading model for repo {org}/{repo}")
+    for org in ["kubeflow"]:
+      logging.info(f"Loading AutoML model for org: {org}; model: {KUBEFLOW_AUTOML_MODEL}")
 
-      repo_model = repo_specific_model.RepoSpecificLabelModel.from_repo(
-              org, repo,
-              embedding_api_endpoint=os.environ.get("ISSUE_EMBEDDING_SERVICE"))
+      org_model = automl_model.AutoMLModel(model_name=KUBEFLOW_AUTOML_MODEL)
 
-      self._models[f"{org}/{repo}"] = repo_model
+      self._models[f"{org}"] = org_model
 
       combined = combined_model.CombinedLabelModels(
-              models=[self._models["universal"], repo_model])
-      self._models[_combined_model_name(org, repo)] = combined
+              models=[self._models["universal"], org_model])
+      self._models[_combined_model_name(org)] = combined
+
 
-  def predict_labels_for_data(self, model_name, title, body, context=None):
+  def predict_labels_for_data(self, model_name, org, repo, title, body,
+                              context=None):
     """Generate label predictions for the specified data.
 
     Args:
       model_name: Which model to use
+      org: org
+      repo: Repo name
       title: Title for the issue
       body: body of the issue
 
@@ -70,8 +88,10 @@ def predict_labels_for_data(self, model_name, title, body, context=None):
       raise ValueError(f"No model named {model_name}")
 
     model = self._models[model_name]
-    logging.info(f"Generating predictions for title={title} text={body}")
-    predictions = model.predict_issue_labels(title, body, context=context)
+    logging.info(f"Generating predictions for title={title} text={body} using"
+                 f"model: {model_name} class:{model.__class__}")
+    predictions = model.predict_issue_labels(org, repo, title, body,
+                                             context=context)
 
     return predictions
 
@@ -91,10 +111,13 @@ def predict_labels_for_issue(self, org, repo, issue_number, model_name=None):
      dict: str -> float; dictionary mapping labels to their probability
     """
     if not model_name:
+      org_model = _combined_model_name(org)
       repo_model = _combined_model_name(org, repo)
 
       if repo_model in self._models:
         model_name = repo_model
+      elif org_model in self._models:
+        model_name = org_model
       else:
         model_name = UNIVERSAL_MODEL_NAME
 
@@ -117,7 +140,8 @@ def predict_labels_for_issue(self, org, repo, issue_number, model_name=None):
     }
 
     predictions = self.predict_labels_for_data(
-      model_name, data.get("title"), data.get("body"), context=context)
+      model_name, org, repo, data.get("title"), [data.get("body")],
+      context=context)
 
     return predictions
 
diff --git a/py/label_microservice/models.py b/py/label_microservice/models.py
index 238894c167..8b550dd620 100644
--- a/py/label_microservice/models.py
+++ b/py/label_microservice/models.py
@@ -1,5 +1,6 @@
 """The models packages defines wrappers around different models."""
 import abc
+import typing
 
 class IssueLabelModel:
   """A base class for all Issue label models.
@@ -8,13 +9,18 @@ class IssueLabelModel:
   """
 
   @abc.abstractmethod
-  def predict_issue_labels(self, title:str , text:str, context=None):
+  def predict_issue_labels(self, org:str, repo:str, title:str,
+                           text:typing.List[str], context=None):
     """Return a dictionary of label probabilities.
 
     Args:
-      title: The title for the issue
-      text: The text for the issue
+      org: The organization the issue belongs in
+      repo: The repository.
+      title: Issue title
+      text: List of contents of the comments on the issue
+
       context: (Optional) Dictionary of additional context information
+
     Return
     ------
     dict: Dictionary of label to probability of that label for the
diff --git a/py/label_microservice/universal_kind_label_model.py b/py/label_microservice/universal_kind_label_model.py
index 46c5d90ac6..0ee5315bcf 100644
--- a/py/label_microservice/universal_kind_label_model.py
+++ b/py/label_microservice/universal_kind_label_model.py
@@ -9,6 +9,7 @@
 
 from urllib.request import urlopen
 from label_microservice import models
+import typing
 
 class UniversalKindLabelModel(models.IssueLabelModel):
   """UniversalKindLabelModel is a universal model that is trained across all repos.
@@ -49,16 +50,17 @@ def __init__(self,  class_names=['bug', 'feature', 'question']):
     self._prediction_threshold = defaultdict(lambda: .52)
     self._prediction_threshold["question"] = .60
 
-  def predict_issue_labels(self,  title:str, body:str, context=None):
+  def predict_issue_labels(self, org:str, repo:str, title:str,
+                           text:typing.List[str], context=None):
     """
     Get probabilities for the each class.
 
     Parameters
     ----------
-    title: str
-        the issue title
-    body: str
-       the issue body
+     org: The organization the issue belongs in. Ignored by model.
+     repo: The repository. Ignored by model
+     title: Issue title
+     text: List of contents of the comments on the issue
 
     Returns
     ------
@@ -75,7 +77,7 @@ def predict_issue_labels(self,  title:str, body:str, context=None):
     if not context:
       context = {}
     #transform raw text into array of ints
-    vec_body = self.body_pp.transform([body])
+    vec_body = self.body_pp.transform(["\n".join(text)])
     vec_title = self.title_pp.transform([title])
 
     # make predictions with the model
diff --git a/py/label_microservice/worker.py b/py/label_microservice/worker.py
index 7b566c9219..f3d1ae962b 100644
--- a/py/label_microservice/worker.py
+++ b/py/label_microservice/worker.py
@@ -20,8 +20,6 @@
 
 DEFAULT_APP_URL = "https://github.com/marketplace/issue-label-bot"
 
-DEFAULT_APP_URL = "https://github.com/marketplace/issue-label-bot"
-
 class Worker:
     """
     The worker class aims to do label prediction for issues from github repos.