Commit
Use the AutoML Model and the universal model to generate predictions. (kubeflow#134)

Create an AutoML Model class to generate predictions using AutoML.

* Create a new class/module automl_model to generate predictions
  using AutoML models

* Change the function signature of predict_labels to include org and repo,
  since they can be highly informative features.

* Also change predict_issue_labels to take in a list of strings for the
  body text, because in follow-on PRs we will start taking into account
  additional comments and not just the first one.

* Define github_util.build_issue_doc to construct a text document
  out of the various features.
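The signature changes described above might look roughly like this sketch; the exact parameter names, defaults, and return shape are assumptions for illustration, not the code in this commit:

```python
import typing

# Illustrative sketch only -- names and return shape are assumptions.
def predict_issue_labels(
        org: str,
        repo: str,
        title: str,
        text: typing.List[str]) -> typing.Dict[str, float]:
    """Return a mapping of label -> confidence for a GitHub issue.

    org and repo are passed explicitly so they can be used as model
    features; text is a list so that later PRs can include follow-up
    comments, not just the initial issue body.
    """
    # Placeholder body: a real implementation would call the model.
    return {}
```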

Testing

* Dev instance successfully used AutoML model.
kubeflow#131 (comment)

* Check in hydrated configs for prod.

* Prod has also been updated and appears to be using the new model correctly.

Related issues:

  * Hopefully this model is an improvement.

Miscellaneous changes

Add logging and monitoring instructions.

Update automl notebook to use the new code to build an issue.
jlewi authored May 2, 2020
1 parent 09bc395 commit 18449a2
Showing 20 changed files with 544 additions and 37 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -6,6 +6,8 @@
!.gitignore
!.dockerignore
**/flask_session
**/.cache
**/.data
build/**
fairing/__pycache__/**
**/__pycache__/**
@@ -0,0 +1,59 @@
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: label-bot
environment: prod
service: label-bot
name: label-bot-worker
namespace: label-bot-prod
spec:
replicas: 5
selector:
matchLabels:
app: label-bot
environment: prod
service: label-bot
template:
metadata:
labels:
app: label-bot
environment: prod
service: label-bot
spec:
containers:
- command:
- python3
- -m
- label_microservice.worker
- subscribe_from_env
env:
- name: PORT
value: "80"
- name: ISSUE_EMBEDDING_SERVICE
value: http://issue-embedding-server
- name: PROJECT
value: issue-label-bot-dev
- name: ISSUE_EVENT_TOPIC
value: event_queue
- name: ISSUE_EVENT_SUBSCRIPTION
value: label_bot_prod
- name: GITHUB_APP_ID
value: "27079"
- name: GITHUB_APP_PEM_KEY
value: /var/secrets/github/issue-label-bot-github-app.private-key.pem
image: gcr.io/issue-label-bot-dev/bot-worker:011a589
name: app
resources:
requests:
cpu: "4"
memory: 4Gi
volumeMounts:
- mountPath: /var/secrets/github
name: github-app
restartPolicy: Always
serviceAccountName: default-editor
volumes:
- name: github-app
secret:
secretName: github-app
20 changes: 20 additions & 0 deletions Label_Microservice/.build/prod/~g_v1_service_label-bot-worker.yaml
@@ -0,0 +1,20 @@
apiVersion: v1
kind: Service
metadata:
labels:
app: label-bot
environment: prod
service: label-bot
name: label-bot-worker
namespace: label-bot-prod
spec:
ports:
- name: http
port: 80
protocol: TCP
targetPort: 80
selector:
app: label-bot
environment: prod
service: label-bot
type: ClusterIP
10 changes: 10 additions & 0 deletions Label_Microservice/Makefile
@@ -0,0 +1,10 @@

CONTEXT=issue-label-bot

hydrate-prod:
rm -rf .build/prod
mkdir -p .build/prod
kustomize build -o .build/prod deployment/overlays/prod

apply-prod: hydrate-prod
kubectl --context=$(CONTEXT) apply -f .build/prod
4 changes: 2 additions & 2 deletions Label_Microservice/deployment/overlays/dev/kustomization.yaml
@@ -3,9 +3,9 @@ kind: Kustomization
bases:
- ../../base
images:
- digest: sha256:cb2b2e604d4056b78ecd51d7113de04ebfa60e542310265b3871e7873417e34a
- #digest: sha256:cb2b2e604d4056b78ecd51d7113de04ebfa60e542310265b3871e7873417e34a
name: gcr.io/issue-label-bot-dev/bot-worker
newName: gcr.io/issue-label-bot-dev/bot-worker:3a82547
#newName: gcr.io/issue-label-bot-dev/bot-worker:3a82547
commonLabels:
environment: dev
namespace: label-bot-dev
@@ -10,4 +10,4 @@ resources:
images:
- name: gcr.io/issue-label-bot-dev/bot-worker
newName: gcr.io/issue-label-bot-dev/bot-worker
newTag: 79cd85a-dirty
newTag: 011a589
1 change: 1 addition & 0 deletions Label_Microservice/deployment/requirements.worker.txt
@@ -11,6 +11,7 @@ google-api-core==1.14.2
google-api-python-client==1.7.10
google-auth==1.6.3
google-auth-httplib2==0.0.3
google-cloud-automl==0.10.0
#google-cloud-bigquery==1.17.0
google-cloud-core==1.0.3
google-cloud-pubsub==0.45.0
12 changes: 11 additions & 1 deletion Label_Microservice/developer_guide.md
@@ -68,19 +68,29 @@ Setup a namespace for your development
1. Send a prediction request using pubsub

```
python -m label_microservice.py --issue=kubeflow/kubeflow#4602
python -m label_microservice.cli label-issue --issue=kubeflow/kubeflow#4602 --topic=projects/issue-label-bot-dev/topics/TEST_event_queue
```

* Look at the logs of the pod to see the prediction
* Ensure that you don't have other pods using the same pubsub subscription; otherwise your item might not get handled by the pod you are looking at


1. Get pod logs

```
python -m label_microservice.cli pod-logs --pod=<pod name>
```

   * This will pretty-print the JSON logs, which are easier to read.

1. Ensure your kubeconfig context sets the namespace to the namespace skaffold is deploying in; otherwise file sync and log streaming don't seem to work.

## Unresolved Issues

* skaffold continuous mode (`skaffold dev`) doesn't appear to detect changes in the python files and retrigger the build and deployment

* skaffold doesn't appear to substitute the newly built image into the kustomize package


### Kaniko Image Caching

19 changes: 19 additions & 0 deletions Label_Microservice/docs/logging_and_monitoring.md
@@ -0,0 +1,19 @@
# Logging and Monitoring


## Stackdriver logs

* Label bot workers use structured JSON logs
* You can search the logs in Stackdriver; some example queries are shown below
* There is also a BigQuery sink for the Stackdriver logs to facilitate analysis and querying


Use a filter like the following to see messages for
a specific issue:

```
jsonPayload.repo_owner = "kubeflow"
jsonPayload.repo_name = "code-intelligence"
jsonPayload.issue_num = "132"
resource.labels.namespace_name = "label-bot-prod"
```
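The worker-side counterpart of these queries is emitting logs with matching fields. A minimal sketch of structured JSON logging that would produce such `jsonPayload` fields — the formatter here is illustrative, not the bot's actual logging setup:

```python
import json
import logging

class JsonFormatter(logging.Formatter):
    """Render each log record as one JSON line with the fields queried above."""
    def format(self, record):
        payload = {
            "message": record.getMessage(),
            "repo_owner": getattr(record, "repo_owner", None),
            "repo_name": getattr(record, "repo_name", None),
            "issue_num": getattr(record, "issue_num", None),
        }
        return json.dumps(payload)

logger = logging.getLogger("label-bot")
handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter())
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# The `extra` dict attaches the structured fields to the record.
logger.info("Predicted labels", extra={
    "repo_owner": "kubeflow",
    "repo_name": "code-intelligence",
    "issue_num": "132",
})
```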
134 changes: 133 additions & 1 deletion Label_Microservice/notebooks/automl.ipynb
@@ -8351,7 +8351,8 @@
" blob = bucket.blob(obj_path)\n",
" \n",
" # Include the owner and repo in the text body because it is predictive\n",
" blob.upload_from_string(issue[\"title\"] + \"\\n\" + owner_repo + \"\\n\" + issue[\"body\"])\n",
" doc = github_util.build_issue_doc(owner, repo, issue[\"title\"], [issue[\"body\"]])\n",
" blob.upload_from_string(doc)\n",
" logging.info(f\"Created {target}\")\n",
"\n",
" info.iloc[i][\"url\"] = target \n",
@@ -8674,6 +8675,26 @@
"model_name = result.name"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'projects/976279526634/locations/us-central1/models/TCN654213816573231104'"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model_name"
]
},
{
"cell_type": "code",
"execution_count": 39,
@@ -8790,6 +8811,117 @@
" )\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"google.protobuf.pyext._message.RepeatedCompositeContainer"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.payload.__class__"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl.types"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"from google.cloud.automl import types as automl_types"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"predict_response = automl_types.PredictResponse()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"predict_response.payload.append(annotation)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[classification {\n",
" score: 0.8999999761581421\n",
"}\n",
"display_name: \"area-jupyter\"\n",
"]"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predict_response.payload"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"google.cloud.automl_v1.types.AnnotationPayload"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"annotation_payload.__class__"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"annotation = automl_types.AnnotationPayload()\n",
"annotation.display_name = \"area-jupyter\"\n",
"annotation.classification.score = .9"
]
}
],
"metadata": {
25 changes: 24 additions & 1 deletion py/code_intelligence/github_util.py
@@ -1,6 +1,7 @@
import os
import logging
from code_intelligence import github_app
import typing
import yaml

def get_issue_handle(installation_id, username, repository, number):
@@ -28,7 +29,29 @@ def get_yaml(owner, repo, ghapp=None):
# get the repo handle, which allows you got get the file contents
repo = inst.repository(owner=owner, repository=repo)
results = repo.file_contents('.github/issue_label_bot.yaml').decoded
except:
# TODO(jlewi): We should probably catch narrower exceptions rather than
# swallow all of them. The exceptions we should swallow are the ones
# related to the configuration file not existing.
except Exception as e:
logging.info(f"Exception occurred getting .github/issue_label_bot.yaml: {e}")
return None

return yaml.safe_load(results)
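The TODO above suggests swallowing only the "config file does not exist" case. A local-file analogue of that narrower pattern (illustrative only — the real code goes through the github3 API, whose exception types are not shown here):

```python
import logging

def read_bot_config(path: str):
    """Read a config file, swallowing only the missing-file case.

    Unlike a bare `except:`, this lets unexpected errors propagate.
    """
    try:
        with open(path) as f:
            return f.read()
    except FileNotFoundError:
        logging.info("No config found at %s", path)
        return None
```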

def build_issue_doc(org:str, repo:str, title:str, text:typing.List[str]):
"""Build a document string out of various github features.
Args:
org: The organization the issue belongs in
repo: The repository.
title: Issue title
text: List of contents of the comments on the issue
Returns:
content: The document to classify
"""
pieces = [title]
pieces.append(f"{org.lower()}_{repo.lower()}")
pieces.extend(text)
content = "\n".join(pieces)
return content
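For reference, the function above joins the title, a lowercased `org_repo` token, and the comment texts with newlines. A quick usage sketch, re-defining the function locally so the snippet is self-contained:

```python
import typing

def build_issue_doc(org: str, repo: str, title: str, text: typing.List[str]):
    """Mirror of the function above: title, org_repo token, then comments."""
    pieces = [title]
    pieces.append(f"{org.lower()}_{repo.lower()}")
    pieces.extend(text)
    return "\n".join(pieces)

doc = build_issue_doc("Kubeflow", "Examples", "Notebook crashes",
                      ["The server crashes on startup."])
print(doc)
# Notebook crashes
# kubeflow_examples
# The server crashes on startup.
```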