Skip to content

Commit 6e7beec

Browse files
authored
Merge branch 'main' into main
2 parents e32ea9b + ef32ced commit 6e7beec

17 files changed

+96
-55
lines changed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,14 @@ You can run this repo virtually by using GitHub Codespaces or VS Code Remote Con
5353
1. Create a new folder and switch to it in the terminal
5454
1. Run `azd auth login`
5555
1. Run `azd init -t azure-search-openai-demo`
56-
* For the target location, the regions that currently support the models used in this sample are **East US** or **South Central US**. For an up-to-date list of regions and models, check [here](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models)
5756
* note that this command will initialize a git repository and you do not need to clone this repository
5857

5958
#### Starting from scratch
6059

6160
Execute the following command, if you don't have any pre-existing Azure services and want to start from a fresh deployment.
6261

6362
1. Run `azd up` - This will provision Azure resources and deploy this sample to those resources, including building the search index based on the files found in the `./data` folder.
63+
* For the target location, the regions that currently support the models used in this sample are **East US**, **France Central**, **South Central US**, **UK South**, and **West Europe**. For an up-to-date list of regions and models, check [here](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models)
6464
1. After the application has been successfully deployed you will see a URL printed to the console. Click that URL to interact with the application in your browser.
6565

6666
It will look like the following:
@@ -129,4 +129,4 @@ Once in the web app:
129129

130130
If you see this error while running `azd deploy`: `read /tmp/azd1992237260/backend_env/lib64: is a directory`, then delete the `./app/backend/backend_env` folder and re-run the `azd deploy` command. This issue is being tracked here: <https://github.com/Azure/azure-dev/issues/1237>
131131

132-
If the web app fails to deploy and you receive a '404 Not Found' message in your browser, run 'azd deploy'.
132+
If the web app fails to deploy and you receive a '404 Not Found' message in your browser, run `azd deploy`.

app/backend/app.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import os
2+
import io
23
import mimetypes
34
import time
45
import logging
56
import openai
6-
from flask import Flask, request, jsonify
7+
from flask import Flask, request, jsonify, send_file, abort
78
from azure.identity import DefaultAzureCredential
89
from azure.search.documents import SearchClient
910
from approaches.retrievethenread import RetrieveThenReadApproach
@@ -76,14 +77,21 @@ def static_file(path):
7677
@app.route("/content/<path>")
7778
def content_file(path):
7879
blob = blob_container.get_blob_client(path).download_blob()
80+
if not blob.properties or not blob.properties.has_key("content_settings"):
81+
abort(404)
7982
mime_type = blob.properties["content_settings"]["content_type"]
8083
if mime_type == "application/octet-stream":
8184
mime_type = mimetypes.guess_type(path)[0] or "application/octet-stream"
82-
return blob.readall(), 200, {"Content-Type": mime_type, "Content-Disposition": f"inline; filename={path}"}
85+
blob_file = io.BytesIO()
86+
blob.readinto(blob_file)
87+
blob_file.seek(0)
88+
return send_file(blob_file, mimetype=mime_type, as_attachment=False, download_name=path)
8389

8490
@app.route("/ask", methods=["POST"])
8591
def ask():
8692
ensure_openai_token()
93+
if not request.json:
94+
return jsonify({"error": "request must be json"}), 400
8795
approach = request.json["approach"]
8896
try:
8997
impl = ask_approaches.get(approach)
@@ -98,6 +106,8 @@ def ask():
98106
@app.route("/chat", methods=["POST"])
99107
def chat():
100108
ensure_openai_token()
109+
if not request.json:
110+
return jsonify({"error": "request must be json"}), 400
101111
approach = request.json["approach"]
102112
try:
103113
impl = chat_approaches.get(approach)

app/backend/approaches/approach.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from typing import Any
2+
3+
14
class Approach:
2-
def run(self, q: str, use_summaries: bool) -> any:
5+
def run(self, q: str, overrides: dict[str, Any]) -> Any:
36
raise NotImplementedError

app/backend/approaches/chatreadretrieveread.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,23 @@
1+
from typing import Any, Sequence
2+
13
import openai
24
from azure.search.documents import SearchClient
35
from azure.search.documents.models import QueryType
46
from approaches.approach import Approach
57
from text import nonewlines
68

7-
# Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
8-
# top documents from search, then constructs a prompt with them, and then uses OpenAI to generate an completion
9-
# (answer) with that prompt.
109
class ChatReadRetrieveReadApproach(Approach):
10+
"""
11+
Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
12+
top documents from search, then constructs a prompt with them, and then uses OpenAI to generate a completion
13+
(answer) with that prompt.
14+
"""
15+
1116
prompt_prefix = """<|im_start|>system
1217
Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
1318
Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
1419
For tabular information return it as an html table. Do not return markdown format.
15-
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brakets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].
20+
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].
1621
{follow_up_questions_prompt}
1722
{injected_prompt}
1823
Sources:
@@ -48,7 +53,7 @@ def __init__(self, search_client: SearchClient, chatgpt_deployment: str, gpt_dep
4853
self.sourcepage_field = sourcepage_field
4954
self.content_field = content_field
5055

51-
def run(self, history: list[dict], overrides: dict) -> any:
56+
def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
5257
use_semantic_captions = True if overrides.get("semantic_captions") else False
5358
top = overrides.get("top") or 3
5459
exclude_category = overrides.get("exclude_category") or None
@@ -105,10 +110,10 @@ def run(self, history: list[dict], overrides: dict) -> any:
105110

106111
return {"data_points": results, "answer": completion.choices[0].text, "thoughts": f"Searched for:<br>{q}<br><br>Prompt:<br>" + prompt.replace('\n', '<br>')}
107112

108-
def get_chat_history_as_text(self, history, include_last_turn=True, approx_max_tokens=1000) -> str:
113+
def get_chat_history_as_text(self, history: Sequence[dict[str, str]], include_last_turn: bool=True, approx_max_tokens: int=1000) -> str:
109114
history_text = ""
110115
for h in reversed(history if include_last_turn else history[:-1]):
111-
history_text = """<|im_start|>user""" +"\n" + h["user"] + "\n" + """<|im_end|>""" + "\n" + """<|im_start|>assistant""" + "\n" + (h.get("bot") + """<|im_end|>""" if h.get("bot") else "") + "\n" + history_text
116+
history_text = """<|im_start|>user""" + "\n" + h["user"] + "\n" + """<|im_end|>""" + "\n" + """<|im_start|>assistant""" + "\n" + (h.get("bot", "") + """<|im_end|>""" if h.get("bot") else "") + "\n" + history_text
112117
if len(history_text) > approx_max_tokens*4:
113118
break
114-
return history_text
119+
return history_text

app/backend/approaches/readdecomposeask.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from langchain.agents.react.base import ReActDocstoreAgent
1111
from langchainadapters import HtmlCallbackHandler
1212
from text import nonewlines
13-
from typing import List
13+
from typing import Any, List, Optional
1414

1515
class ReadDecomposeAsk(Approach):
1616
def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepage_field: str, content_field: str):
@@ -19,7 +19,7 @@ def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepa
1919
self.sourcepage_field = sourcepage_field
2020
self.content_field = content_field
2121

22-
def search(self, q: str, overrides: dict) -> str:
22+
def search(self, q: str, overrides: dict[str, Any]) -> str:
2323
use_semantic_captions = True if overrides.get("semantic_captions") else False
2424
top = overrides.get("top") or 3
2525
exclude_category = overrides.get("exclude_category") or None
@@ -42,7 +42,7 @@ def search(self, q: str, overrides: dict) -> str:
4242
self.results = [doc[self.sourcepage_field] + ":" + nonewlines(doc[self.content_field][:500]) for doc in r]
4343
return "\n".join(self.results)
4444

45-
def lookup(self, q: str) -> str:
45+
def lookup(self, q: str) -> Optional[str]:
4646
r = self.search_client.search(q,
4747
top = 1,
4848
include_total_count=True,
@@ -58,9 +58,9 @@ def lookup(self, q: str) -> str:
5858
return answers[0].text
5959
if r.get_count() > 0:
6060
return "\n".join(d['content'] for d in r)
61-
return None
61+
return None
6262

63-
def run(self, q: str, overrides: dict) -> any:
63+
def run(self, q: str, overrides: dict[str, Any]) -> Any:
6464
# Not great to keep this as instance state, won't work with interleaving (e.g. if using async), but keeps the example simple
6565
self.results = None
6666

app/backend/approaches/readretrieveread.py

+15-10
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,22 @@
66
from langchain.callbacks.manager import CallbackManager, Callbacks
77
from langchain.chains import LLMChain
88
from langchain.agents import Tool, ZeroShotAgent, AgentExecutor
9-
from langchain.llms.openai import AzureOpenAI
109
from langchainadapters import HtmlCallbackHandler
1110
from text import nonewlines
1211
from lookuptool import CsvLookupTool
12+
from typing import Any
1313

14-
# Attempt to answer questions by iteratively evaluating the question to see what information is missing, and once all information
15-
# is present then formulate an answer. Each iteration consists of two parts: first use GPT to see if we need more information,
16-
# second if more data is needed use the requested "tool" to retrieve it. The last call to GPT answers the actual question.
17-
# This is inspired by the MKRL paper[1] and applied here using the implementation in Langchain.
18-
# [1] E. Karpas, et al. arXiv:2205.00445
1914
class ReadRetrieveReadApproach(Approach):
15+
"""
16+
Attempt to answer questions by iteratively evaluating the question to see what information is missing, and once all information
17+
is present then formulate an answer. Each iteration consists of two parts:
18+
1. use GPT to see if we need more information
19+
2. if more data is needed, use the requested "tool" to retrieve it.
20+
The last call to GPT answers the actual question.
21+
This is inspired by the MRKL paper[1] and applied here using the implementation in Langchain.
22+
23+
[1] E. Karpas, et al. arXiv:2205.00445
24+
"""
2025

2126
template_prefix = \
2227
"You are an intelligent assistant helping Contoso Inc employees with their healthcare plan questions and employee handbook questions. " \
@@ -45,7 +50,7 @@ def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepa
4550
self.sourcepage_field = sourcepage_field
4651
self.content_field = content_field
4752

48-
def retrieve(self, q: str, overrides: dict) -> any:
53+
def retrieve(self, q: str, overrides: dict[str, Any]) -> Any:
4954
use_semantic_captions = True if overrides.get("semantic_captions") else False
5055
top = overrides.get("top") or 3
5156
exclude_category = overrides.get("exclude_category") or None
@@ -69,7 +74,7 @@ def retrieve(self, q: str, overrides: dict) -> any:
6974
content = "\n".join(self.results)
7075
return content
7176

72-
def run(self, q: str, overrides: dict) -> any:
77+
def run(self, q: str, overrides: dict[str, Any]) -> Any:
7378
# Not great to keep this as instance state, won't work with interleaving (e.g. if using async), but keeps the example simple
7479
self.results = None
7580

@@ -115,5 +120,5 @@ def __init__(self, employee_name: str, callbacks: Callbacks = None):
115120
self.func = self.employee_info
116121
self.employee_name = employee_name
117122

118-
def employee_info(self, unused: str) -> str:
119-
return self.lookup(self.employee_name)
123+
def employee_info(self, name: str) -> str:
124+
return self.lookup(name)

app/backend/approaches/retrievethenread.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@
33
from azure.search.documents import SearchClient
44
from azure.search.documents.models import QueryType
55
from text import nonewlines
6+
from typing import Any
7+
68

7-
# Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
8-
# top documents from search, then constructs a prompt with them, and then uses OpenAI to generate an completion
9-
# (answer) with that prompt.
109
class RetrieveThenReadApproach(Approach):
10+
"""
11+
Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
12+
top documents from search, then constructs a prompt with them, and then uses OpenAI to generate a completion
13+
(answer) with that prompt.
14+
"""
1115

1216
template = \
1317
"You are an intelligent assistant helping Contoso Inc employees with their healthcare plan questions and employee handbook questions. " + \
@@ -45,7 +49,7 @@ def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepa
4549
self.sourcepage_field = sourcepage_field
4650
self.content_field = content_field
4751

48-
def run(self, q: str, overrides: dict) -> any:
52+
def run(self, q: str, overrides: dict[str, Any]) -> Any:
4953
use_semantic_captions = True if overrides.get("semantic_captions") else False
5054
top = overrides.get("top") or 3
5155
exclude_category = overrides.get("exclude_category") or None

app/backend/langchainadapters.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
from typing import Any, Dict, List, Optional
1+
from typing import Any, Dict, List, Optional, Union
22
from langchain.callbacks.base import BaseCallbackHandler
33
from langchain.schema import AgentAction, AgentFinish, LLMResult
44

5-
def ch(text: str) -> str:
5+
def ch(text: Union[str, object]) -> str:
66
s = text if isinstance(text, str) else str(text)
77
return s.replace("<", "&lt;").replace(">", "&gt;").replace("\r", "").replace("\n", "<br>")
88

app/backend/lookuptool.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
from os import path
21
import csv
2+
from pathlib import Path
33
from langchain.agents import Tool
44
from langchain.callbacks.manager import Callbacks
5-
from typing import Optional
5+
from typing import Optional, Union
66

77
class CsvLookupTool(Tool):
88
data: dict[str, str] = {}
99

10-
def __init__(self, filename: path, key_field: str, name: str = "lookup",
10+
def __init__(self, filename: Union[str, Path], key_field: str, name: str = "lookup",
1111
description: str = "useful to look up details given an input key as opposite to searching data with an unstructured question",
1212
callbacks: Callbacks = None):
1313
super().__init__(name, self.lookup, description, callbacks=callbacks)

app/backend/requirements.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
azure-identity==1.13.0b3
2-
Flask==2.2.2
1+
azure-identity==1.13.0
2+
Flask==2.2.5
33
langchain==0.0.187
44
openai==0.26.4
55
azure-search-documents==11.4.0b3

app/start.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ if [ $? -ne 0 ]; then
1717
fi
1818

1919
echo 'Creating python virtual environment "backend/backend_env"'
20-
python -m venv backend/backend_env
20+
python3 -m venv backend/backend_env
2121

2222
echo ""
2323
echo "Restoring backend python packages"

infra/core/ai/cognitiveservices.bicep

+2-2
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01
3030
model: deployment.model
3131
raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null
3232
}
33-
sku: {
33+
sku: contains(deployment, 'sku') ? deployment.sku : {
3434
name: 'Standard'
35-
capacity: deployment.capacity
35+
capacity: 20
3636
}
3737
}]
3838

infra/main.bicep

+18-10
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ param formRecognizerResourceGroupLocation string = location
3737

3838
param formRecognizerSkuName string = 'S0'
3939

40-
param gptDeploymentName string = 'davinci'
40+
param gptDeploymentName string = ''
4141
param gptDeploymentCapacity int = 30
4242
param gptModelName string = 'text-davinci-003'
43-
param chatGptDeploymentName string = 'chat'
43+
param chatGptDeploymentName string = ''
4444
param chatGptDeploymentCapacity int = 30
4545
param chatGptModelName string = 'gpt-35-turbo'
4646

@@ -50,6 +50,8 @@ param principalId string = ''
5050
var abbrs = loadJsonContent('abbreviations.json')
5151
var resourceToken = toLower(uniqueString(subscription().id, environmentName, location))
5252
var tags = { 'azd-env-name': environmentName }
53+
var gptDeployment = empty(gptDeploymentName) ? 'davinci' : gptDeploymentName
54+
var chatGptDeployment = empty(chatGptDeploymentName) ? 'chat' : chatGptDeploymentName
5355

5456
// Organize resources in a resource group
5557
resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = {
@@ -109,8 +111,8 @@ module backend 'core/host/appservice.bicep' = {
109111
AZURE_OPENAI_SERVICE: openAi.outputs.name
110112
AZURE_SEARCH_INDEX: searchIndexName
111113
AZURE_SEARCH_SERVICE: searchService.outputs.name
112-
AZURE_OPENAI_GPT_DEPLOYMENT: gptDeploymentName
113-
AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGptDeploymentName
114+
AZURE_OPENAI_GPT_DEPLOYMENT: gptDeployment
115+
AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGptDeployment
114116
}
115117
}
116118
}
@@ -127,22 +129,28 @@ module openAi 'core/ai/cognitiveservices.bicep' = {
127129
}
128130
deployments: [
129131
{
130-
name: gptDeploymentName
132+
name: gptDeployment
131133
model: {
132134
format: 'OpenAI'
133135
name: gptModelName
134136
version: '1'
135137
}
136-
capacity: gptDeploymentCapacity
138+
sku: {
139+
name: 'Standard'
140+
capacity: gptDeploymentCapacity
141+
}
137142
}
138143
{
139-
name: chatGptDeploymentName
144+
name: chatGptDeployment
140145
model: {
141146
format: 'OpenAI'
142147
name: chatGptModelName
143148
version: '0301'
144149
}
145-
capacity: chatGptDeploymentCapacity
150+
sku: {
151+
name: 'Standard'
152+
capacity: chatGptDeploymentCapacity
153+
}
146154
}
147155
]
148156
}
@@ -313,8 +321,8 @@ output AZURE_RESOURCE_GROUP string = resourceGroup.name
313321

314322
output AZURE_OPENAI_SERVICE string = openAi.outputs.name
315323
output AZURE_OPENAI_RESOURCE_GROUP string = openAiResourceGroup.name
316-
output AZURE_OPENAI_GPT_DEPLOYMENT string = gptDeploymentName
317-
output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = chatGptDeploymentName
324+
output AZURE_OPENAI_GPT_DEPLOYMENT string = gptDeployment
325+
output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = chatGptDeployment
318326

319327
output AZURE_FORMRECOGNIZER_SERVICE string = formRecognizer.outputs.name
320328
output AZURE_FORMRECOGNIZER_RESOURCE_GROUP string = formRecognizerResourceGroup.name

infra/main.parameters.json

+6
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@
4343
},
4444
"storageResourceGroupName": {
4545
"value": "${AZURE_STORAGE_RESOURCE_GROUP}"
46+
},
47+
"chatGptDeploymentName": {
48+
"value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT}"
49+
},
50+
"gptDeploymentName": {
51+
"value": "${AZURE_OPENAI_GPT_DEPLOYMENT}"
4652
}
4753
}
4854
}

0 commit comments

Comments
 (0)