 from fastapi import HTTPException
 from requests import Session
 from app import tools
+from app.chat import Chat
 from app.guard import Guard
 from app.models.models import ChatModel, QuestionModel, User
 from app.project import Project

 class Inference(ProjectBase):
 
     def chat(self, project: Project, chatModel: ChatModel, user: User, db: Session):
-        raise HTTPException(status_code=400, detail='{"error": "Chat mode not available for this project type."}')
+        chat = Chat(chatModel)
+        output = {
+            "question": chatModel.question,
+            "type": "inference",
+            "sources": [],
+            "guard": False,
+            "tokens": {
+                "input": 0,
+                "output": 0
+            },
+            "project": project.model.name,
+            "id": chat.id
+        }
+
+        # If the project has a guard model, screen the question before running inference;
+        # blocked questions get the censorship answer and the generator stops here.
+        if project.model.guard:
+            guard = Guard(project.model.guard, self.brain, db)
+            if guard.verify(chatModel.question):
+                output["answer"] = project.model.censorship or self.brain.defaultCensorship
+                output["guard"] = True
+                output["tokens"] = {
+                    "input": tools.tokens_from_string(output["question"]),
+                    "output": tools.tokens_from_string(output["answer"])
+                }
+                yield output
+                return
+
+        model = self.brain.getLLM(project.model.llm, db)
+
+        sysTemplate = project.model.system or self.brain.defaultSystem
+        model.llm.system_prompt = sysTemplate
+
+        # Seed the chat memory with the system prompt on the first turn, then append the user question.
+        if not chat.memory.get_all():
+            chat.memory.chat_store.add_message(chat.memory.chat_store_key, ChatMessage(role="system", content=sysTemplate))
+
+        chat.memory.chat_store.add_message(chat.memory.chat_store_key, ChatMessage(role="user", content=chatModel.question))
+        messages = chat.memory.get_all()
+
+        try:
+            if chatModel.stream:
+                # Stream the answer as server-sent events, then emit the final output object and a close event.
+                respgen = model.llm.stream_chat(messages)
+                response = ""
+                for text in respgen:
+                    response += text.delta
+                    yield "data: " + json.dumps({"text": text.delta}) + "\n\n"
+                output["answer"] = response
+                chat.memory.chat_store.add_message(chat.memory.chat_store_key, ChatMessage(role="assistant", content=response))
+                yield "data: " + json.dumps(output) + "\n"
+                yield "event: close\n\n"
+            else:
+                resp = model.llm.chat(messages)
+                output["answer"] = resp.message.content.strip()
+                output["tokens"] = {
+                    "input": tools.tokens_from_string(output["question"]),
+                    "output": tools.tokens_from_string(output["answer"])
+                }
+                chat.memory.chat_store.add_message(chat.memory.chat_store_key, ChatMessage(role="assistant", content=resp.message.content.strip()))
+                yield output
+        except Exception as e:
+            if chatModel.stream:
+                yield "data: Inference failed\n"
+                yield "event: error\n\n"
+            raise e
 
     def question(self, project: Project, questionModel: QuestionModel, user: User, db: Session):
         output = {
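
For reference, a minimal sketch of how an endpoint could consume this generator. It assumes a FastAPI router; the dependency helpers (get_current_user, get_db, get_project) and the shared brain/Inference wiring are hypothetical and not part of this diff. In streaming mode chat() yields SSE-formatted "data:"/"event:" strings, so the generator can be wrapped directly in a StreamingResponse; in non-streaming mode it yields a single output dict.

from fastapi import APIRouter, Depends
from fastapi.responses import StreamingResponse

from app.models.models import ChatModel, User

router = APIRouter()

@router.post("/projects/{projectName}/chat")
def chat_endpoint(
        projectName: str,
        chatModel: ChatModel,
        user: User = Depends(get_current_user),  # hypothetical auth dependency
        db = Depends(get_db)):                   # hypothetical DB session dependency
    project = get_project(projectName, db)       # hypothetical project loader
    inference = Inference(brain)                 # hypothetical shared Inference/brain instance

    if chatModel.stream:
        # chat() yields SSE frames ("data: ...", "event: close") when chatModel.stream is set.
        return StreamingResponse(
            inference.chat(project, chatModel, user, db),
            media_type="text/event-stream")

    # Otherwise the generator yields exactly one output dict.
    return next(inference.chat(project, chatModel, user, db))

In the streaming case the frames are forwarded as they are yielded, and the final "data:" frame carries the complete output object (answer, tokens, guard flag) before the close event.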