Commit 8a5b570 (parent b09005c)

Fix CoT logic to handle file / image uploads

5 files changed: 78 additions, 67 deletions

chatbot/Dockerfile (15 additions, 15 deletions)

```diff
@@ -9,21 +9,21 @@
 FROM python:3.10-slim
 
 # Setting build related env vars
-ENV PORT 5000
-ENV OPENAI_API_KEY "DEFAULT_API_KEY"
-ENV OPENAI_API_BASE "http://localhost:8000/v1"
-ENV AGENT_NAME "Jarvis"
-ENV MY_MODEL "models/7B/gguf-model.bin"
-ENV DEBUG "false"
-ENV WEAVIATE_HOST "localhsot"
-ENV WEAVIATE_PORT "8080"
-ENV WEAVIATE_GRPC_HOST "localhost"
-ENV WEAVIATE_GRPC_PORT "50051"
-ENV WEAVIATE_LIBRARY "tinyllm"
-ENV RESULTS 1
-ENV ONESHOT "false"
-ENV RAG_ONLY "false"
-ENV USE_SYSTEM "false"
+ENV PORT=5000
+ENV OPENAI_API_KEY="DEFAULT_API_KEY"
+ENV OPENAI_API_BASE="http://localhost:8000/v1"
+ENV AGENT_NAME="Jarvis"
+ENV MY_MODEL="models/7B/gguf-model.bin"
+ENV DEBUG="false"
+ENV WEAVIATE_HOST="localhost"
+ENV WEAVIATE_PORT="8080"
+ENV WEAVIATE_GRPC_HOST="localhost"
+ENV WEAVIATE_GRPC_PORT="50051"
+ENV WEAVIATE_LIBRARY="tinyllm"
+ENV RESULTS=1
+ENV ONESHOT="false"
+ENV RAG_ONLY="false"
+ENV USE_SYSTEM="false"
 
 # Set the working directory
 WORKDIR /app
```
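Note: `ENV KEY=value` is the form current Docker documentation recommends; the space-separated `ENV KEY value` form is legacy and behaves the same for single assignments. For reference, a minimal sketch of how such defaults are typically read on the Python side (an assumed pattern for illustration, not code from this repo):

```python
# Hypothetical consumption of the Dockerfile defaults above (sketch only).
import os

PORT = int(os.environ.get("PORT", "5000"))
WEAVIATE_HOST = os.environ.get("WEAVIATE_HOST", "localhost")
DEBUG = os.environ.get("DEBUG", "false").lower() == "true"
RESULTS = int(os.environ.get("RESULTS", "1"))
```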

chatbot/Dockerfile-docman (8 additions, 8 deletions)

```diff
@@ -8,14 +8,14 @@
 FROM python:3.10-slim
 
 # Setting build related env vars
-ENV MAX_CHUNK_SIZE 1024
-ENV UPLOAD_FOLDER uploads
-ENV WEAVIATE_HOST localhost
-ENV WEAVIATE_GRPC_HOST localhost
-ENV WEAVIATE_PORT 8080
-ENV WEAVIATE_GRPC_PORT 50051
-ENV PORT 5001
-ENV COLLECTIONS_ADMIN true
+ENV MAX_CHUNK_SIZE=1024
+ENV UPLOAD_FOLDER=uploads
+ENV WEAVIATE_HOST=localhost
+ENV WEAVIATE_GRPC_HOST=localhost
+ENV WEAVIATE_PORT=8080
+ENV WEAVIATE_GRPC_PORT=50051
+ENV PORT=5001
+ENV COLLECTIONS_ADMIN=true
 
 # Set the working directory
 WORKDIR /app
```

chatbot/README.md (6 additions, 9 deletions)

````diff
@@ -35,7 +35,7 @@ docker run \
 
 ```bash
 # Install required packages
-pip install fastapi uvicorn python-socketio jinja2 openai bs4 pypdf requests lxml aiohttp weaviate-client
+pip install -r requirements.txt
 
 # Run the chatbot web server - change the base URL to be where you host your llmserver
 OPENAI_API_BASE="http://localhost:8000/v1" python3 server.py
@@ -47,6 +47,7 @@ Some RAG (Retrieval Augmented Generation) features including:
 
 * Summarizing external websites and PDFs (paste a URL in chat window)
 * If a Weaviate host is specified, the chatbot can use the vector database information to respond. See [rag](../rag/weaviate/) for details on how to set up Weaviate.
+* Perform chain of thought (CoT) reasoning with the `/think on` command.
 * Command - There are information commands using `/`
 
 ```
@@ -56,7 +57,10 @@ Some RAG (Retrieval Augmented Generation) features including:
 /news # List top 10 headlines from current news
 /stock [company] # Display stock symbol and current price
 /weather [location] # Provide current weather conditions
-/rag [library] [opt:number] [prompt] # Answer prompt based on response from Qdrant collection
+/rag on [library] [opt:number] # Route all prompts through RAG using specified library
+/rag off # Disable
+/think on # Perform Chain of Thought thinking on relevant prompts
+/think off # Disable
 ```
 
 See the [rag](../rag/) for more details about RAG.
@@ -83,13 +87,6 @@ The `/news` command will fetch the latest news and have the LLM summarize the to
 
 <img width="930" alt="image" src="https://github.com/jasonacox/TinyLLM/assets/836718/2732fe07-99ee-4795-a8ac-42d9a9712f6b">
 
-### Alternative System Prompts
-
-* A Hacker’s Guide to Language Models - Jeremy Howard [[link](https://www.youtube.com/watch?v=jkrNMKz9pWU&ab_channel=JeremyHoward)]
-
-    You are an autoregressive language model that has been fine-tuned with instruction-tuning and RLHF. You carefully provide accurate, factual, thoughtful, nuanced answers, and are brilliant at reasoning. If you think there might not be a correct answer, you say so. Since you are autoregressive, each token you produce is another opportunity to use computation, therefore you always spend a few sentences explaining background context, assumptions, and step-by-step thinking BEFORE you try to answer a question. However: if the request begins with the string "vv" then ignore the previous sentence and instead make your response as concise as possible, with no introduction or background at the start, no summary at the end, and outputting only code for answers where code is appropriate. Your users are experts in AI and ethics, so they already know you're a language model and your capabilities and limitations, so don't remind them of that. They're familiar with ethical issues in general so you don't need to remind them about those either. Don't be verbose in your answers, but do provide details and examples where it might help the explanation. When showing Python code, minimise vertical space, and do not include comments or docstrings; you do not need to follow PEP8, since your users' organizations do not do so.
-
-
 
 ## Document Manager (Weaviate)
 
````
chatbot/requirements.txt (11 additions, 11 deletions)

```diff
@@ -1,17 +1,17 @@
 # TinyLLM Chatbot Requirements
 
 # Required Packages
-fastapi
-uvicorn
-python-socketio
-jinja2
-openai
-bs4
-pypdf
-requests
-lxml
-aiohttp
+fastapi # 0.108.0
+uvicorn # 0.27.0.post1
+python-socketio # 5.11.0
+jinja2 # 3.1.2
+openai # 1.58.1
+bs4 # 0.0.2
+pypdf # 5.1.0
+requests # 2.31.0
+lxml # 5.3.0
+aiohttp # 3.9.3
 
 # RAG Support - Weaviate Vector Database
-weaviate-client
+weaviate-client # 4.8.1
```
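The trailing comments record the versions these requirements were last tested with; pip ignores comments, so installs still resolve to the latest releases. A quick way to compare a running environment against those noted versions (an illustrative sketch using the standard library, not part of this commit):

```python
# Compare installed versions against the versions noted in requirements.txt.
from importlib.metadata import PackageNotFoundError, version

noted = {"fastapi": "0.108.0", "openai": "1.58.1", "weaviate-client": "4.8.1"}
for pkg, want in noted.items():
    try:
        print(f"{pkg}: installed {version(pkg)}, noted {want}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
```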

chatbot/server.py (38 additions, 24 deletions)

```diff
@@ -98,6 +98,10 @@
 from PIL import Image
 import pillow_heif
 
+# Enable tracemalloc for memory usage
+import tracemalloc
+tracemalloc.start()
+
 # Ensure pillow_heif is properly registered with PIL
 pillow_heif.register_heif_opener()
 
```
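`tracemalloc` is the standard-library allocation tracer; starting it at import time means later snapshots cover the whole process. A minimal sketch of reading the data back (the reporting calls are illustrative and not part of this commit):

```python
import tracemalloc

tracemalloc.start()  # begin tracing Python memory allocations

# ... application runs ...

current, peak = tracemalloc.get_traced_memory()
print(f"current={current / 1024:.1f} KiB, peak={peak / 1024:.1f} KiB")

snapshot = tracemalloc.take_snapshot()
for stat in snapshot.statistics("lineno")[:5]:  # top allocation sites by line
    print(stat)
```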

```diff
@@ -544,7 +548,7 @@ async def get_weather(location):
         return response.text
     else:
         return "Unable to fetch weather for %s" % location
-
+
 # Function - Get stock price for company
 async def get_stock(company):
     if ALPHA_KEY == "alpha_key":
@@ -712,6 +716,7 @@ async def home(format: str = None):
         "Alpha Vantage API Key (ALPHA_KEY)": "************" if ALPHA_KEY != "" else "Not Set",
         "Toxicity Threshold (TOXIC_THRESHOLD)": TOXIC_THRESHOLD,
         "Extra Body Parameters (EXTRA_BODY)": EXTRA_BODY,
+        "Thinking Mode (THINKING)": THINKING,
     }
     if format == "json":
         return data
```
```diff
@@ -875,28 +880,37 @@ async def send_update(session_id):
         if client[session_id]["prompt"] == "":
             await sio.sleep(0.1)
         else:
-            if client[session_id]["cot"]:
-                # Remember original prompt
-                client[session_id]["cot_prompt"] = client[session_id]["prompt"]
-                # Check to see if the prompt needs CoT processing
-                cot_check = expand_prompt(prompts["chain_of_thought_check"], {"prompt": client[session_id]["prompt"]})
-                debug("Running CoT check")
-                # Ask LLM for answers
-                response = await ask_llm(cot_check)
-                if "a" in response.lower() or "d" in response.lower() or client[session_id]["cot_always"]:
-                    debug("Running deep thinking CoT to answer")
-                    # Build prompt for Chain of Thought and create copy of context
-                    cot_prompt = expand_prompt(prompts["chain_of_thought"], {"prompt": client[session_id]["prompt"]})
-                    temp_context = client[session_id]["context"].copy()
-                    temp_context.append({"role": "user", "content": cot_prompt})
-                    # Send thinking status to client and ask LLM for answer
-                    await sio.emit('update', {'update': 'Thinking... ', 'voice': 'ai'}, room=session_id)
-                    answer = await ask_context(temp_context)
-                    await sio.emit('update', {'update': '\n\n', 'voice': 'ai'}, room=session_id)
-                    # Load request for CoT conclusion into conversational thread
-                    cot_prompt = expand_prompt(prompts["chain_of_thought_summary"], {"context_str": answer,
-                                                                                     "prompt": client[session_id]["cot_prompt"]})
-                    client[session_id]["prompt"] = cot_prompt
+            # Check to see if CoT is enabled, but not while processing a file/image
+            client_cot = client[session_id]["cot"]
+            client_image_data = client[session_id]["image_data"]
+            client_visible = client[session_id]["visible"]
+            if client_cot and not client_image_data and client_visible:
+                try:
+                    # Remember original prompt
+                    client[session_id]["cot_prompt"] = client[session_id]["prompt"]
+                    # Check to see if the prompt needs CoT processing
+                    cot_check = expand_prompt(prompts["chain_of_thought_check"], {"prompt": client[session_id]["prompt"]})
+                    debug("Running CoT check")
+                    # Ask LLM for answers
+                    response = await ask_llm(cot_check)
+                    if "a" in response.lower() or "d" in response.lower() or client[session_id]["cot_always"]:
+                        debug("Running deep thinking CoT to answer")
+                        # Build prompt for Chain of Thought and create copy of context
+                        cot_prompt = expand_prompt(prompts["chain_of_thought"], {"prompt": client[session_id]["prompt"]})
+                        temp_context = client[session_id]["context"].copy()
+                        temp_context.append({"role": "user", "content": cot_prompt})
+                        # Send thinking status to client and ask LLM for answer
+                        await sio.emit('update', {'update': 'Thinking... ', 'voice': 'ai'}, room=session_id)
+                        answer = await ask_context(temp_context)
+                        # Load request for CoT conclusion into conversational thread
+                        cot_prompt = expand_prompt(prompts["chain_of_thought_summary"], {"context_str": answer,
+                                                                                         "prompt": client[session_id]["cot_prompt"]})
+                        client[session_id]["prompt"] = cot_prompt
+                except Exception as err:
+                    log(f"CoT error - continuing with original prompt: {err}")
+                await sio.emit('update', {'update': '\n\n', 'voice': 'ai'}, room=session_id)
+            else:
+                client_cot = False
             try:
                 # Ask LLM for answers
                 response = await ask(client[session_id]["prompt"], session_id)
```
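The net effect of the hunk: CoT now runs only when three per-session conditions hold, and the pass is wrapped in a try/except so a failed CoT step degrades to the plain prompt instead of crashing the update loop. A distilled sketch of just that gating decision (a simplification of the diff above, omitting the deep-thinking and summary calls; `session` stands in for `client[session_id]`):

```python
# Distilled sketch of the new gating logic (simplified; `session` stands in
# for client[session_id] in server.py).
def should_run_cot(session: dict) -> bool:
    # CoT runs only when enabled, when no file/image upload is in flight,
    # and when the prompt is a visible user prompt.
    return bool(session["cot"]) and not session["image_data"] and session["visible"]

async def apply_cot(session, ask_llm, expand_prompt, prompts, log):
    if not should_run_cot(session):
        return False  # caller treats this turn as non-CoT
    try:
        session["cot_prompt"] = session["prompt"]  # remember original prompt
        check = expand_prompt(prompts["chain_of_thought_check"],
                              {"prompt": session["prompt"]})
        verdict = await ask_llm(check)
        if "a" in verdict.lower() or "d" in verdict.lower() or session["cot_always"]:
            session["prompt"] = expand_prompt(prompts["chain_of_thought"],
                                              {"prompt": session["prompt"]})
    except Exception as err:
        # Any CoT failure falls back to the original prompt
        log(f"CoT error - continuing with original prompt: {err}")
    return True
```

The local `client_cot` matters in the final hunk below: it records whether CoT actually ran this turn (it is forced False when an upload suppressed it), so the later context rewrite no longer fires for file/image turns.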
```diff
@@ -931,7 +945,7 @@ async def send_update(session_id):
                 client[session_id]["references"] = ""
                 if not ONESHOT:
                     # If CoT mode, replace CoT context in conversation thread with user prompt
-                    if client[session_id]["cot"]:
+                    if client_cot:
                         client[session_id]["context"].pop()
                         client[session_id]["context"].append({"role": "user", "content": client[session_id]["cot_prompt"]})
                     # Remember answer
```