
Commit 597b887

Merge pull request #14 from jasonacox/v0.15.15
Docker Compose Quickstart
2 parents 82b040b + 057a52c commit 597b887

8 files changed: +197 additions, -23 deletions

RELEASE.md

Lines changed: 7 additions & 0 deletions
@@ -1,5 +1,12 @@
 # Releases
 
+## 0.15.15 - Docker Compose
+
+* Quick Start using Docker Compose for the Chatbot.
+* Chatbot - Bug Fix: Remove token limit on response. The `MAXTOKENS` setting is used to prune content sent to the LLM. If not set, no pruning will happen.
+* Chatbot - Added additional LiteLLM support with the environment settings `LITELLM_PROXY` and `LITELLM_KEY`. If set, these override the OpenAI API settings to use LiteLLM and remove the `EXTRA_BODY` defaults that conflict with LiteLLM.
+* LiteLLM - Added a Docker Compose setup to start LiteLLM, PostgreSQL, and Chatbot.
+
 ## 0.15.14 - Multi-model Support
 
 * Chatbot - Add `/model` command to list available models and dynamically set models during the session.
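
For context, the two Chatbot changes above can be combined when launching the container; a hedged illustration (not one of this commit's files; the image name and flag values are borrowed from the README and compose.yaml changes below):

```bash
# Point the Chatbot at a LiteLLM proxy and opt back in to RAG content pruning.
# With MAXTOKENS unset, no pruning happens at all.
docker run -d -p 5000:5000 \
  -e LITELLM_PROXY="http://localhost:4000/v1" \
  -e LITELLM_KEY="sk-mykey" \
  -e MAXTOKENS=16384 \
  jasonacox/chatbot
```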

chatbot/Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -10,12 +10,12 @@ FROM python:3.10-slim
 
 # Setting build related env vars
 ENV PORT=5000
-ENV OPENAI_API_KEY="DEFAULT_API_KEY"
+ENV OPENAI_API_KEY="no-key"
 ENV OPENAI_API_BASE="http://localhost:8000/v1"
 ENV AGENT_NAME="Jarvis"
 ENV MY_MODEL="models/7B/gguf-model.bin"
 ENV DEBUG="false"
-ENV WEAVIATE_HOST="localhsot"
+ENV WEAVIATE_HOST="localhost"
 ENV WEAVIATE_PORT="8080"
 ENV WEAVIATE_GRPC_HOST="localhost"
 ENV WEAVIATE_GRPC_PORT="50051"

chatbot/README.md

Lines changed: 23 additions & 3 deletions
@@ -12,7 +12,26 @@ Below are steps to get the Chatbot and Document Manager running.
 
 The Chatbot can be launched as a Docker container or via command line.
 
-### Docker
+### Method 1: Docker Compose
+
+A quickstart method is located in the [litellm](./litellm/) folder. This setup launches the Chatbot, LiteLLM, and PostgreSQL. It works on Mac and Linux (or WSL) systems.
+
+```bash
+cd litellm
+
+# Edit compose.yaml and config.yaml for your setup.
+nano compose.yaml
+nano config.yaml
+
+# Launch
+docker compose up -d
+```
+
+The containers will download and launch. The database will be set up in the `./db` folder.
+- The Chatbot will be available at http://localhost:5000
+- The LiteLLM usage dashboard will be available at http://localhost:4000/ui
+
+### Method 2: Docker
 
 ```bash
 # Create placeholder prompts.json
@@ -85,7 +104,8 @@ docker run \
 -d \
 -p 5000:5000 \
 -e PORT=5000 \
--e OPENAI_API_BASE="http://localhost:4000/v1" \
+-e LITELLM_PROXY="http://localhost:4000/v1" \
+-e LITELLM_KEY="sk-mykey" \
 -e LLM_MODEL="local-pixtral" \
 -e TZ="America/Los_Angeles" \
 -v $PWD/.tinyllm:/app/.tinyllm \
@@ -98,7 +118,7 @@ The Chatbot will try to use the specified model (`LLM_MODEL`) but if it is not a
 
 View the chatbot at http://localhost:5000
 
-#### Command Line Option
+### Method 3: Command Line
 
 ```bash
 # Install required packages

chatbot/litellm/README.md

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+# TinyLLM Chatbot with LiteLLM + PostgreSQL
+
+This folder contains a docker-compose file that will start a TinyLLM Chatbot with a LiteLLM proxy and a PostgreSQL database. The Chatbot will connect to the LiteLLM proxy to access the models. The LiteLLM proxy will connect to the PostgreSQL database to store usage data.
+
+## Instructions
+
+1. Edit the config.yaml file to add your models and settings.
+2. Edit the compose.yaml file to adjust the environment variables in the services as needed.
+3. Run `docker compose up -d` to start the services.
+
+The containers will download and launch. The database will be set up in the `./db` folder.
+
+- The Chatbot will be available at http://localhost:5000
+- The LiteLLM proxy will be available at http://localhost:4000/ui
+- The PostgreSQL pgAdmin interface will be available at http://localhost:5050
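
A quick way to sanity-check the stack after `docker compose up -d`; a hedged sketch, not part of this commit, assuming the sample ports and the `sk-3-laws-safe` master key from compose.yaml are unchanged:

```bash
# Confirm the postgres, pgadmin, litellm-proxy, and chatbot containers are running
docker compose ps

# Watch the Chatbot connect to the LiteLLM proxy
docker compose logs -f chatbot

# List the models the proxy is serving via its OpenAI-compatible API
curl -H "Authorization: Bearer sk-3-laws-safe" http://localhost:4000/v1/models
```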

chatbot/litellm/compose.yaml

Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
+# TinyLLM Chatbot with LiteLLM + PostgreSQL
+#
+# This docker-compose file will start a TinyLLM Chatbot with a LiteLLM proxy
+# and a PostgreSQL database. The Chatbot will connect to the LiteLLM proxy
+# to access the models. The LiteLLM proxy will connect to the PostgreSQL
+# database to store usage data.
+#
+# Instructions:
+# 1. Edit the config.yaml file to add your models and settings.
+# 2. Edit the environment variables in the services section below as needed.
+# 3. Run `docker-compose up -d` to start the services.
+#
+# The Chatbot will be available at http://localhost:5000
+# The LiteLLM proxy will be available at http://localhost:4000/ui
+# The PostgreSQL pgAdmin interface will be available at http://localhost:5050
+#
+# https://github.com/jasonacox/TinyLLM
+
+services:
+  # PostgreSQL database setup - No changes needed
+  postgres:
+    container_name: container-pg
+    image: postgres
+    hostname: localhost
+    ports:
+      - "5432:5432"
+    environment:
+      POSTGRES_USER: litellm
+      POSTGRES_PASSWORD: 3-laws-safe
+      POSTGRES_DB: litellm
+    volumes:
+      - ./db:/var/lib/postgresql/data
+    restart: unless-stopped
+
+  # pgAdmin interface for PostgreSQL - Edit login credentials as needed
+  pgadmin:
+    container_name: container-pgadmin
+    image: dpage/pgadmin4
+    depends_on:
+      - postgres
+    ports:
+      - "5050:80"
+    environment:
+      PGADMIN_DEFAULT_EMAIL: [email protected]
+      PGADMIN_DEFAULT_PASSWORD: 3-laws-safe
+    restart: unless-stopped
+
+  # LiteLLM proxy service - Edit KEYs and LOCAL settings as needed
+  litellm-proxy:
+    image: ghcr.io/berriai/litellm:main-latest
+    container_name: litellm-proxy
+    ports:
+      - "4000:4000"
+    environment:
+      - CUSTOM_AWS_ACCESS_KEY_ID=YourAWSAccessKeyID
+      - CUSTOM_AWS_SECRET_ACCESS_KEY=YourAWSSecretAccessKey
+      - CUSTOM_AWS_REGION_NAME=us-east-1
+      - OPENAI_API_KEY=YourOpenAIAPIKey
+      - LITELLM_MASTER_KEY=sk-3-laws-safe
+      - MASTER_KEY=sk-3-laws-safe
+      - LOCAL_LLM_URL=http://localhost:8000/v1
+      - LOCAL_LLM_KEY=sk-3-laws-safe
+      - DATABASE_URL=postgresql://litellm:3-laws-safe@container-pg:5432/litellm
+    volumes:
+      - ./config.yaml:/app/config.yaml
+    command: --config /app/config.yaml
+    restart: unless-stopped
+
+  # Chatbot service - No changes needed
+  chatbot:
+    image: jasonacox/chatbot
+    container_name: chatbot
+    ports:
+      - "5000:5000"
+    environment:
+      - PORT=5000
+      - LITELLM_PROXY=http://litellm-proxy:4000/v1
+      - LITELLM_KEY=sk-3-laws-safe
+      - LLM_MODEL=local-pixtral
+      - TZ=America/Los_Angeles
+    volumes:
+      - ./.tinyllm:/app/.tinyllm
+    restart: unless-stopped
+
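LiteLLM writes its usage records to the `litellm` PostgreSQL database defined above. A hedged sketch (not part of the commit) for peeking at that data once the stack is running, assuming the `container-pg` name and sample credentials are unchanged:

```bash
# Open psql inside the database container and list the tables LiteLLM has created
docker exec -it container-pg psql -U litellm -d litellm -c '\dt'
```

The pgAdmin interface at http://localhost:5050 provides the same view in a browser.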

chatbot/litellm/config.yaml

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+# LiteLLM Model Definitions
+#
+# This config.yaml file defines the models and settings for the LiteLLM proxy.
+# See https://docs.litellm.ai/docs/providers for examples.
+#
+# https://github.com/jasonacox/TinyLLM
+
+model_list:
+
+  # Local OpenAI Compatible API - e.g. vLLM
+  - model_name: local-pixtral
+    litellm_params:
+      model: openai/mistralai/Pixtral-12B-2409
+      api_base: os.environ/LOCAL_LLM_URL
+      api_key: os.environ/LOCAL_LLM_KEY
+
+  # AWS Bedrock Model Examples
+  - model_name: aws-titan
+    litellm_params:
+      model: bedrock/amazon.titan-text-premier-v1:0
+      aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME
+  - model_name: aws-mixtral
+    litellm_params:
+      model: bedrock/mistral.mixtral-8x7b-instruct-v0:1
+      aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY
+      aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME
+
+  # OpenAI Model Example - GPT-3.5 Turbo
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: openai/gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
+
+  # Ollama Model Example
+  - model_name: ollama-llama3.1
+    litellm_params:
+      model: ollama_chat/llama3.1
+      api_base: http://ollama:11434
+
+# General Settings for LiteLLM - no changes needed
+general_settings:
+  master_key: sk-3-laws-safe
+  database_url: "postgresql://litellm:3-laws-safe@container-pg:5432/litellm"
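
Each `model_name` above becomes an alias on the proxy's OpenAI-compatible endpoint. An illustrative, hedged example (not part of the commit): a request like the following should be routed to the local vLLM backend, assuming the default port and the sample `sk-3-laws-safe` master key:

```bash
# Chat completion through LiteLLM using the "local-pixtral" alias
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-3-laws-safe" \
  -d '{"model": "local-pixtral", "messages": [{"role": "user", "content": "Hello"}]}'
```

Swapping the model value for `aws-titan`, `gpt-3.5-turbo`, or `ollama-llama3.1` sends the same request through the corresponding provider.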

chatbot/server.py

Lines changed: 19 additions & 17 deletions
@@ -120,13 +120,15 @@ def debug(text):
     logger.debug(text)
 
 # Configuration Settings
-API_KEY = os.environ.get("OPENAI_API_KEY", "open_api_key")        # Required, use bogus string for Llama.cpp
+API_KEY = os.environ.get("OPENAI_API_KEY", "Asimov-3-Laws")       # Required, use bogus string for local LLMs
 API_BASE = os.environ.get("OPENAI_API_BASE", "http://localhost:8000/v1") # Required, use https://api.openai.com for OpenAI
+LITELLM_PROXY = os.environ.get("LITELLM_PROXY", None)             # Optional - LITELLM Proxy URL
+LITELLM_KEY = os.environ.get("LITELLM_KEY", "")                   # Optional - LITELLM Secret Key - Begins with sk-
 AGENTNAME = os.environ.get("AGENT_NAME", "")                      # Set the name of your bot
 MYMODEL = os.environ.get("LLM_MODEL", "models/7B/gguf-model.bin") # Pick model to use e.g. gpt-3.5-turbo for OpenAI
 DEBUG = os.environ.get("DEBUG", "false").lower() == "true"        # Set to True to enable debug mode
 MAXCLIENTS = int(os.environ.get("MAXCLIENTS", 1000))              # Maximum number of concurrent clients
-MAXTOKENS = int(os.environ.get("MAXTOKENS", 16*1024))             # Maximum number of tokens to send to LLM
+MAXTOKENS = int(os.environ.get("MAXTOKENS", 0))                   # Maximum number of tokens to send to LLM for RAG
 TEMPERATURE = float(os.environ.get("TEMPERATURE", 0.0))           # LLM temperature
 PORT = int(os.environ.get("PORT", 5000))                          # Port to listen on
 PROMPT_FILE = os.environ.get("PROMPT_FILE", f".tinyllm/prompts.json") # File to store system prompts
@@ -147,12 +149,18 @@ def debug(text):
         log("EXTRA_BODY is not valid JSON")
         EXTRA_BODY = {}
 else:
-    if API_BASE.startswith("https://api.openai.com"):
+    if API_BASE.startswith("https://api.openai.com") or LITELLM_PROXY:
         EXTRA_BODY = {}
     else:
         # Extra stop tokens are needed for some non-OpenAI LLMs
         EXTRA_BODY = {"stop_token_ids":[128001, 128009]}
 
+# LiteLLM Proxy
+if LITELLM_PROXY:
+    log(f"Using LiteLLM Proxy at {LITELLM_PROXY}")
+    API_BASE = LITELLM_PROXY
+    API_KEY = LITELLM_KEY
+
 # RAG Configuration Settings
 WEAVIATE_HOST = os.environ.get("WEAVIATE_HOST", "")               # Empty = no Weaviate support
 WEAVIATE_GRPC_HOST = os.environ.get("WEAVIATE_GRPC_HOST", WEAVIATE_HOST) # Empty = no Weaviate gRPC support
@@ -262,10 +270,9 @@ def test_model():
             log("LLM: Switching to an available model: %s" % model_list[0])
             MYMODEL = model_list[0]
         # Test LLM
-        log(f"LLM: Using and testing model {MYMODEL}")
+        log(f"LLM: Using model: {MYMODEL}")
         llm.chat.completions.create(
             model=MYMODEL,
-            max_tokens=MAXTOKENS,
             stream=False,
             temperature=TEMPERATURE,
             messages=[{"role": "user", "content": "Hello"}],
@@ -278,10 +285,6 @@ def test_model():
     except Exception as erro:
         log("OpenAI API Error: %s" % erro)
         log(f"Unable to connect to OpenAI API at {API_BASE} using model {MYMODEL}.")
-        if "maximum context length" in str(erro):
-            if MAXTOKENS > 1024:
-                MAXTOKENS = int(MAXTOKENS / 2)
-                log(f"LLM: Maximum context length exceeded reducing MAXTOKENS to {MAXTOKENS}.")
         return False
 
 # Fetch list of LLM models
@@ -308,9 +311,9 @@ def get_models():
 if WEAVIATE_HOST != "":
     try:
         rag_documents.connect()
-        log(f"Connected to Weaviate at {WEAVIATE_HOST}")
+        log(f"RAG: Connected to Weaviate at {WEAVIATE_HOST}")
     except Exception as err:
-        log(f"Unable to connect to Weaviate at {WEAVIATE_HOST} - {str(err)}")
+        log(f"RAG: Unable to connect to Weaviate at {WEAVIATE_HOST} - {str(err)}")
         WEAVIATE_HOST = ""
         log("RAG support disabled.")
 
@@ -335,7 +338,7 @@ def query_index(query, library, num_results=RESULTS):
         if ans['content'] == previous_content:
             continue
         new_content = ans['content']
-        if len(new_content) > MAXTOKENS:
+        if MAXTOKENS and len(new_content) > MAXTOKENS:
             debug("RAG: Content size exceeded maximum size using chunk.")
             # Cut the middle and insert the chunk in the middle
             new_content = ans['content'][:MAXTOKENS//4] + "..." + (ans.get('chunk') or " ") + "..." + ans['content'][-MAXTOKENS//4:]
@@ -475,7 +478,6 @@ async def ask(prompt, sid=None):
         llm_stream = openai.OpenAI(api_key=API_KEY, base_url=API_BASE)
         response = llm_stream.chat.completions.create(
             model=client[sid]["model"],
-            max_tokens=MAXTOKENS,
             stream=True, # Send response chunks as LLM computes next tokens
             temperature=TEMPERATURE,
             messages=client[sid]["context"],
@@ -529,7 +531,6 @@ async def ask_llm(query, format="", model=MYMODEL):
     llm = openai.AsyncOpenAI(api_key=API_KEY, base_url=API_BASE)
     response = await llm.chat.completions.create(
         model=model,
-        max_tokens=MAXTOKENS,
         stream=False,
         temperature=TEMPERATURE,
         messages=content,
@@ -548,7 +549,6 @@ async def ask_context(messages, model=MYMODEL):
     llm = openai.AsyncOpenAI(api_key=API_KEY, base_url=API_BASE)
     response = await llm.chat.completions.create(
         model=model,
-        max_tokens=MAXTOKENS,
         stream=False,
         temperature=TEMPERATURE,
         messages=messages,
@@ -718,13 +718,15 @@ async def home(format: str = None):
         "LLM Main User Queries": stats["ask"],
         "LLM Helper Queries": stats["ask_llm"],
         "LLM CoT Context Queries": stats["ask_context"],
+        "OpenAI API URL (OPENAI_API_URL)": API_BASE if not LITELLM_PROXY else "Disabled",
         "OpenAI API Key (OPENAI_API_KEY)": "************" if API_KEY != "" else "Not Set",
-        "OpenAI API URL (OPENAI_API_URL)": API_BASE,
+        "LiteLLM Proxy (LITELLM_PROXY)": LITELLM_PROXY or "Disabled",
+        "LiteLLM Secret Key (LITELLM_KEY)": "************" if LITELLM_KEY != "" else "Not Set",
         "Agent Name (AGENT_NAME)": AGENTNAME,
         "LLM Model (LLM_MODEL)": MYMODEL,
         "Debug Mode (DEBUG)": DEBUG,
         "Current Clients (MAXCLIENTS)": f"{len(client)} of {MAXCLIENTS}",
-        "LLM Max tokens Limit (MAXTOKENS)": MAXTOKENS,
+        "LLM Max Tokens to Send (MAXTOKENS)": MAXTOKENS,
         "LLM Temperature (TEMPERATURE)": TEMPERATURE,
         "Server Port (PORT)": PORT,
         "Saved Prompts (PROMPT_FILE)": PROMPT_FILE,

chatbot/version.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-VERSION = "v0.15.14"
+VERSION = "v0.15.15"
