ggml-org · Riyavesuwala · Jan 10, 2026 · Jan 10, 2026
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,7 @@ node_modules
 .vscode-test/
 *.vsix
 .idea/
+.env
+__pycache__/
+.venv/
+
diff --git a/docs/dynamic-model-switching.md b/docs/dynamic-model-switching.md
@@ -0,0 +1,46 @@
+# Dynamic Model Switching with Remote LLaMA-Swap Server in VS Code
+
+## Overview
+
+This document demonstrates how to dynamically switch between multiple AI models hosted on a **remote LLaMA-swap server** using **VS Code**.  
+
+This approach is useful for developers who:
+
+- Have multiple models available on remote servers.
+- Want to choose models dynamically based on **task type, processing speed, or CPU usage**.
+- Need a flexible workflow without restarting services or VS Code.
+
+---
+
+## Folder Structure
+
+```text
+project/
+├── docs/
+│   └── dynamic-model-switching.md
+├── examples/
+│   ├── dynamic_model_switcher.py
+│   └── requirements.txt
+└── .gitignore
+```
+
+
+- `docs/` contains this markdown documentation.
+- `examples/` contains the Python example code for **dynamic model switching**.
+- `.gitignore` excludes local environments, caches, and secret files (`.venv/`, `.env`, `__pycache__/`).
+
+---
+
+## How It Works
+
+1. **Dynamic Model Loading**
+
+   Models can be loaded by **name** at runtime.  
+   Example:
+   ```python
+   from langchain_groq import ChatGroq
+
+   llm = ChatGroq(
+       temperature=0.2,
+       model_name="llama-3.1-8b-instant",
+       api_key="YOUR_API_KEY"
+   )
+   ```
+
diff --git a/examples/dynamic_model_switcher.py b/examples/dynamic_model_switcher.py
@@ -0,0 +1,81 @@
+from fastapi import FastAPI
+from pydantic import BaseModel, Field, ConfigDict
+from fastapi.middleware.cors import CORSMiddleware
+from langchain_groq import ChatGroq
+from langchain_core.prompts import ChatPromptTemplate
+from transformers import pipeline
+from fastapi import HTTPException
+import os
+import smtplib
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from dotenv import load_dotenv
+
+load_dotenv()  # load variables from a local .env file into the process environment
+
+app = FastAPI()  # application object the @app.post(...) endpoints below register on
+
+# -----------Request Models-----------
+
+class LLMRequest(BaseModel):  # request body accepted by POST /generate
+    prompt: str  # text passed to the model's invoke() call
+    model_name: str  # name of the Llama model to use, chosen per request
+
+class SummarizeRequest(BaseModel):  # request body accepted by POST /summarize
+    text: str  # text to be summarized
+    model_name: str  # Hugging Face summarization model to try first, chosen per request
+
+
+# -----------Initialize Models-----------
+
+def get_llama_model(model_name: str) -> ChatGroq:
+    # Return a ChatGroq instance for the requested LLama model.
+    return ChatGroq(
+        temperature=0.2,
+        model_name=model_name,
+        api_key="API_KEY"
+    )
+
+def get_hf_model(model_name: str):
+    # Return a Hugging Face pipeline for summarization.
+    models=[
+        "facebook/bart-large-cnn",
+        "sshleifer/distilbart-cnn-12-6"
+    ]
+
+    # If user provides a model, try it first
+
+    if model_name:
+        try:
+            return pipeline("summarization",model=model_name,device=-1)
+        except Exception:
+            pass # fallback to the predefined model list
+
+
+    for model in models:
+        try:
+            return pipeline("summarization",model=model,device=-1)
+        except Exception:
+            continue
+
+    raise HTTPException(status_code=500,detail="No summarization model available")
+
+# -----------Endpoints-----------
+
+@app.post("/generate")
+def generate_text(req: LLMRequest):
+    try:
+        llm = get_llm_model(req.model_name)
+        response = llm.invoke(req.prompt)
+        return {"output": response.content}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/summarize")
+def summarize_text(req: SummarizeRequest):
+    summarizer = get_hf_model(req.model_name)
+    try:
+        summary = summarizer(req.text, max_length=150, min_length=30, do_sample=False)
+        return {"summary": summary[0]["summary_text"]}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
diff --git a/examples/requirements.txt b/examples/requirements.txt
@@ -0,0 +1,13 @@
+fastapi==0.111.0
+uvicorn[standard]==0.29.0
+
+pydantic==2.7.1
+python-dotenv==1.0.1
+
+langchain==0.1.20
+langchain-groq==0.1.5
+
+transformers==4.41.2
+torch==2.3.0
+
+email-validator==2.1.1
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,3 +4,7 @@ node_modules @@
     .vscode-test/
     *.vsix
     .idea/
+    .env
+    __pycache__/
+    .venv/