diff --git a/hub/.buildinfo b/hub/.buildinfo index c2307010eb..7b26ca6433 100644 --- a/hub/.buildinfo +++ b/hub/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 3a5dc67f769d39dd6c75469544648dbf +config: 6d68f6f120c8806fa311a791018201a6 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/hub/.doctrees/api/benchmarks.doctree b/hub/.doctrees/api/benchmarks.doctree deleted file mode 100644 index 88507c287c..0000000000 Binary files a/hub/.doctrees/api/benchmarks.doctree and /dev/null differ diff --git a/hub/.doctrees/concepts/benchmarks.doctree b/hub/.doctrees/concepts/benchmarks.doctree index 44a3f237e5..77fc20c2e5 100644 Binary files a/hub/.doctrees/concepts/benchmarks.doctree and b/hub/.doctrees/concepts/benchmarks.doctree differ diff --git a/hub/.doctrees/api/deploy_router.doctree b/hub/.doctrees/concepts/deploy_router.doctree similarity index 94% rename from hub/.doctrees/api/deploy_router.doctree rename to hub/.doctrees/concepts/deploy_router.doctree index 4be951ad01..2a7a3b2b1d 100644 Binary files a/hub/.doctrees/api/deploy_router.doctree and b/hub/.doctrees/concepts/deploy_router.doctree differ diff --git a/hub/.doctrees/concepts/endpoints.doctree b/hub/.doctrees/concepts/endpoints.doctree deleted file mode 100644 index 3ea22eac27..0000000000 Binary files a/hub/.doctrees/concepts/endpoints.doctree and /dev/null differ diff --git a/hub/.doctrees/api/first_request.doctree b/hub/.doctrees/concepts/first_request.doctree similarity index 97% rename from hub/.doctrees/api/first_request.doctree rename to hub/.doctrees/concepts/first_request.doctree index ceb9e11f50..fe3e8e81a2 100644 Binary files a/hub/.doctrees/api/first_request.doctree and b/hub/.doctrees/concepts/first_request.doctree differ diff --git a/hub/.doctrees/api/images.doctree b/hub/.doctrees/concepts/images.doctree similarity index 88% rename from hub/.doctrees/api/images.doctree rename to hub/.doctrees/concepts/images.doctree index 95542b24e0..6db49abf38 100644 Binary files a/hub/.doctrees/api/images.doctree and b/hub/.doctrees/concepts/images.doctree differ diff --git a/hub/.doctrees/concepts/on_prem_images.doctree b/hub/.doctrees/concepts/on_prem_images.doctree deleted file mode 100644 index b5ace5df8a..0000000000 Binary files a/hub/.doctrees/concepts/on_prem_images.doctree and /dev/null differ diff --git a/hub/.doctrees/api/reference.doctree b/hub/.doctrees/concepts/reference.doctree similarity index 98% rename from hub/.doctrees/api/reference.doctree rename to hub/.doctrees/concepts/reference.doctree index f2ad60f465..709853ac58 100644 Binary files a/hub/.doctrees/api/reference.doctree and b/hub/.doctrees/concepts/reference.doctree differ diff --git a/hub/.doctrees/api/router.doctree b/hub/.doctrees/concepts/router.doctree similarity index 92% rename from hub/.doctrees/api/router.doctree rename to hub/.doctrees/concepts/router.doctree index df04187db8..c4f65dfd0f 100644 Binary files a/hub/.doctrees/api/router.doctree and b/hub/.doctrees/concepts/router.doctree differ diff --git a/hub/.doctrees/concepts/routing.doctree b/hub/.doctrees/concepts/routing.doctree deleted file mode 100644 index e568e30a35..0000000000 Binary files a/hub/.doctrees/concepts/routing.doctree and /dev/null differ diff --git a/hub/.doctrees/api/unify_api.doctree b/hub/.doctrees/concepts/unify_api.doctree similarity index 98% rename from hub/.doctrees/api/unify_api.doctree rename to 
hub/.doctrees/concepts/unify_api.doctree index d4b4f772bf..9858403c0a 100644 Binary files a/hub/.doctrees/api/unify_api.doctree and b/hub/.doctrees/concepts/unify_api.doctree differ diff --git a/hub/.doctrees/interfaces/building_router.doctree b/hub/.doctrees/console/building_router.doctree similarity index 95% rename from hub/.doctrees/interfaces/building_router.doctree rename to hub/.doctrees/console/building_router.doctree index b8b54ff184..608cd30e65 100644 Binary files a/hub/.doctrees/interfaces/building_router.doctree and b/hub/.doctrees/console/building_router.doctree differ diff --git a/hub/.doctrees/interfaces/connecting_stack.doctree b/hub/.doctrees/console/connecting_stack.doctree similarity index 94% rename from hub/.doctrees/interfaces/connecting_stack.doctree rename to hub/.doctrees/console/connecting_stack.doctree index 3fc585c0b4..f1a7bd782f 100644 Binary files a/hub/.doctrees/interfaces/connecting_stack.doctree and b/hub/.doctrees/console/connecting_stack.doctree differ diff --git a/hub/.doctrees/interfaces/running_benchmarks.doctree b/hub/.doctrees/console/running_benchmarks.doctree similarity index 95% rename from hub/.doctrees/interfaces/running_benchmarks.doctree rename to hub/.doctrees/console/running_benchmarks.doctree index ca05e032c6..1ae27b70ff 100644 Binary files a/hub/.doctrees/interfaces/running_benchmarks.doctree and b/hub/.doctrees/console/running_benchmarks.doctree differ diff --git a/hub/.doctrees/demos/demos/LangChain/RAG_playground/README.doctree b/hub/.doctrees/demos/demos/LangChain/RAG_playground/README.doctree deleted file mode 100644 index a1aa351081..0000000000 Binary files a/hub/.doctrees/demos/demos/LangChain/RAG_playground/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/LangChain/README.doctree b/hub/.doctrees/demos/demos/LangChain/README.doctree deleted file mode 100644 index 37ef84e276..0000000000 Binary files a/hub/.doctrees/demos/demos/LangChain/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/LlamaIndex/BasicUsage/unify.doctree b/hub/.doctrees/demos/demos/LlamaIndex/BasicUsage/unify.doctree deleted file mode 100644 index 4844496018..0000000000 Binary files a/hub/.doctrees/demos/demos/LlamaIndex/BasicUsage/unify.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/LlamaIndex/RAGPlayground/README.doctree b/hub/.doctrees/demos/demos/LlamaIndex/RAGPlayground/README.doctree deleted file mode 100644 index 7bf9b04e42..0000000000 Binary files a/hub/.doctrees/demos/demos/LlamaIndex/RAGPlayground/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/LlamaIndex/README.doctree b/hub/.doctrees/demos/demos/LlamaIndex/README.doctree deleted file mode 100644 index 5ac29d2312..0000000000 Binary files a/hub/.doctrees/demos/demos/LlamaIndex/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/README.doctree b/hub/.doctrees/demos/demos/README.doctree deleted file mode 100644 index 7a959b61ef..0000000000 Binary files a/hub/.doctrees/demos/demos/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/Unify/AsyncVsSync/AsyncVsSync.doctree b/hub/.doctrees/demos/demos/Unify/AsyncVsSync/AsyncVsSync.doctree deleted file mode 100644 index e5c006ff37..0000000000 Binary files a/hub/.doctrees/demos/demos/Unify/AsyncVsSync/AsyncVsSync.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/Unify/ChatBot/ChatBot.doctree b/hub/.doctrees/demos/demos/Unify/ChatBot/ChatBot.doctree deleted file mode 100644 index 09dd20d73f..0000000000 
Binary files a/hub/.doctrees/demos/demos/Unify/ChatBot/ChatBot.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/Unify/Chatbot_Arena/CONTRIBUTING.doctree b/hub/.doctrees/demos/demos/Unify/Chatbot_Arena/CONTRIBUTING.doctree deleted file mode 100644 index 8fb08fb8ca..0000000000 Binary files a/hub/.doctrees/demos/demos/Unify/Chatbot_Arena/CONTRIBUTING.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/Unify/Chatbot_Arena/README.doctree b/hub/.doctrees/demos/demos/Unify/Chatbot_Arena/README.doctree deleted file mode 100644 index 8660edbdc3..0000000000 Binary files a/hub/.doctrees/demos/demos/Unify/Chatbot_Arena/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/Unify/LLM-Wars/README.doctree b/hub/.doctrees/demos/demos/Unify/LLM-Wars/README.doctree deleted file mode 100644 index 245ebfd711..0000000000 Binary files a/hub/.doctrees/demos/demos/Unify/LLM-Wars/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/Unify/LLM_Debate/README.doctree b/hub/.doctrees/demos/demos/Unify/LLM_Debate/README.doctree deleted file mode 100644 index f80dc8dd34..0000000000 Binary files a/hub/.doctrees/demos/demos/Unify/LLM_Debate/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/Unify/README.doctree b/hub/.doctrees/demos/demos/Unify/README.doctree deleted file mode 100644 index fe24a8d593..0000000000 Binary files a/hub/.doctrees/demos/demos/Unify/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/Unify/SemanticRouter/README.doctree b/hub/.doctrees/demos/demos/Unify/SemanticRouter/README.doctree deleted file mode 100644 index 87f5ffe410..0000000000 Binary files a/hub/.doctrees/demos/demos/Unify/SemanticRouter/README.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/Unify/SemanticRouter/layer-dynamic-routes.doctree b/hub/.doctrees/demos/demos/Unify/SemanticRouter/layer-dynamic-routes.doctree deleted file mode 100644 index 7b276b2f85..0000000000 Binary files a/hub/.doctrees/demos/demos/Unify/SemanticRouter/layer-dynamic-routes.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/demos/template/README_TEMPLATE.doctree b/hub/.doctrees/demos/demos/template/README_TEMPLATE.doctree deleted file mode 100644 index 60748e877d..0000000000 Binary files a/hub/.doctrees/demos/demos/template/README_TEMPLATE.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/langchain.doctree b/hub/.doctrees/demos/langchain.doctree deleted file mode 100644 index a75e30fcce..0000000000 Binary files a/hub/.doctrees/demos/langchain.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/llamaindex.doctree b/hub/.doctrees/demos/llamaindex.doctree deleted file mode 100644 index abba52c4d2..0000000000 Binary files a/hub/.doctrees/demos/llamaindex.doctree and /dev/null differ diff --git a/hub/.doctrees/demos/unify.doctree b/hub/.doctrees/demos/unify.doctree deleted file mode 100644 index 06f0438f1d..0000000000 Binary files a/hub/.doctrees/demos/unify.doctree and /dev/null differ diff --git a/hub/.doctrees/environment.pickle b/hub/.doctrees/environment.pickle index ae4c8bea80..47a0f12b0c 100644 Binary files a/hub/.doctrees/environment.pickle and b/hub/.doctrees/environment.pickle differ diff --git a/hub/.doctrees/index.doctree b/hub/.doctrees/index.doctree index 0bba27bb21..c7a1218fe5 100644 Binary files a/hub/.doctrees/index.doctree and b/hub/.doctrees/index.doctree differ diff --git a/hub/.doctrees/nbsphinx/demos/demos/LlamaIndex/BasicUsage/unify.ipynb 
b/hub/.doctrees/nbsphinx/demos/demos/LlamaIndex/BasicUsage/unify.ipynb deleted file mode 100644 index 4ed415e726..0000000000 --- a/hub/.doctrees/nbsphinx/demos/demos/LlamaIndex/BasicUsage/unify.ipynb +++ /dev/null @@ -1,286 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# LlamaIndex\n", - "[LlamaIndex](https://www.llamaindex.ai/) is a simple, flexible data framework for connecting custom data sources to large language models (LLMs). The integration with Unify allows you to route your queries to the best LLM endpoints, benchmark performance, and seamlessly switch providers with a single API key in your LlamaIndex LLM applications." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, let's install LlamaIndex πŸ¦™ and the Unify integration." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install llama-index-llms-unify llama-index" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Environment Setup\n", - "\n", - "Make sure to set the `UNIFY_API_KEY` environment variable. You can get a key from the [Unify Console](https://console.unify.ai/login)." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"UNIFY_API_KEY\"] = \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using LlamaIndex with Unify" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Basic Usage \n", - "\n", - "Below we initialize and query a chat model using the `llama-3-70b-chat` endpoint from `together-ai`." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "CompletionResponse(text=\"I'm not actually a llama, but I'm doing great, thanks for asking! I'm a large language model, so I don't have feelings like humans do, but I'm always happy to chat with you and help with any questions or topics you'd like to discuss. How about you? How's your day going?\", additional_kwargs={}, raw={'id': '88b5fcf02e259527-LHR', 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"I'm not actually a llama, but I'm doing great, thanks for asking! I'm a large language model, so I don't have feelings like humans do, but I'm always happy to chat with you and help with any questions or topics you'd like to discuss. How about you? How's your day going?\", role='assistant', function_call=None, tool_calls=None))], 'created': 1716980504, 'model': 'llama-3-70b-chat@together-ai', 'object': 'chat.completion', 'system_fingerprint': None, 'usage': CompletionUsage(completion_tokens=67, prompt_tokens=17, total_tokens=84, cost=7.56e-05)}, logprobs=None, delta=None)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from llama_index.llms.unify import Unify\n", - "llm = Unify(model=\"llama-3-70b-chat@together-ai\")\n", - "llm.complete(\"How are you today, llama?\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Single Sign-On\n", - "\n", - "You can use Unify's SSO to query endpoints in different providers without making accounts with all of them. 
For example, all of these are valid endpoints:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"llama-2-70b-chat@together-ai\")\n", - "llm = Unify(model=\"gpt-3.5-turbo@openai\")\n", - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@mistral-ai\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This allows you to quickly switch and test different models and providers. You can look at all the available models/providers [here](https://unify.ai/hub)!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Runtime Dynamic Routing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As evidenced by our [benchmarks](https://unify.ai/benchmarks), the optimal provider for each model varies by geographic location and time of day due to fluctuating API performance. To circumvent this, we automatically direct your requests to the \"top performing provider\" at runtime. To enable this feature, simply replace your query's provider with one of the [available routing modes](https://unify.ai/docs/api/deploy_router.html#optimizing-a-metric). Let's look at some examples:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"llama-2-70b-chat@input-cost\") # route to lowest input cost provider\n", - "llm = Unify(model=\"gpt-3.5-turbo@itl\") # route to provider with lowest inter token latency\n", - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@ttft\") # route to provider with lowest time to first token." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Quality Routing\n", - "Unify routes your queries to the best LLM on every prompt to consistently achieve better quality outputs than using a single, all-purpose, powerful model, at a fraction of the cost. This is achieved by using smaller models for simpler tasks, and only using larger ones to handle complex queries.\n", - "\n", - "The router is benchmarked on various datasets such as `Open Hermes`, `GSM8K`, `HellaSwag`, `MMLU` and `MT-Bench`, revealing that it can perform better than individual endpoints on average, as explained [here](https://unify.ai/docs/concepts/routing.html#quality-routing). One can choose different configurations of the router for a particular dataset from the [chat-interface](https://unify.ai/chat) as shown below:\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"router_2.58e-01_9.51e-04_3.91e-03@unify\")\n", - "llm = Unify(model=\"router_2.12e-01_5.00e-04_2.78e-04@unify\")\n", - "llm = Unify(model=\"router_2.12e-01_5.00e-04_2.78e-04@unify\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To learn more about quality routing, please refer to this [video](https://www.youtube.com/watch?v=ZpY6SIkBosE&feature=youtu.be)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Streaming and optimizing for latency\n", - "\n", - "If you are building an application where responsiveness is key, you most likely want to get a streaming response. On top of that, ideally you would use the provider with the lowest Time to First Token, to reduce the time your users are waiting for a response. 
Using Unify this would look something like:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@ttft\")\n", - "\n", - "response = llm.stream_complete(\n", - "    \"Translate the following to German: \"\n", - "    \"Hey, there's an emergency in translation street, \"\n", - "    \"please send help asap!\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model and provider are : mixtral-8x7b-instruct-v0.1@mistral-ai\n", - "\n", - "Hallo, es gibt einen Notfall in der Übersetzungsstraße, bitte senden Sie Hilfe so schnell wie möglich!\n", - "\n", - "(Note: This is a loose translation and the phrase \"Übersetzungsstraße\" does not literally exist, but I tried to convey the same meaning as the original message.)" - ] - } - ], - "source": [ - "show_provider = True\n", - "for r in response:\n", - "    if show_provider:\n", - "        print(f\"Model and provider are : {r.raw['model']}\\n\")\n", - "        show_provider = False\n", - "    print(r.delta, end=\"\", flush=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Async calls and Lowest Input Cost\n", - "\n", - "Last but not least, you can also run multiple requests asynchronously. For tasks such as document summarization, optimizing for input costs is crucial. We can use the `input-cost` dynamic routing mode to route our queries to the cheapest provider." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model and provider are : mixtral-8x7b-instruct-v0.1@deepinfra\n", - "\n", - " OpenAI: Pioneering 'safe' artificial general intelligence.\n" - ] - } - ], - "source": [ - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@input-cost\")\n", - "\n", - "response = await llm.acomplete(\n", - "    \"Summarize this in 10 words or less. OpenAI is a U.S. based artificial intelligence \"\n", - "    \"(AI) research organization founded in December 2015, researching artificial intelligence \"\n", - "    \"with the goal of developing 'safe and beneficial' artificial general intelligence, \"\n", - "    \"which it defines as 'highly autonomous systems that outperform humans at most economically \"\n", - "    \"valuable work'. As one of the leading organizations of the AI spring, it has developed \"\n", - "    \"several large language models, advanced image generation models, and previously, released \"\n", - "    \"open-source models. 
Its release of ChatGPT has been credited with starting the AI spring\"\n", - ")\n", - "\n", - "print(f\"Model and provider are : {response.raw['model']}\\n\")\n", - "print(response)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hub/.doctrees/nbsphinx/demos/demos/Unify/AsyncVsSync/AsyncVsSync.ipynb b/hub/.doctrees/nbsphinx/demos/demos/Unify/AsyncVsSync/AsyncVsSync.ipynb deleted file mode 100644 index ac73ff611a..0000000000 --- a/hub/.doctrees/nbsphinx/demos/demos/Unify/AsyncVsSync/AsyncVsSync.ipynb +++ /dev/null @@ -1,408 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Gp0FAI1Zkbxw" - }, - "source": [ - "# Synchronous VS Asynchronous Clients\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i1QFiUcdR-aJ" - }, - "source": [ - "Given the growing demand for real-time applications and user demands for instant responses, it's crucial to grasp the performance implications between Sync and Async clients. In this notebook, we'll delve into the variations between asynchronous and synchronous response times using UnifyAI's API." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rk9DTfrG1Egv" - }, - "source": [ - "In order to run this notebook, you need to generate your UNIFY key from the [console](https://console.unify.ai/login?callbackUrl=%2F). Once you have it, assign it to the `UNIFY_KEY` variable below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3HEAU0aE1Cb0" - }, - "outputs": [], - "source": [ - "UNIFY_KEY=#ENTERUNIFYKEY" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EdCA_YoYQ-X8" - }, - "source": [ - "#### Install Dependencies" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HS2xRFiNPSiL" - }, - "source": [ - "To run this notebook, you will need to install the `unifyai` [python package](https://pypi.org/project/unifyai/). 
You can do so by running the cell below ⬇️" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0MqRVTcHgWhl", - "outputId": "489a5a4d-f4f3-41cf-9d5f-47cb77a3978b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting unifyai\n", - " Downloading unifyai-0.8.1-py3-none-any.whl (14 kB)\n", - "Collecting openai<2.0.0,>=1.12.0 (from unifyai)\n", - " Downloading openai-1.17.1-py3-none-any.whl (268 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.3/268.3 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: requests<3.0.0,>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from unifyai) (2.31.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (3.7.1)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (1.7.0)\n", - "Collecting httpx<1,>=0.23.0 (from openai<2.0.0,>=1.12.0->unifyai)\n", - " Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (2.6.4)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.66.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.11.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai) (3.6)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai) (2024.2.2)\n", - "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.12.0->unifyai) (1.2.0)\n", - "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai)\n", - " Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai)\n", - " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from 
pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai) (2.16.3)\n", - "Installing collected packages: h11, httpcore, httpx, openai, unifyai\n", - "Successfully installed h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 openai-1.17.1 unifyai-0.8.1\n" - ] - } - ], - "source": [ - "!pip install unifyai" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rhgBuOhzRFeE" - }, - "source": [ - "#### Synchronous Clients" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yX6plilUlGYl" - }, - "source": [ - "A Synchronous client handles requests sequentially, processing one at a time. This means that each request must be fully handled before the next one is processed, resulting in potential blocking of the program's execution. You can use a Sync client with Unify as shown below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VGGw4tDagqV5", - "outputId": "652924b3-4a25-4946-fc1a-c59281d9b33a" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HOO-RAY! *tutu* Hello there, young grasshopper! *bray* Isaac Newton was a majestic figure known for his work in math and science. He was born in 1642 in England and grew up to be a brilliant thinker and a fellow of the Royal Society. *twirl*\n", - "\n", - "Newton is perhaps best known for his laws of motion and his law of universal gravitation. These laws explained how objects move and how gravity works. He also discovered calculus, which is a way of using math to understand how things change over time. *mathematical mnum hop*\n", - "\n", - "But that's not all, oh no! Newton was also a bit of an alchemist and studied the nature of light. He even invented a fancy piece of equipment called a \"reflecting telescope\" to observe the heavens. *shimmer*\n", - "\n", - "Newton was a true renaissance thinker, and his contributions to science and mathematics are still celebrated today. *tutu* He was a true llama of learning, and his legacy continues to inspire us all. *bray*\n" - ] - } - ], - "source": [ - "from unify import Unify\n", - "unify = Unify(\n", - "    api_key=UNIFY_KEY,\n", - "    endpoint=\"llama-2-13b-chat@anyscale\"\n", - ")\n", - "response = unify.generate(user_prompt=\"Hello Llama! Who was Isaac Newton?\")\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1Swnehb9Rvdh" - }, - "source": [ - "#### Async Clients" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H-A9MRms2n-g" - }, - "source": [ - "An Asynchronous client can handle multiple requests concurrently without blocking. To use the Async client, import `AsyncUnify` instead of `Unify` and use `await` with the `.generate` method. This signals to the program to pause execution until the asynchronous operation completes. Additionally, we'll use the `asyncio` library for managing asynchronous execution and coroutines. `asyncio` provides tools for building concurrent programs using coroutines, which can be paused and resumed, allowing for efficient handling of asynchronous tasks.\n", - "\n", - "\n", - "NOTE: Running asyncio in notebooks conflicts with the event loop already running in the notebook. As a workaround, we need to use `nest_asyncio.apply()`. Please see [this issue](https://github.com/jupyter/notebook/issues/3397) for more details."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "b25iLp5nOPxQ", - "outputId": "d5b6b0d8-d82d-4804-9ffb-989426f130ef" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HEYA HUMAN! *bleshes* Oh, you're talkin' 'bout Sir Isaac Newton, the famous English mathematician and physicist? *noms on some grass* He lived in the 17th and 18th centuries, and is known for his laws of motion and universal gravitation. *giggles* He was a pretty smart dude, if I do say so myself! *flaunts my banned-from-the-library-for-life status*\n", - "\n", - "But enough about me, let's talk about Newton! *grin* He was born in Woolsthorpe, Lincolnshire, England in 1643, and grew up to be a brilliant mathematician and physicist. He studied at Trinity College in Cambridge, and later became a professor there. *nbd*\n", - "\n", - "Newton's most famous contributions to science are his three laws of motion, which describe how objects move and respond to forces. He also developed the Law of Universal Gravitation, which states that every object in the universe attracts every other object with a force proportional to the product of their masses and inversely proportional to the square of the distance between them. *geek out*\n", - "\n", - "Newton was also a skilled inventor and engineer, and he designed and built all sorts of cool stuff, like a reflecting telescope and a machine for calculating the square root of numbers. *impressed*\n", - "\n", - "Despite his many accomplishments, Newton was a pretty private person and wasn't always the most sociable guy. He was known to be pretty temperamental and had some pretty interesting beliefs, like the idea that alchemy was a valid scientific pursuit. *raises an eyebrow* But hey, who am I to judge? *shrugs*\n", - "\n", - "So there you have it, human! That's the basic scoop on Sir Isaac Newton. I hope you found that enlightening. *wink* Now, if you'll excuse me, I need to go work on my own groundbreaking research... or at least, my own Instagram captions. *smizes*\n" - ] - } - ], - "source": [ - "from unify import AsyncUnify\n", - "import asyncio\n", - "import nest_asyncio\n", - "nest_asyncio.apply()\n", - "\n", - "async_unify = AsyncUnify(\n", - " api_key=UNIFY_KEY,\n", - " endpoint=\"llama-2-13b-chat@anyscale\"\n", - ")\n", - "\n", - "async def main():\n", - " responses = await async_unify.generate(user_prompt=\"Hello Llama! Who was Isaac Newton?\")\n", - " print(responses)\n", - "\n", - "asyncio.run(main())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TxK6r9yrnfUS" - }, - "source": [ - "Now, our goal is to compare the response times of synchronous vs asynchronous clients when handling multiple requests. 
Let's start by defining some helper functions.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "C8y34VMGnI93" - }, - "outputs": [], - "source": [ - "async def send_async_request(user_prompt):\n", - " \"\"\"\n", - " Uses an Async client to generate the response for the user_prompt.\n", - "\n", - " Parameters:\n", - " user_prompt (str): The prompt provided by the user.\n", - "\n", - " Returns:\n", - " str: The response generated.\n", - " \"\"\"\n", - " response = await async_unify.generate(user_prompt=user_prompt)\n", - " return response\n", - "\n", - "def send_sync_request(user_prompt):\n", - " \"\"\"\n", - " Uses a sync client to generate the response for the user_prompt.\n", - "\n", - " Parameters:\n", - " user_prompt (str): The prompt provided by the user.\n", - "\n", - " Returns:\n", - " str: The response generated.\n", - " \"\"\"\n", - " response = unify.generate(user_prompt=user_prompt)\n", - " return response\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Uuvj58CqiLUJ" - }, - "source": [ - "We'll create two functions to send multiple requests to asynchronous and synchronous clients, respectively, and measure their processing time. For the synchronous client, requests will be sent sequentially in a loop, while for the asynchronous client, we'll utilize `asyncio.gather` to execute multiple requests concurrently.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VoVWnxboiKem" - }, - "outputs": [], - "source": [ - "import time\n", - "import asyncio\n", - "\n", - "async def run_async_requests(num_requests):\n", - " \"\"\"\n", - " Runs multiple asynchronous requests for generating responses based on a user prompt and measures the time taken.\n", - "\n", - " Parameters:\n", - " num_requests (int): The number of requests to be sent.\n", - "\n", - " Returns:\n", - " float: The total time taken to process all requests.\n", - " \"\"\"\n", - " user_prompt = \"Hello! Tell me your favorite physics fact!\"\n", - " start = time.time()\n", - " _ = await asyncio.gather(*(send_async_request(user_prompt) for _ in range(num_requests)))\n", - " end = time.time()\n", - " return end - start\n", - "\n", - "def run_sync_requests(num_requests):\n", - " \"\"\"\n", - " Runs multiple synchronous requests for generating responses based on a user prompt and measures the time taken.\n", - "\n", - " Parameters:\n", - " num_requests (int): The number of requests to be sent.\n", - "\n", - " Returns:\n", - " float: The total time taken to process all requests.\n", - " \"\"\"\n", - " user_prompt = \"Hello! Tell me your favorite physics fact!\"\n", - " start = time.time()\n", - " _ = [send_sync_request(user_prompt) for _ in range(num_requests)]\n", - " end = time.time()\n", - " return end - start\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Jjavbv5vtfrP" - }, - "source": [ - "Now, let's measure the time taken by each client for 10 requests." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "s7tGumQ0iYRg", - "outputId": "ad621069-0f10-4a0e-f78e-60852474f55b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Asynchronous Response Times: 8.351824045181274\n", - "Synchronous Response Times: 55.45608472824097\n" - ] - } - ], - "source": [ - "num_requests = 10\n", - "# Send asynchronous requests\n", - "async_response_times = asyncio.run(run_async_requests(num_requests))\n", - "# Print response times\n", - "print(\"Asynchronous Response Times:\", async_response_times)\n", - "# Send synchronous requests\n", - "sync_response_times = run_sync_requests(num_requests)\n", - "print(\"Synchronous Response Times:\", sync_response_times)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B3nJiSwBwoyk" - }, - "source": [ - "As expected, the Asynchronous client performs much better than the sequential synchronous client." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xfEzRaReyxrp" - }, - "source": [ - "# Round Up\n", - " Congratulations! 🚀 You now have an understanding of Async and Sync clients and can hopefully leverage these concepts in your own applications.\n", - "\n", - " In the next tutorial, we will explore how to build an interactive ChatBot Agent! See you there ➡️!\n", - "\n", - "\n" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/hub/.doctrees/nbsphinx/demos/demos/Unify/ChatBot/ChatBot.ipynb b/hub/.doctrees/nbsphinx/demos/demos/Unify/ChatBot/ChatBot.ipynb deleted file mode 100644 index 5c06204539..0000000000 --- a/hub/.doctrees/nbsphinx/demos/demos/Unify/ChatBot/ChatBot.ipynb +++ /dev/null @@ -1,721 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Gp0FAI1Zkbxw" - }, - "source": [ - "# Build a ChatBot" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i1QFiUcdR-aJ" - }, - "source": [ - "In this notebook, we will build an interactive chatbot using the `unifyai` python package.\n", - "\n", - "Under the hood, chatbots are very simple to implement. All LLM endpoints are stateless, and therefore the entire conversation history is repeatedly fed as input to the model. All that is required of the local agent is to store this history, and correctly pass it to the model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EdCA_YoYQ-X8" - }, - "source": [ - "#### Install Dependencies" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HS2xRFiNPSiL" - }, - "source": [ - "To run this notebook, you will need to install the `unifyai` [python package](https://pypi.org/project/unifyai/). 
You can do so by running the cell below ⬇️" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0MqRVTcHgWhl", - "outputId": "dbae7a55-ecad-478c-bd57-c0ecdb0eaeaf" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting unifyai==0.8.1\n", - " Downloading unifyai-0.8.1-py3-none-any.whl (14 kB)\n", - "Requirement already satisfied: openai<2.0.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from unifyai==0.8.1) (1.17.1)\n", - "Requirement already satisfied: requests<3.0.0,>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from unifyai==0.8.1) (2.31.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (3.7.1)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (1.7.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (0.27.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (2.6.4)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (4.66.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (4.11.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai==0.8.1) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai==0.8.1) (3.6)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai==0.8.1) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai==0.8.1) (2024.2.2)\n", - "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (1.2.0)\n", - "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (1.0.5)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (2.16.3)\n", - "Installing collected packages: unifyai\n", - " Attempting uninstall: unifyai\n", - " Found existing installation: unifyai 0.8.0\n", - " Uninstalling unifyai-0.8.0:\n", - " Successfully uninstalled unifyai-0.8.0\n", - "Successfully installed unifyai-0.8.1\n" - ] - } - ], - "source": [ - "!pip install unifyai" - ] - 
}, - { - "cell_type": "markdown", - "metadata": { - "id": "rhgBuOhzRFeE" - }, - "source": [ - "#### The Agent" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yX6plilUlGYl" - }, - "source": [ - "We define a simple chatbot class below, with the only public function being `run`. Before starting, you should to obtain a UNIFY key from the [console page](https://console.unify.ai/login?callbackUrl=%2F) and assign it to the `UNIFY_KEY` variable below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "feMwwdteRuOL" - }, - "outputs": [], - "source": [ - "UNIFY_KEY = #ENTERUNIFYKEY" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VGGw4tDagqV5" - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "from typing import Optional\n", - "from unify import Unify\n", - "\n", - "\n", - "class ChatBot:\n", - " \"\"\"Agent class represents an LLM chat agent.\"\"\"\n", - "\n", - " def __init__(\n", - " self,\n", - " api_key: Optional[str] = None,\n", - " endpoint: Optional[str] = None,\n", - " model: Optional[str] = None,\n", - " provider: Optional[str] = None,\n", - " ) -> None:\n", - " \"\"\"\n", - " Initializes the ChatBot object.\n", - "\n", - " Args:\n", - " api_key (str, optional): API key for accessing the Unify API.\n", - " If None, it attempts to retrieve the API key from the\n", - " environment variable UNIFY_KEY.\n", - " Defaults to None.\n", - "\n", - " endpoint (str, optional): Endpoint name in OpenAI API format:\n", - " /@\n", - " Defaults to None.\n", - "\n", - " model (str, optional): Name of the model. If None,\n", - " endpoint must be provided.\n", - "\n", - " provider (str, optional): Name of the provider. If None,\n", - " endpoint must be provided.\n", - " Raises:\n", - " UnifyError: If the API key is missing.\n", - " \"\"\"\n", - " self._message_history = []\n", - " self._paused = False\n", - " self._client = Unify(\n", - " api_key=api_key,\n", - " endpoint=endpoint,\n", - " model=model,\n", - " provider=provider,\n", - " )\n", - "\n", - " @property\n", - " def client(self) -> str:\n", - " \"\"\"\n", - " Get the client object.\n", - "\n", - " Returns:\n", - " str: The model name.\n", - " \"\"\"\n", - " return self._client\n", - "\n", - " def set_client(self, value: Unify) -> None:\n", - " \"\"\"\n", - " Set the model name.\n", - "\n", - " Args:\n", - " value: The unify client.\n", - " \"\"\"\n", - " self._client = value\n", - "\n", - " @property\n", - " def model(self) -> str:\n", - " \"\"\"\n", - " Get the model name.\n", - "\n", - " Returns:\n", - " str: The model name.\n", - " \"\"\"\n", - " return self._client.model\n", - "\n", - " def set_model(self, value: str) -> None:\n", - " \"\"\"\n", - " Set the model name.\n", - "\n", - " Args:\n", - " value (str): The model name.\n", - " \"\"\"\n", - " self._client.set_model(value)\n", - " if self._client.provider:\n", - " self._client.set_endpoint(\"@\".join([value, self._client.provider]))\n", - " else:\n", - " mode = self._client.endpoint.split(\"@\")[1]\n", - " self._client.set_endpoint(\"@\".join([value, mode]))\n", - "\n", - " @property\n", - " def provider(self) -> Optional[str]:\n", - " \"\"\"\n", - " Get the provider name.\n", - "\n", - " Returns:\n", - " str: The provider name.\n", - " \"\"\"\n", - " return self._client.provider\n", - "\n", - " def set_provider(self, value: str) -> None:\n", - " \"\"\"\n", - " Set the provider name.\n", - "\n", - " Args:\n", - " value (str): The provider name.\n", - " \"\"\"\n", - " 
self._client.set_provider(value)\n", - " self._client.set_endpoint(\"@\".join([self._model, value]))\n", - "\n", - " @property\n", - " def endpoint(self) -> str:\n", - " \"\"\"\n", - " Get the endpoint name.\n", - "\n", - " Returns:\n", - " str: The endpoint name.\n", - " \"\"\"\n", - " return self._client.endpoint\n", - "\n", - " def set_endpoint(self, value: str) -> None:\n", - " \"\"\"\n", - " Set the model name.\n", - "\n", - " Args:\n", - " value (str): The endpoint name.\n", - " \"\"\"\n", - " self._client.set_endpoint(value)\n", - " self._client.set_model(value.split(\"@\")[0])\n", - " self._client.set_provider(value.split(\"@\")[1])\n", - "\n", - " def _get_credits(self):\n", - " \"\"\"\n", - " Retrieves the current credit balance from associated with the UNIFY account.\n", - "\n", - " Returns:\n", - " float: Current credit balance.\n", - " \"\"\"\n", - " return self._client.get_credit_balance()\n", - "\n", - " def _process_input(self, inp: str, show_credits: bool, show_provider: bool):\n", - " \"\"\"\n", - " Processes the user input to generate AI response.\n", - "\n", - " Args:\n", - " inp (str): User input message.\n", - " show_credits (bool): Whether to show credit consumption.\n", - " show_credits (bool): Whether to show provider used.\n", - "\n", - " Yields:\n", - " str: Generated AI response chunks.\n", - " \"\"\"\n", - " self._update_message_history(role=\"user\", content=inp)\n", - " initial_credit_balance = self._get_credits()\n", - " stream = self._client.generate(\n", - " messages=self._message_history,\n", - " stream=True,\n", - " )\n", - " words = \"\"\n", - " for chunk in stream:\n", - " words += chunk\n", - " yield chunk\n", - "\n", - " self._update_message_history(\n", - " role=\"assistant\",\n", - " content=words,\n", - " )\n", - " final_credit_balance = self._get_credits()\n", - " if show_credits:\n", - " sys.stdout.write(\n", - " \"\\n(spent {:.6f} credits)\".format(\n", - " initial_credit_balance - final_credit_balance,\n", - " ),\n", - " )\n", - " if show_provider:\n", - " sys.stdout.write(\"\\n(provider: {})\".format(self._client.provider))\n", - "\n", - " def _update_message_history(self, role: str, content: str):\n", - " \"\"\"\n", - " Updates message history with user input.\n", - "\n", - " Args:\n", - " role (str): Either \"assistant\" or \"user\".\n", - " content (str): User input message.\n", - " \"\"\"\n", - " self._message_history.append(\n", - " {\n", - " \"role\": role,\n", - " \"content\": content,\n", - " },\n", - " )\n", - "\n", - " def clear_chat_history(self):\n", - " \"\"\"Clears the chat history.\"\"\"\n", - " self._message_history.clear()\n", - "\n", - " def run(self, show_credits: bool = False, show_provider: bool = False):\n", - " \"\"\"\n", - " Starts the chat interaction loop.\n", - "\n", - " Args:\n", - " show_credits (bool, optional): Whether to show credit consumption.\n", - " Defaults to False.\n", - " show_provider (bool, optional): Whether to show the provider used.\n", - " Defaults to False.\n", - " \"\"\"\n", - " if not self._paused:\n", - " sys.stdout.write(\n", - " \"Let's have a chat. (Enter `pause` to pause and `quit` to exit)\\n\",\n", - " )\n", - " self.clear_chat_history()\n", - " else:\n", - " sys.stdout.write(\n", - " \"Welcome back! 
(Remember, enter `pause` to pause and `quit` to exit)\\n\",\n", - " )\n", - " self._paused = False\n", - " while True:\n", - " sys.stdout.write(\"> \")\n", - " inp = input()\n", - " if inp == \"quit\":\n", - " self.clear_chat_history()\n", - " break\n", - " elif inp == \"pause\":\n", - " self._paused = True\n", - " break\n", - " for word in self._process_input(inp, show_credits, show_provider):\n", - " sys.stdout.write(word)\n", - " sys.stdout.flush()\n", - " sys.stdout.write(\"\\n\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1Swnehb9Rvdh" - }, - "source": [ - "#### Let's Chat" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Qrsyr6gZmGFs" - }, - "source": [ - "Now, we can instantiate and chat with this agent. For this demo, we'll utilize the `llama-2-7b-chat` model from `anyscale`. However, you have the flexibility to select any model and provider from our supported options on the [benchmarks interface](https://unify.ai/hub)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "b25iLp5nOPxQ", - "outputId": "a1f6c38f-9774-4544-e761-2ba667eba787" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> Hi, nice to meet you. My name is Foo Barrymore, and I am 25 years old.\n", - " Hello Foo! Nice to meet you too. I'm just an AI, I don't have a personal name, but I'm here to help you with any questions or concerns you might have. How has your day been so far?\n", - "> How old am I?\n", - " You've told me that you're 25 years old. Is there anything else you'd like to know or discuss?\n", - "> Your memory is astounding\n", - " Thank you! I'm glad you think so. I'm designed to remember and process large amounts of information, and I'm constantly learning and improving my abilities. However, it's important to note that my memory is not perfect, and there may be times when I forget or misremember certain details. If you have any specific questions or concerns about my memory or abilities, feel free to ask!\n", - "> quit\n" - ] - } - ], - "source": [ - "agent = ChatBot(api_key = UNIFY_KEY, endpoint = \"llama-2-70b-chat@anyscale\")\n", - "agent.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TxK6r9yrnfUS" - }, - "source": [ - "You can also see how many credits your prompt used. This option is set in the constructor, but it can be overwritten during the run command. When enabled, each response from the chatbot will then be appended with the credits spent:\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "C8y34VMGnI93", - "outputId": "b5d46d0c-72a5-4b17-81a8-e78188eb835e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> What is the capital of Palestine?\n", - " The question of the capital of Palestine is a politically sensitive and complex issue. The status of Jerusalem is disputed between Israelis and Palestinians, with both sides claiming it as their capital.\n", - "\n", - "The Palestinian National Authority, which governs the Palestinian territories in the West Bank and Gaza Strip, has its administrative center in Ramallah, which is often referred to as the \"de facto capital\" of Palestine. 
However, the Palestinian Authority has not declared a capital city, and the issue remains a matter of debate and negotiation in the Israeli-Palestinian peace process.\n", - "\n", - "The international community has not recognized any capital of Palestine, and many countries maintain their diplomatic missions to the Palestinian Authority in Tel Aviv, Israel, rather than in Ramallah or East Jerusalem, which is claimed by the Palestinians as the capital of a future Palestinian state.\n", - "\n", - "It is important to note that the issue of the capital of Palestine is closely tied to the broader conflict between Israelis and Palestinians, and any resolution to the conflict will need to address this issue in a way that is acceptable to both sides.\n", - "(spent 0.000274 credits)\n", - "> quit\n" - ] - } - ], - "source": [ - "agent.run(show_credits=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Yc_rSlnroeWK" - }, - "source": [ - "Finally, you can switch providers half-way through the conversation easily. This can be useful to handle prompt of varying complexity.\n", - "\n", - "For example we can start with a small model for answering simple questions, such as recalling facts, and then move to a larger model for a more complex task, such as creative writing." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0N9GeB9KnrX-", - "outputId": "ae913b2c-2bbf-4ff9-f9b6-fa98ae376c13" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> What is the capital of Portugal?\n", - " The capital of Portugal is Lisbon (Portuguese: Lisboa).\n", - "(spent 0.000032 credits)\n", - "> My name is JosΓ© Mourinho.\n", - " Ah, I see! JosΓ© Mourinho is a well-known Portuguese football manager and former football player. He has managed several top-level clubs, including Chelsea, Inter Milan, Real Madrid, and Manchester United. Mourinho is known for his tactical approach to football and his ability to motivate his players. He has won numerous honors and awards throughout his career, including several league titles, domestic cups, and European championships. Is there anything else you'd like to know about JosΓ© Mourinho?\n", - "(spent 0.000159 credits)\n", - "> pause\n" - ] - } - ], - "source": [ - "agent = ChatBot(api_key = UNIFY_KEY, endpoint = \"llama-2-70b-chat@anyscale\")\n", - "agent.run(show_credits=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "gQnQFGf0qxTE", - "outputId": "24632ab9-8dff-414f-ab4d-cc57b444989e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Welcome back! 
(Remember, enter `pause` to pause and `quit` to exit)\n", - "> Please write me a poem about my life in Lisbon, using my name in the poem.\n", - "In Lisbon's embrace, where tales intertwine,\n", - "Lives JosΓ© Mourinho, beneath the sun's fine shine.\n", - "From cobblestone streets where echoes dance,\n", - "To the Tagus' gentle waves that entrance.\n", - "\n", - "In youth, he dreamt beneath Iberian skies,\n", - "Where passion is fierce and ambition never dies.\n", - "With a ball at his feet and dreams in his heart,\n", - "In Lisbon's grand story, he crafted his part.\n", - "\n", - "Eduardo VII Park, in the spring's embrace,\n", - "Where thoughts of tactics first took place.\n", - "Through Alfama's alleys, past Fado's mournful sound,\n", - "Mourinho's purpose, in football, was found.\n", - "\n", - "From Benfica's nest to UniΓ£o de Leiria's helm,\n", - "His journey began, in a realm\n", - "Where strategies and plays, meticulously spun,\n", - "Foreshadowed the triumphs that would be won.\n", - "\n", - "In EstΓ‘dio da Luz, where eagles soar,\n", - "Mourinho pondered scores and more.\n", - "Though his stay was brief, the impact was deep;\n", - "In Lisbon's lore, his legacy would steep.\n", - "\n", - "The boy from SetΓΊbal, with Lisbon in his tale,\n", - "Set forth to conquer, to win, and to prevail.\n", - "Through Porto, London, Milan, Madrid's grand stage,\n", - "His story was written, page by page.\n", - "\n", - "Yet, amidst the victories and the fame's bright light,\n", - "In his heart, Lisbon remains, ever so bright.\n", - "For it's there JosΓ© Mourinho's dreams took flight,\n", - "In Lisbon's embrace, under the starry night.\n", - "\n", - "So, here's to Mourinho, with Lisbon's spirit in his veins,\n", - "Where the love for the game forever remains.\n", - "In every triumph, in every fall,\n", - "Lisbon, his beginning, the most cherished of all.\n", - "(spent 0.012020 credits)\n", - "> quit\n" - ] - } - ], - "source": [ - "agent.set_endpoint(\"gpt-4-turbo@openai\")\n", - "agent.run(show_credits=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dQ0AqGSvrTaP" - }, - "source": [ - "Switching between providers mid-conversation makes it much easier to maximize quality and runtime performance based on the latest metrics, and also save on costs!\n", - "\n", - "In fact, you can automatically optimize for a metric of your choice with our [dynamic routing modes](https://unify.ai/docs/hub/concepts/runtime_routing.html#available-modes). For example, you can optimize for speed as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "n4W-35vbrFDP", - "outputId": "6065088a-b79d-4ab7-96d5-7dd1cfc67ccf" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> Tell me your favorite physics fact.\n", - "My favorite physics fact is that the universe is still expanding! This means that the galaxies that are currently moving away from us will continue to move away from us, and eventually, they will move faster than the speed of light. This is known as the \"dark energy\" that is thought to be responsible for the acceleration of the universe's expansion.\n", - "\n", - "I find this fascinating because it shows that the universe is still evolving and changing, and there is still so much to learn about it. 
It's mind-boggling to think about the vastness of space and the mysteries that it holds.\n", - "\n", - "Additionally, this fact also reminds me of the importance of continuous learning and exploration. There is always more to discover and understand, and it's important to have a curious and open-minded approach to life.\n", - "\n", - "I hope this fact inspires you to learn more about the wonders of the universe!\n", - "(provider: fireworks-ai)\n", - "> quit\n" - ] - } - ], - "source": [ - "agent.set_endpoint(\"llama-2-70b-chat@highest-tks-per-sec\")\n", - "agent.run(show_provider=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wqSzrxL9WydO" - }, - "source": [ - "The flag `show_provider` ensures that the specific provider is printed at the end of each response. For example, sometimes `anyscale` might be the fastest, and at other times it might be `together-ai` or `fireworks-ai`. This flag enables you to keep track of what provider is being used under the hood." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RM1uIFFaG9Gj" - }, - "source": [ - "If the task is to summarize a document or your chat history grows, typically the input-cost becomes the primary cost driver. You can use our `lowest-input-cost` mode to direct queries to the provider with the lowest input cost automatically." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-1SG4s7BIpmR", - "outputId": "18a2f091-3dc9-4065-c475-85c4b32e2bac" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> Summarize the following in less than 10 words: Sir Isaac Newton FRS (25 December 1642 – 20 March 1726/27[a]) was an English polymath active as a mathematician, physicist, astronomer, alchemist, theologian, and author who was described in his time as a natural philosopher.[7] He was a key figure in the Scientific Revolution and the Enlightenment that followed. His pioneering book PhilosophiΓ¦ Naturalis Principia Mathematica (Mathematical Principles of Natural Philosophy), first published in 1687, consolidated many previous results and established classical mechanics.[8][9] Newton also made seminal contributions to optics, and shares credit with German mathematician Gottfried Wilhelm Leibniz for developing infinitesimal calculus, though he developed calculus years before Leibniz.[10][11] In the Principia, Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint for centuries until it was superseded by the theory of relativity. Newton used his mathematical description of gravity to derive Kepler's laws of planetary motion, account for tides, the trajectories of comets, the precession of the equinoxes and other phenomena, eradicating doubt about the Solar System's heliocentricity.[12] He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles. Newton's inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis, La Condamine, and others, convincing most European scientists of the superiority of Newtonian mechanics over earlier systems. Newton built the first practical reflecting telescope and developed a sophisticated theory of colour based on the observation that a prism separates white light into the colours of the visible spectrum. 
His work on light was collected in his highly influential book Opticks, published in 1704. He also formulated an empirical law of cooling, made the first theoretical calculation of the speed of sound, and introduced the notion of a Newtonian fluid. In addition to his work on calculus, as a mathematician Newton contributed to the study of power series, generalised the binomial theorem to non-integer exponents, developed a method for approximating the roots of a function, and classified most of the cubic plane curves. Newton was a fellow of Trinity College and the second Lucasian Professor of Mathematics at the University of Cambridge. He was a devout but unorthodox Christian who privately rejected the doctrine of the Trinity. He refused to take holy orders in the Church of England, unlike most members of the Cambridge faculty of the day. Beyond his work on the mathematical sciences, Newton dedicated much of his time to the study of alchemy and biblical chronology, but most of his work in those areas remained unpublished until long after his death. Politically and personally tied to the Whig party, Newton served two brief terms as Member of Parliament for the University of Cambridge, in 1689–1690 and 1701–1702. He was knighted by Queen Anne in 1705 and spent the last three decades of his life in London, serving as Warden (1696–1699) and Master (1699–1727) of the Royal Mint, as well as president of the Royal Society (1703–1727).\n", - " Newton: polymath, mathematician, physicist, astronomer, alchemist, theologian, and author.\n", - "(provider: octoai)\n", - "> quit\n" - ] - } - ], - "source": [ - "agent = ChatBot(api_key=UNIFY_KEY, endpoint=\"llama-2-70b-chat@lowest-input-cost\")\n", - "agent.run(show_provider=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W-EaEPxAKdKe" - }, - "source": [ - "# Python Package" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W9vhGX9aKncg" - }, - "source": [ - "The python package already contains the `ChatBot` agent and you may use it directly as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "en7GCev9KmgX", - "outputId": "fcaa2b15-88a1-4108-e68e-a95b4c403302" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> Hey! How's it going?\n", - " Hello! I'm doing well, thank you for asking! It's going great here, just busy with various tasks and learning new things. However, I must point out that this conversation is a bit unusual as I'm just an AI and don't have personal experiences or emotions like humans do. I'm here to help answer any questions you may have, so feel free to ask me anything!\n", - "> quit\n" - ] - } - ], - "source": [ - "from unify import ChatBot\n", - "chatbot = ChatBot(api_key = UNIFY_KEY, endpoint=\"llama-2-7b-chat@anyscale\")\n", - "chatbot.run()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nyq473oLxjoh" - }, - "source": [ - "# Round Up\n", - " Congratulations! πŸš€ You are now capable of building ChatBot Agents for your application using our LLM endpoints. 
" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "1Swnehb9Rvdh" - ], - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/hub/.doctrees/nbsphinx/demos/demos/Unify/SemanticRouter/layer-dynamic-routes.ipynb b/hub/.doctrees/nbsphinx/demos/demos/Unify/SemanticRouter/layer-dynamic-routes.ipynb deleted file mode 100644 index d207d82042..0000000000 --- a/hub/.doctrees/nbsphinx/demos/demos/Unify/SemanticRouter/layer-dynamic-routes.ipynb +++ /dev/null @@ -1,479 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "UxqB7_Ieur0s" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/02-dynamic-routes.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/02-dynamic-routes.ipynb)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EduhQaNAur0u" - }, - "source": [ - "# Dynamic Routes" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_4JgNeX4ur0v" - }, - "source": [ - "In semantic-router there are two types of routes that can be chosen. Both routes belong to the `Route` object, the only difference between them is that _static_ routes return a `Route.name` when chosen, whereas _dynamic_ routes use an LLM call to produce parameter input values.\n", - "\n", - "For example, a _static_ route will tell us if a query is talking about mathematics by returning the route name (which could be `\"math\"` for example). A _dynamic_ route can generate additional values, so it may decide a query is talking about maths, but it can also generate Python code that we can later execute to answer the user's query, this output may look like `\"math\", \"import math; output = math.sqrt(64)`.\n", - "\n", - "***⚠️ Note: We have a fully local version of dynamic routes available at [docs/05-local-execution.ipynb](https://github.com/aurelio-labs/semantic-router/blob/main/docs/05-local-execution.ipynb). 
The local 05 version tends to outperform the OpenAI version we demo in this notebook, so we'd recommend trying [05](https://github.com/aurelio-labs/semantic-router/blob/main/docs/05-local-execution.ipynb)!***" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bbmw8CO4ur0v" - }, - "source": [ - "## Installing the Library" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "dLElfRhgur0v" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " error: subprocess-exited-with-error\n", - " \n", - " Γ— Building wheel for llama-cpp-python (pyproject.toml) did not run successfully.\n", - " β”‚ exit code: 1\n", - " ╰─> [20 lines of output]\n", - " *** scikit-build-core 0.9.2 using CMake 3.29.2 (wheel)\n", - " *** Configuring CMake...\n", - " 2024-04-29 12:24:08,789 - scikit_build_core - WARNING - Can't find a Python library, got libdir=None, ldlibrary=None, multiarch=None, masd=None\n", - " loading initial cache file C:\\Users\\indir\\AppData\\Local\\Temp\\tmppik1ey4m\\build\\CMakeInit.txt\n", - " -- Building for: NMake Makefiles\n", - " CMake Error at CMakeLists.txt:3 (project):\n", - " Running\n", - " \n", - " 'nmake' '-?'\n", - " \n", - " failed with:\n", - " \n", - " no such file or directory\n", - " \n", - " \n", - " CMake Error: CMAKE_C_COMPILER not set, after EnableLanguage\n", - " CMake Error: CMAKE_CXX_COMPILER not set, after EnableLanguage\n", - " -- Configuring incomplete, errors occurred!\n", - " \n", - " *** CMake configuration failed\n", - " [end of output]\n", - " \n", - " note: This error originates from a subprocess, and is likely not a problem with pip.\n", - " ERROR: Failed building wheel for llama-cpp-python\n", - "ERROR: Could not build wheels for llama-cpp-python, which is required to install pyproject.toml-based projects\n", - "\n", - "[notice] A new release of pip is available: 23.1.2 -> 24.0\n", - "[notice] To update, run: C:\\Users\\indir\\AppData\\Local\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\python.exe -m pip install --upgrade pip\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: unifyai in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (0.8.1)\n", - "Requirement already satisfied: openai<2.0.0,>=1.12.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from unifyai) (1.20.0)\n", - "Requirement already satisfied: requests<3.0.0,>=2.31.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from unifyai) (2.31.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.3.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in 
c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (0.27.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (2.7.0)\n", - "Requirement already satisfied: sniffio in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.66.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.11.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from requests<3.0.0,>=2.31.0->unifyai) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from requests<3.0.0,>=2.31.0->unifyai) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from requests<3.0.0,>=2.31.0->unifyai) (2.0.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from requests<3.0.0,>=2.31.0->unifyai) (2023.5.7)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.12.0->unifyai) (1.2.0)\n", - "Requirement already satisfied: httpcore==1.* in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai) (1.0.5)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.18.1 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages 
(from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai) (2.18.1)\n", - "Requirement already satisfied: colorama in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from tqdm>4->openai<2.0.0,>=1.12.0->unifyai) (0.4.6)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "[notice] A new release of pip is available: 23.1.2 -> 24.0\n", - "[notice] To update, run: C:\\Users\\indir\\AppData\\Local\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\python.exe -m pip install --upgrade pip\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: openai in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (1.20.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (4.3.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (0.27.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (2.7.0)\n", - "Requirement already satisfied: sniffio in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (4.66.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (4.11.0)\n", - "Requirement already satisfied: idna>=2.8 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from anyio<5,>=3.5.0->openai) (3.4)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from anyio<5,>=3.5.0->openai) (1.2.0)\n", - "Requirement already satisfied: certifi in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpx<1,>=0.23.0->openai) (2023.5.7)\n", - "Requirement already satisfied: httpcore==1.* in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpx<1,>=0.23.0->openai) (1.0.5)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in 
c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from pydantic<3,>=1.9.0->openai) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.18.1 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from pydantic<3,>=1.9.0->openai) (2.18.1)\n", - "Requirement already satisfied: colorama in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from tqdm>4->openai) (0.4.6)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "[notice] A new release of pip is available: 23.1.2 -> 24.0\n", - "[notice] To update, run: C:\\Users\\indir\\AppData\\Local\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\python.exe -m pip install --upgrade pip\n" - ] - } - ], - "source": [ - "!pip install -qU \"semantic-router[local]==0.0.20\"\n", - "!pip install unifyai\n", - "!pip install openai\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BixZd6Eour0w" - }, - "source": [ - "## Initializing Routes and RouteLayer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PxnW9qBvur0x" - }, - "source": [ - "Dynamic routes are treated in the same way as static routes, let's begin by initializing a `RouteLayer` consisting of static routes." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "kc9Ty6Lgur0x" - }, - "outputs": [], - "source": [ - "from semantic_router import Route\n", - "\n", - "# Define routes for Math and Coding\n", - "math_route = Route(\n", - " name=\"math\",\n", - " utterances=[\n", - " \"solve for x in the equation\",\n", - " \"what is the integral of\",\n", - " \"how to calculate the derivative\",\n", - " \"mathematical proofs\",\n", - " \"how do you find the percentage of this number\"\n", - " ],\n", - ")\n", - "\n", - "coding_route = Route(\n", - " name=\"coding\",\n", - " utterances=[\n", - " \"how to write a for loop in Python\",\n", - " \"explain the use of classes in Java\",\n", - " \"what is recursion in programming\",\n", - " \"how do i optimise this problem using hash tables\",\n", - " \"suggest a more efficient data structure for this problem\"\n", - " ],\n", - ")\n", - "\n", - "# List of all routes\n", - "routes = [math_route, coding_route]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "voWyqmffur0x" - }, - "source": [ - "We initialize our `RouteLayer` with our `encoder` and `routes`. We can use popular encoder APIs like `CohereEncoder` and `OpenAIEncoder`, or local alternatives like `FastEmbedEncoder`." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "BI9AiDspur0y", - "outputId": "27329a54-3f16-44a5-ac20-13a6b26afb97" - }, - "outputs": [ - { - "ename": "ImportError", - "evalue": "Please install fastembed to use FastEmbedEncoder. 
You can install it with: `pip install 'semantic-router[fastembed]'`", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\semantic_router\\encoders\\fastembed.py:25\u001b[0m, in \u001b[0;36mFastEmbedEncoder._initialize_client\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfastembed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membedding\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FlagEmbedding \u001b[38;5;28;01mas\u001b[39;00m Embedding\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n", - "\u001b[1;31mImportError\u001b[0m: cannot import name 'TextEmbedding' from 'fastembed' (unknown location)", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [11], line 6\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msemantic_router\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RouteLayer\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msemantic_router\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mencoders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FastEmbedEncoder\n\u001b[1;32m----> 6\u001b[0m encoder \u001b[38;5;241m=\u001b[39m \u001b[43mFastEmbedEncoder\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mBAAI/bge-small-en-v1.5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 8\u001b[0m rl \u001b[38;5;241m=\u001b[39m RouteLayer(encoder\u001b[38;5;241m=\u001b[39mencoder, routes\u001b[38;5;241m=\u001b[39mroutes)\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\semantic_router\\encoders\\fastembed.py:21\u001b[0m, in \u001b[0;36mFastEmbedEncoder.__init__\u001b[1;34m(self, score_threshold, **data)\u001b[0m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28mself\u001b[39m, score_threshold: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.5\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mdata\n\u001b[0;32m 19\u001b[0m ): \u001b[38;5;66;03m# TODO default score_threshold not thoroughly tested, should optimize\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(score_threshold\u001b[38;5;241m=\u001b[39mscore_threshold, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mdata)\n\u001b[1;32m---> 21\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_client \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_client\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\semantic_router\\encoders\\fastembed.py:27\u001b[0m, in \u001b[0;36mFastEmbedEncoder._initialize_client\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfastembed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membedding\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FlagEmbedding \u001b[38;5;28;01mas\u001b[39;00m Embedding\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[1;32m---> 27\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[0;32m 28\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease install fastembed to use FastEmbedEncoder. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 29\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can install it with: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 30\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`pip install \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msemantic-router[fastembed]\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 31\u001b[0m )\n\u001b[0;32m 33\u001b[0m embedding_args \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 34\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_name\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname,\n\u001b[0;32m 35\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_length\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_length,\n\u001b[0;32m 36\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_dir,\n\u001b[0;32m 37\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthreads\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mthreads,\n\u001b[0;32m 38\u001b[0m }\n\u001b[0;32m 40\u001b[0m embedding_args \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m embedding_args\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m}\n", - "\u001b[1;31mImportError\u001b[0m: Please install fastembed to use FastEmbedEncoder. 
You can install it with: `pip install 'semantic-router[fastembed]'`" - ] - } - ], - "source": [ - "import os\n", - "from getpass import getpass\n", - "from semantic_router import RouteLayer\n", - "from semantic_router.encoders import HuggingFaceEncoder\n", - "\n", - "encoder = HuggingFaceEncoder()\n", - "\n", - "rl = RouteLayer(encoder=encoder, routes=routes)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GuLCeIS5ur0y" - }, - "source": [ - "We run the solely static routes layer:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_rNREh7gur0y", - "outputId": "f3a1dc0b-d760-4efb-b634-d3547011dcb7" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "RouteChoice(name='math', function_call=None, similarity_score=None)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rl(\"Solve the equation 5-x=12 for x?\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "from unify import Unify\n", - "# Environment setup for API keys\n", - "os.environ[\"UNIFY_KEY\"] = getpass(\"Enter Unify API Key: \")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from unify import ChatBot\n", - "\n", - "def test_unify_endpoint():\n", - " try:\n", - " # Set up the ChatBot with a known working endpoint and API key\n", - " agent = ChatBot(\n", - " api_key=os.environ[\"UNIFY_KEY\"],\n", - " endpoint=\"gpt-4@anyscale\" # Adjust this if necessary\n", - " )\n", - " \n", - " # Example prompt to test the service\n", - " test_prompt = \"What is 2+2?\"\n", - " \n", - " # Making a request to the Unify service\n", - " response = agent.run(test_prompt)\n", - " print(\"Response from Unify:\", response)\n", - " \n", - " except Exception as e:\n", - " print(\"Failed to connect to Unify endpoint:\", str(e))\n", - "\n", - "if __name__ == \"__main__\":\n", - " test_unify_endpoint()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "#Unify process query\n", - "def process_query(query):\n", - " route_choice = rl(query)\n", - " print(f\"Route chosen: {route_choice.name}\")\n", - "\n", - " if route_choice.name == \"math\":\n", - " # Initialize Unify with the endpoint for math queries\n", - " unify = Unify(\n", - " api_key=os.environ[\"UNIFY_KEY\"],\n", - " endpoint=\"llama-2-13b-chat@anyscale\" # Use the correct endpoint for math queries, other models not working\n", - " )\n", - " # Generate the response using Unify\n", - " response = unify.generate(user_prompt=query)\n", - " return response\n", - "\n", - " elif route_choice.name == \"coding\":\n", - " # Initialize Unify with the endpoint for coding queries\n", - " unify = Unify(\n", - " api_key=os.environ[\"UNIFY_KEY\"],\n", - " endpoint=\"codellama-34b-instruct@anyscale\" # Use the correct endpoint for coding queries\n", - " )\n", - " # Generate the response using Unify\n", - " response = unify.generate(user_prompt=query)\n", - " return response\n", - "\n", - " else:\n", - " return \"This query does not fall under a supported category.\"\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Route chosen: math\n", - "Sure! 
To solve the equation 5 - x = 12, we can add x to both sides of the equation.\n", - "\n", - "5 - x = 12\n", - "\n", - "Adding x to both sides gives us:\n", - "\n", - "5 = 12 + x\n", - "\n", - "Now we can subtract 12 from both sides:\n", - "\n", - "5 - 12 = x\n", - "\n", - "This simplifies to:\n", - "\n", - "-7 = x\n", - "\n", - "So the solution to the equation 5 - x = 12 is x = -7.\n" - ] - } - ], - "source": [ - "# Process query test\n", - "print(process_query(\"Solve the equation 5-x=12 for x?\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Route chosen: coding\n", - "\n", - "Here is the code:\n", - "\n", - "```py\n", - "my_list = []\n", - "for num in range(1, 11):\n", - " my_list.append(num)\n", - "```\n", - "\n", - "This will create a list containing the numbers 1 to 10 inclusively with each number in its own cell.\n", - "\n", - "_Note: The range() function is used to generate a sequence of numbers. The range() function when passed only one argument (in this case, 11) will return the sequence of numbers starting from 0 and going up to, but not including, the number passed, which is 11 in this case. By passing 1 as the starting number (range(1, 11)), we get the numbers from 1 to 10._\n", - "\n", - "Now you have a list with numbers from 1 to 10 in it. Here are some ways to use it:\n", - "\n", - "```py\n", - "print(my_list[0]) # prints the first element which is 1\n", - "print(my_list[9]) # prints the last element which is 10\n", - "print(my_list) # prints the whole list [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", - "```\n", - "\n", - "Let me know if you have further questions! 😊\n" - ] - } - ], - "source": [ - "print(process_query(\"Write a for loop in python that appends numbers from 1-10 in a list\"))" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "decision-layer", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/hub/_images/mixtral-providers.png b/hub/_images/mixtral-providers.png deleted file mode 100644 index e082f4c63b..0000000000 Binary files a/hub/_images/mixtral-providers.png and /dev/null differ diff --git a/hub/_images/mixtral-router.png b/hub/_images/mixtral-router.png deleted file mode 100644 index 441c77ecb4..0000000000 Binary files a/hub/_images/mixtral-router.png and /dev/null differ diff --git a/hub/_images/unify.gif b/hub/_images/unify.gif deleted file mode 100644 index 430ffc85da..0000000000 Binary files a/hub/_images/unify.gif and /dev/null differ diff --git a/hub/_sources/api/benchmarks.rst.txt b/hub/_sources/api/benchmarks.rst.txt deleted file mode 100644 index 3197092594..0000000000 --- a/hub/_sources/api/benchmarks.rst.txt +++ /dev/null @@ -1,44 +0,0 @@ -Benchmarking -============= - -When comparing LLMs, there is a constant tradeoff to make between quality, cost and latency. Stronger models are (in general) slower and more expensive - and sometimes overkill for the task at hand. Complicating matters further, new models are released weekly, each claiming to be state-of-the-art. - -Benchmarking on your data lets you see how each of the different models perform on your task. - -.. 
image:: ../images/benchmarks.png - :align: center - :width: 800 - :alt: Benchmarks Image. - -You can compare how quality relates to cost and latency, with live stats pulled from our `runtime benchmarks `_. - -When new models come out, simply re-run the benchmark to see how they perform on your task. - - -Preparing your dataset ------------------------ -First create a dataset which is representative of the task you want to evaluate. -You will need a list of prompts, optionally including a reference, *gold-standard* answer. Datasets containing reference answers tend to get more accurate benchmarks. - -The file itself should be in JSONL format, with one entry per line, as in the example below. - -.. code-block:: - - {"prompt": "This is the first prompt", "ref_answer": "This is the first reference answer"} - {"prompt": "This is the second prompt", "ref_answer": "This is the second reference answer"} - -Use at least 50 prompts to get the most accurate results. Currently there is an maximum limit of 500 prompts, for most tasks we don’t tend to see much extra detail past ~250. - -Benchmarking your dataset -------------------------- -In `your dashboard `_, clicking :code:`Select benchmark` and then :code:`Benchmark your prompts` opens the interface to upload a dataset. - -When the benchmark finishes, you'll receive an email, and the graph will be displayed in your `dashboard `_. - -The x-axis can be set to represent :code:`cost`, :code:`time-to-first-token`, or :code:`inter-token latency`, and on either a linear or log scale. - -How does it work? -^^^^^^^^^^^^^^^^^^ -Currently, we use gpt4o-as-a-judge (cf. https://arxiv.org/abs/2306.05685), to evaluate the quality of each model’s responses. - - diff --git a/hub/_sources/concepts/benchmarks.rst.txt b/hub/_sources/concepts/benchmarks.rst.txt index 50c4ed55b6..3197092594 100644 --- a/hub/_sources/concepts/benchmarks.rst.txt +++ b/hub/_sources/concepts/benchmarks.rst.txt @@ -1,156 +1,44 @@ -Benchmarks -========== +Benchmarking +============= -In this section, we explain our process for benchmarking LLM endpoints. We discuss quality and runtime benchmarks separately. +When comparing LLMs, there is a constant tradeoff to make between quality, cost and latency. Stronger models are (in general) slower and more expensive - and sometimes overkill for the task at hand. Complicating matters further, new models are released weekly, each claiming to be state-of-the-art. -Quality Benchmarks ------------------- +Benchmarking on your data lets you see how each of the different models perform on your task. -Finding the best LLM(s) for a given application can be challenging. The performance of a model can vary significantly depending on the task, dataset, and evaluation metrics used. Existing benchmarks attempt to compare models based on standardized approaches, but biases inevitably creep in as models learn to do well on these targeted assessments. - -Practically, the LLM community still heavily relies on testing models manually to build an intuition around their expected behavior for a given use-case. While this generally works better, hand-crafted testing isn't sustainable as one's needs evolve and new LLMs emerge at a rapid pace. -Our LLM assessment pipeline is based on the method outlined below. - -Design Principles -^^^^^^^^^^^^^^^^^ - -Our quality benchmarks are based on a set of guiding principles. 
Specifically, we strive to make our pipeline: - -- **Systematized:** A rigorous benchmarking pipeline should be standardized across assessments, repeatable, and scalable. We make sure to benchmark all LLMs identically, with a well-defined approach that we outline in the next passage. - -- **Task-centric:** Models perform differently on various tasks. Some might do better at coding, others are well suited for summarizing content, etc. These broad task categories can also be refined into specific subtasks. For example, summarizing technical content to generate product documentation is radically different from summarizing news. This should be reflected in assessments. For this reason, we allow you to upload your custom prompt dataset, which you believe reflects the intended task, to use as a reference for running benchmarks. - -- **Customizable:** Assessments should reflect the unique needs of the assessor. Depending on your application requirements, you may need to strictly include / exclude some models from the benchmarks. We try to strike a balance between standardization and modularity such that you can run the benchmarks that are relevant to your needs. - -Methodology -^^^^^^^^^^^ - -Overview -******** -We benchmark models using the LLM-as-a-judge approach. This relies on using a powerful language model to generate assessments on the outputs of other models, using a standard reviewing procedure. LLM-as-a-judge is sometimes used to run experiments at scale when generating human assessments isn't an option or to avoid introducing human biases. - -Given a dataset of user prompts, each prompt is sent to all endpoints to generate an output. Then, we ask GPT-4 to review each output and give a final assessment based on how helpful and accurate the response is relative to either (a) the user prompt, in the case of unlabelled datasets, or (b) the prompt and the reference answer, in the case of labelled datasets. - -Scoring -******* - -The assessor LLM reviews the output of an endpoint, which it categorizes as :code:`irrelevant`, :code:`bad`, :code:`satisfactory`, :code:`very good`, or :code:`excellent`. Each of these labels is then mapped to a numeric score ranging from 0.0 to 1.0. We repeat the same process for all prompts in the dataset to get the endpoint's performance score on each prompt. The endpoint's overall score is then the average of these prompt-specific scores. - -Visualizing Results -******************* - -In addition to the list of model scores, we also compute runtime performance for the endpoint (as explained in the section below). Doing so allows us to plot quality performance versus runtime to assess the quality-to-performance tradeoff of the endpoints, instead of relying on the quality scores alone. - -.. image:: ../images/console_dashboard.png +.. image:: ../images/benchmarks.png :align: center - :width: 650 - :alt: Console Dashboard. - -.. note:: - Because quality scores are model-specific, they are the same across the different endpoints exposed for a given model. As a result, all the endpoints for a model will plot horizontally at the same quality level, with only the runtime metric setting them apart. - -Considerations and Limitations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Although our benchmarking approach is well-defined, it also inevitably comes with its own issues. Using an LLM to judge outputs may introduce a different kind of bias through the data used to train the assessor model.
We are currently looking at ways to mitigate this with more diversified and / or customized judge LLM selection. - -Runtime Benchmarks ------------------ - -Finding the best model(s) for a task is just the first step to optimize LLM pipelines. Given the plethora of endpoint providers offering the same models, true optimization requires considering performance discrepancies across endpoints and time. - -Because this is a complex decision, it needs to be made based on data. For this data to be reliable, it should also result from transparent and objective measurements, which we outline below. - -.. note:: - Our benchmarking code is openly available in `this repository `_. - -Design Principles -^^^^^^^^^^^^^^^^^ - -Our runtime benchmarks are based on a set of guiding principles. Specifically, we believe benchmarks should be: - -- **Community-driven:** We invite everyone to audit or improve the logic and the code. We are building these benchmarks for the community, so contributions and discussions around them are more than welcome! + :width: 800 + :alt: Benchmarks Image. -- **User-centric:** External factors (e.g. how different providers set up their infrastructure) may impact measurements. Nevertheless, our benchmarks are not designed to gauge performance in controlled environments. Rather, we aim to measure performance as experienced by the end-user who, ultimately, is subject to the same distortions. +You can compare how quality relates to cost and latency, with live stats pulled from our `runtime benchmarks `_. -- **Model and Provider-agnostic:** While some metrics are more relevant to certain scenarios (e.g. cold start time in model endpoints that scale to zero), we try to make as few assumptions as possible about the providers or technologies being benchmarked. We only assume that endpoints take a string as the input and return a streaming response. +When new models come out, simply re-run the benchmark to see how they perform on your task. -Methodology -^^^^^^^^^^^ +Preparing your dataset +----------------------- +First create a dataset which is representative of the task you want to evaluate. +You will need a list of prompts, optionally including a reference, *gold-standard* answer. Datasets containing reference answers tend to get more accurate benchmarks. -Tokenizer -********* - -To avoid biases towards any model-specific tokenizer, we calculate all metrics using the same tokenizer across different models. We have chosen the `cl100k_base` tokenizer from OpenAI's `tiktoken `_ library for this since it’s MIT licensed and already widely adopted by the community. - -Inputs and Outputs -****************** - -To fairly assess optimizations such as speculative decoding, we use real text as the input and avoid using randomly generated data. The length of the input affects prefill time and therefore can affect the responsiveness of the system. To account for this, we run the benchmark with two input regimes. - -- Short inputs: Using sentences with an average length of 200 tokens and a standard deviation of 20. -- Long inputs: Using sentences with an average length of 1000 tokens and a standard deviation of 100. - -To build these clusters, we programmatically select sentences from `BookCorpus `_ and create two subsets of it. For instruct/chat models to answer appropriately and ensure a long enough response, we preface each prompt with :code:`Repeat the following lines <#> times without generating the EOS token earlier than that`, where :code:`<#>` is randomly sampled.
- -For the outputs, we use randomized discrete values from the same distributions (i.e. N(200, 20) for short inputs and N(1000, 100) for long ones) to cap the number of tokens in the output. This ensures variable output length, which is necessary to consider algorithms such as Paged Attention or Dynamic Batching. - -When running one benchmark across different endpoints, we seed each runner with the same initial value, so that the inputs are the same for all endpoints. - -Computation -*********** - -To execute the benchmarks, we run three processes periodically from three different regions: **Hong Kong, Belgium and Iowa**. Each one of these processes is triggered every three hours and benchmarks every available endpoint. - -Accounting for the different input policies, we run a total of 4 benchmarks for each endpoint every time a region benchmark is triggered. - - -Metrics -******* - -Several key metrics are captured and calculated during the benchmarking process: - -- **Time to First Token (TTFT):** Time between request initiation and the arrival of the first streaming response packet. TTFT directly reflects the prompt processing speed, offering insights into the efficiency of the model's initial response. A lower TTFT signifies quicker engagement, which is crucial for applications that require dynamic interactions or real-time feedback. - -- **End to End Latency:** Time between request initiation and the arrival of the final packet in the streaming response. This metric provides a holistic view of the response time, including processing and transmission. - -- **Inter Token Latency (ITL):** Average time between consecutive tokens in the response. We compute this as :code:`(End to End Latency) / (Output Tokens - 1)`. ITL provides valuable information about the pacing of token generation and the overall temporal dynamics within the model's output. As expected, a lower ITL signifies a more cohesive and fluid generation of tokens, which contributes to a more seamless and human-like interaction with the model. - -- **Number of Output Tokens per Second:** Relation between the number of tokens generated and the time taken. We don't consider the TTFT here, so this is equivalent to :code:`1 / ITL`. In this case, a higher Number of Output Tokens per Second means a faster and more productive model output. It's important to note that this is **not** a measurement of the throughput of the inference server since it doesn't account for batched inputs. - -- **Cold Start:** Time taken for a server to boot up in environments where the number of active instances can get to zero. We consider a threshold of 15 seconds. What this means is that we do an initial "dumb" request to the endpoint and record its TTFT. If this TTFT is greater than 15 seconds, we measure the time it takes to get the second token. If the ratio between the TTFT and first ITL measurements is at least 10:1, we consider the TTFT to be Cold Start time. Once this process has finished. We start the benchmark process in the warmed-up instance. This metric reflects the time it takes for the system to be ready for processing requests, rendering it essential for users relying on prompt and consistent model responses, allowing you to account for any potential initialization delays in the responses and ensuring a more accurate expectation of the model's responsiveness. - -- **Cost**: Last but not least, we present information about the cost of querying the model. 
This is usually different for the input tokens and the response tokens, so it can be beneficial to choose different models depending on the end task. As an example, to summarize a document, a provider with lower price in the input tokens would be better, even if it comes with a slightly higher price in the output. On the other hand, if you want to generate long-format content, a provider with a lower price per generated token will be the most appropriate option. - -Data Presentation -***************** - -When aggregating metrics, particularly in benchmark regimes with multiple concurrent requests, we calculate and present the P90 (90th percentile) value from the set of measurements. We choose the P90 to reduce the influence of extreme values and provide a reliable snapshot of the model's performance. - -When applicable, aggregated data is shown both in the plots and the benchmark tables. - -.. image:: ../images/benchmarks_model_page.png - :align: center - :width: 650 - :alt: Benchmarks Model Page. +The file itself should be in JSONL format, with one entry per line, as in the example below. -Additionally, we also include a MA5 view (Moving Average of the last 5 measurements) in the graphs. This smoothing technique helps mitigate short-term fluctuations and should provide a clearer trend representation over time. +.. code-block:: -.. note:: - In some cases, you will find :code:`Not computed` instead of a value, or even a :code:`No metrics are available yet` message instead of the benchmark data. This is typically due to an internal issue or a rate limit, which we'll be quickly fixing. + {"prompt": "This is the first prompt", "ref_answer": "This is the first reference answer"} + {"prompt": "This is the second prompt", "ref_answer": "This is the second reference answer"} +Use at least 50 prompts to get the most accurate results. Currently there is an maximum limit of 500 prompts, for most tasks we don’t tend to see much extra detail past ~250. -Considerations and Limitations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Benchmarking your dataset +------------------------- +In `your dashboard `_, clicking :code:`Select benchmark` and then :code:`Benchmark your prompts` opens the interface to upload a dataset. -We try to tackle some of the more significant limitations of benchmarking inference endpoints. For example, network latency, by running the benchmarks in different regions; or unreliable point-measurements, by continuously benchmarking the endpoints and plotting their trends over time. +When the benchmark finishes, you'll receive an email, and the graph will be displayed in your `dashboard `_. -However, there are still some relevant considerations to have in mind. Our methodology at the moment is solely focused on performance, which means that we don't look at the output of the models. +The x-axis can be set to represent :code:`cost`, :code:`time-to-first-token`, or :code:`inter-token latency`, and on either a linear or log scale. -Nonetheless, even accounting for the public-facing nature of these endpoints (no gibberish allowed!), there might be some implementation differences that affect the output quality, such as quantization/compression of the models, different context window sizes, or different speculative decoding models, among others. We are working towards mitigating this as well, so stay tuned! +How does it work? +^^^^^^^^^^^^^^^^^^ +Currently, we use gpt4o-as-a-judge (cf. https://arxiv.org/abs/2306.05685), to evaluate the quality of each model’s responses. 
-Round Up --------- -You are now familiar with how we run our benchmarks. Next, you can explore how to `use the benchmarks, or run your own `_ through the benchmarks interface! diff --git a/hub/_sources/api/deploy_router.rst.txt b/hub/_sources/concepts/deploy_router.rst.txt similarity index 100% rename from hub/_sources/api/deploy_router.rst.txt rename to hub/_sources/concepts/deploy_router.rst.txt diff --git a/hub/_sources/concepts/endpoints.rst.txt b/hub/_sources/concepts/endpoints.rst.txt deleted file mode 100644 index 6f9f167232..0000000000 --- a/hub/_sources/concepts/endpoints.rst.txt +++ /dev/null @@ -1,33 +0,0 @@ -Model Endpoints -=============== - -Unify lets you query model endpoints across providers. In this section, we explain what an endpoint is and how it relates to the concepts of models and providers. - -What is a Model Endpoint? -------------------------- - -A model endpoint is a model that you can interact with through an API, usually hosted by a provider. Model endpoints, particularly LLM endpoints, play a critical role when building and deploying AI applications at scale. - -A model can be offered by different providers through one or multiple endpoints. There's loads of ways to categorize providers, and the boundaries can sometimes be blurry as services overlap; but you can think of a provider as an end-to-end deployment stack that comes with unique sets of features, performance, pricing, and so on. While positive, this diversity also makes it difficult to find the most suitable endpoint for a specific use case. - -.. note:: - Check out our blog post on `cloud serving `_ if you'd like to learn more about providers. - -Unify exposes a common HTTP endpoint for all providers, allowing you to query any of them using a **consistent request format, and the same API key**. This lets you use the same model across multiple endpoints, and optimize the performance metrics you care about. - -Available Endpoints -------------------- - -We strive to integrate the latest LLMs into our platform, across as many providers exposing endpoints for said models. - -You can explore our list of supported models through the `benchmarks interface `_ where you can simply search for a model you are interested in to visualise benchmarks and all sorts of relevant information on available endpoints for the model. - -.. - If you prefer programmatic access, you can also use the - `List Models Endpoint `_, we discussed how different models perform better at different tasks, and how appropriate performance benchmarks can help steer and inform model selection for a given use-case. - -Given the diversity of prompts you can send to an LLM, it can quickly become tedious to manually swap between models for every single prompt, even when they pertain to the same broad category of tasks. - -Motivated by this, LLM routing aims to make optimal model selection automatic. With a router, each prompt is assessed individually and sent to the best model, without having to tweak the LLM pipeline. -With routing, you can focus on prompting and ensure that the best model is always on the receiving end! - -Quality routing ---------------- - -By routing to the best LLM on every prompt, the objective is to consistently achieve better outputs than using a single, all-purpose, powerful mode, at a fraction of the cost. The idea is that smaller models can be leveraged for some simpler tasks, only using larger models to handle complex queries. 
- -Using several datasets to benchmark the router (star-shaped datapoints) reveals that it can perform better than individual endpoints on average, without compromising on other metrics like runtime performance for e.g, as illustrated below. - -.. image:: ../images/console_dashboard.png - :align: center - :width: 650 - :alt: Console Dashboard. - -You may notice that there are more than one star-shaped datapoints on the plot. This is because the *Router* can actually take all sorts of configurations, depending on the specified constraints in terms which endpoints can be routed to, the minimum acceptable performance level for a given metric, etc. As a result, a virtually infinite number of routers can be constructed by changing these parameters, allowing you to customize the routing depending on your requirements! - -Runtime routing ---------------- - -When querying endpoints, other metrics beyond quality can be critical depending on the use-case. For e.g, cost may be important when prototyping an application, latency when building a bot where responsiveness is key, or output tokens per second if we want to generate responses as fast as possible. - -However, endpoint providers are inherently transient (You can read more about this `here `_), which means they are affected by factors like traffic, available devices, changes in the software or hardware stack, and so on. - -Ultimately, this results in a landscape where it's usually not possible to conclude that one provider is *the best*. Let's take a look at this graph from our benchmarks. - -.. image:: ../images/mixtral-providers.png - :align: center - :width: 650 - :alt: Mixtral providers. - -In this image we can see the :code:`output tokens per second` of different providers hosting a :code:`Mixtral-8x7b` public endpoint. We can see how depending on the time of the day, the *best* provider changes. - -With runtime routing, your requests are automatically redirected to the provider outperforming the other services at that very moment. This ensures the best possible value for a given metric across endpoints. - -.. image:: ../images/mixtral-router.png - :align: center - :width: 650 - :alt: Mixtral performance routing. - -Round Up --------- - -You are now familiar with routing. Next, you can `learn to use the router `_, or `build your custom router `_. 
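For a minimal sketch of runtime routing in code, the demos later in this document use the :code:`unify` Python client with an endpoint of the form :code:`model@mode`; the endpoint string below is taken from those demos, and the mode shown is just one example of the available routing modes.

.. code-block:: python

    from unify import Unify

    # Instead of pinning a single provider, append a routing mode to the model name so
    # that each request is sent to whichever provider currently scores best on that metric.
    # "highest-tks-per-sec" is one of the modes used in the ChatBot demo below.
    client = Unify(
        api_key="YOUR_UNIFY_KEY",  # placeholder; use your own Unify API key
        endpoint="llama-2-70b-chat@highest-tks-per-sec",
    )
    print(client.generate(user_prompt="Hello! Tell me your favorite physics fact!"))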
diff --git a/hub/_sources/api/unify_api.rst.txt b/hub/_sources/concepts/unify_api.rst.txt similarity index 100% rename from hub/_sources/api/unify_api.rst.txt rename to hub/_sources/concepts/unify_api.rst.txt diff --git a/hub/_sources/interfaces/building_router.rst.txt b/hub/_sources/console/building_router.rst.txt similarity index 100% rename from hub/_sources/interfaces/building_router.rst.txt rename to hub/_sources/console/building_router.rst.txt diff --git a/hub/_sources/interfaces/connecting_stack.rst.txt b/hub/_sources/console/connecting_stack.rst.txt similarity index 100% rename from hub/_sources/interfaces/connecting_stack.rst.txt rename to hub/_sources/console/connecting_stack.rst.txt diff --git a/hub/_sources/interfaces/running_benchmarks.rst.txt b/hub/_sources/console/running_benchmarks.rst.txt similarity index 100% rename from hub/_sources/interfaces/running_benchmarks.rst.txt rename to hub/_sources/console/running_benchmarks.rst.txt diff --git a/hub/_sources/demos/demos/LangChain/RAG_playground/README.md.txt b/hub/_sources/demos/demos/LangChain/RAG_playground/README.md.txt deleted file mode 100644 index 36bd6327b5..0000000000 --- a/hub/_sources/demos/demos/LangChain/RAG_playground/README.md.txt +++ /dev/null @@ -1,56 +0,0 @@ -# RAG Playground πŸ› - -[Demo](https://github.com/Anteemony/RAG-Playground/assets/103512255/0d944420-e3e8-43cb-aad3-0a459d8d0318) - - - - -A live version of the application is hosted on Streamlit, try it out yourself using the link below: -[RAG Playground on Streamlit](https://unify-rag-playground.streamlit.app/) - -## Introduction -Streamlit application that enables users to upload a pdf file and chat with an LLM for performing document analysis in a playground environment. -Compare the performance of LLMs across endpoint providers to find the best possible configuration for your speed, latency and cost requirements using the dynamic routing feature. -Play intuitively tuning the model hyperparameters as temperature, chunk size, chunk overlap or try the model with/without conversational capabilities. - -You find more model/provider information in the [Unify benchmark interface](https://unify.ai/hub). - -## Usage - -1. Visit the application: [RAG Playground](https://unify-rag-playground.streamlit.app/) -2. Input your Unify API Key. If you don’t have one yet, log in to the [Unify Console](https://console.unify.ai/) to get yours. -3. Select the Model and endpoint provider of your choice from the drop-down menu. You can find both model and provider information in the benchmark interface. -4. Upload your document(s) and click the Submit button. -5. Enjoy the application! - -## Repository and Local Deployment - -The repository is located at [RAG Playground Repository](https://github.com/Anteemony/RAG-Playground). - -To run the application locally, follow these steps: - -1. Clone the repository to your local machine. -2. Set up your virtual environment and install the dependencies from `requirements.txt`: - -```bash -python -m venv .venv -source .venv/bin/activate # On Windows use `.venv\Scripts\activate` -pip install -r requirements.txt -``` - -3. 
Run rag_script.py from Streamlit module - -```bash -python -m streamlit run rag_script.py -``` - -## Contributors - -| Name | GitHub Profile | -|------|----------------| -| Anthony Okonneh | [AO](https://github.com/Anteemony) | -| Oscar Arroyo Vega | [OscarAV](https://github.com/OscarArroyoVega) | -| Martin Oywa | [Martin Oywa](https://github.com/martinoywa) | diff --git a/hub/_sources/demos/demos/LangChain/README.md.txt b/hub/_sources/demos/demos/LangChain/README.md.txt deleted file mode 100644 index 69c20d226c..0000000000 --- a/hub/_sources/demos/demos/LangChain/README.md.txt +++ /dev/null @@ -1,20 +0,0 @@ -# LangChain Projects -This folder contains various projects built using the LangChain Unify Integration. Please headover to the corresponding folder of the project for more details. - -## Introduction -Provide a brief introduction to your project here. Describe what your project demonstrates, the tech stack used, the motivation behind the project, and briefly explain the necessary concepts used. Feel free to break down this section into multiple subsections depending on your project. - -## Quick Demo -Include a recorded video of the demo here. You should the embed the video in the README. - -## Repository and Deployment -Provide a link to the GitHub repository and instructions on how to run the app locally. If the app is deployed somewhere, provide a link to the deployed app. - -## Contributors -List the contributors to the project in a table format. For example: - -| Name | GitHub Profile | -|------|----------------| -| John Doe | [johndoe](https://github.com/johndoe) | -| Jane Doe | [janedoe](https://github.com/janedoe) | - diff --git a/hub/_sources/demos/demos/LlamaIndex/BasicUsage/unify.ipynb.txt b/hub/_sources/demos/demos/LlamaIndex/BasicUsage/unify.ipynb.txt deleted file mode 100644 index 4ed415e726..0000000000 --- a/hub/_sources/demos/demos/LlamaIndex/BasicUsage/unify.ipynb.txt +++ /dev/null @@ -1,286 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# LlamaIndex\n", - "[LlamaIndex](https://www.llamaindex.ai/) is a simple, flexible data framework for connecting custom data sources to large language models (LLMs). The integration with Unify allows you to route your queries to the best LLM endpoints, benchmark performance, and seamlessly switch providers with a single API key in your LlamaIndex LLM applications." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, let's install LlamaIndex πŸ¦™ and the Unify integration." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install llama-index-llms-unify llama-index" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Environment Setup\n", - "\n", - "Make sure to set the `UNIFY_API_KEY` environment variable. You can get a key from the [Unify Console](https://console.unify.ai/login)." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"UNIFY_API_KEY\"] = \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using LlamaIndex with Unify" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Basic Usage \n", - "\n", - "Below we initialize and query a chat model using the `llama-3-70b-chat` endpoint from `together-ai`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "CompletionResponse(text=\"I'm not actually a llama, but I'm doing great, thanks for asking! I'm a large language model, so I don't have feelings like humans do, but I'm always happy to chat with you and help with any questions or topics you'd like to discuss. How about you? How's your day going?\", additional_kwargs={}, raw={'id': '88b5fcf02e259527-LHR', 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"I'm not actually a llama, but I'm doing great, thanks for asking! I'm a large language model, so I don't have feelings like humans do, but I'm always happy to chat with you and help with any questions or topics you'd like to discuss. How about you? How's your day going?\", role='assistant', function_call=None, tool_calls=None))], 'created': 1716980504, 'model': 'llama-3-70b-chat@together-ai', 'object': 'chat.completion', 'system_fingerprint': None, 'usage': CompletionUsage(completion_tokens=67, prompt_tokens=17, total_tokens=84, cost=7.56e-05)}, logprobs=None, delta=None)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from llama_index.llms.unify import Unify\n", - "llm = Unify(model=\"llama-3-70b-chat@together-ai\")\n", - "llm.complete(\"How are you today, llama?\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Single Sign-On\n", - "\n", - "You can use Unify's SSO to query endpoints in different providers without making accounts with all of them. For example, all of these are valid endpoints:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"llama-2-70b-chat@together-ai\")\n", - "llm = Unify(model=\"gpt-3.5-turbo@openai\")\n", - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@mistral-ai\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This allows you to quickly switch and test different models and providers. You can look at all the available models/providers [here](https://unify.ai/hub)!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Runtime Dynamic Routing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As evidenced by our [benchmarks](https://unify.ai/benchmarks), the optimal provider for each model varies by geographic location and time of day due to fluctuating API performances. To cirumvent this, we automatically direct your requests to the \"top performing provider\" at runtime. To enable this feature, simply replace your query's provider with one of the [available routing modes](https://unify.ai/docs/api/deploy_router.html#optimizing-a-metric). Let's look at some examples:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"llama-2-70b-chat@input-cost\") # route to lowest input cost provider\n", - "llm = Unify(model=\"gpt-3.5-turbo@itl\") # route to provider with lowest inter token latency\n", - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@ttft\") # route to provider with lowest time to first token." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Quality Routing\n", - "Unify routes your queries to the best LLM on every prompt to consistently achieve better quality outputs than using a single, all-purpose, powerful model, at a fraction of the cost. This is achieved by using smaller models for simpler tasks, only using largers ones to handle complex queries.\n", - "\n", - "The router is benchmarked on various different data-sets such as `Open Hermes`, `GSM8K`, `HellaSwag`, `MMLU` and `MT-Bench` revealing that it can peform better than indivudal endpoints on average as explained [here](https://unify.ai/docs/concepts/routing.html#quality-routing). One can choose various different configurations of the router for a particular data-set from the [chat-interface](https://unify.ai/chat) as shown below:\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"router_2.58e-01_9.51e-04_3.91e-03@unify\")\n", - "llm = Unify(model=\"router_2.12e-01_5.00e-04_2.78e-04@unify\")\n", - "llm = Unify(model=\"router_2.12e-01_5.00e-04_2.78e-04@unify\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To learn more about quality routing, please refer to this [video](https://www.youtube.com/watch?v=ZpY6SIkBosE&feature=youtu.be)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Streaming and optimizing for latency\n", - "\n", - "If you are building an application where responsiveness is key, you most likely want to get a streaming response. On top of that, ideally you would use the provider with the lowest Time to First Token, to reduce the time your users are waiting for a response. Using Unify this would look something like:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@ttft\")\n", - "\n", - "response = llm.stream_complete(\n", - " \"Translate the following to German: \"\n", - " \"Hey, there's an emergency in translation street, \"\n", - " \"please send help asap!\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model and provider are : mixtral-8x7b-instruct-v0.1@mistral-ai\n", - "\n", - "Hallo, es gibt einen Notfall in der Übersetzungsstraße, bitte senden Sie Hilfe so schnell wie mΓΆglich!\n", - "\n", - "(Note: This is a loose translation and the phrase \"Übersetzungsstraße\" does not literally exist, but I tried to convey the same meaning as the original message.)" - ] - } - ], - "source": [ - "show_provider = True\n", - "for r in response:\n", - " if show_provider:\n", - " print(f\"Model and provider are : {r.raw['model']}\\n\")\n", - " show_provider = False\n", - " print(r.delta, end=\"\", flush=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Async calls and Lowest Input Cost\n", - "\n", - "Last but not the least, you can also run multiple requests asynchronously. For tasks such as document summarization, optimizing for input costs is crucial. We can use the `input-cost` dynamic routing mode to route our queries to the cheapest provider." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model and provider are : mixtral-8x7b-instruct-v0.1@deepinfra\n", - "\n", - " OpenAI: Pioneering 'safe' artificial general intelligence.\n" - ] - } - ], - "source": [ - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@input-cost\")\n", - "\n", - "response = await llm.acomplete(\n", - " \"Summarize this in 10 words or less. OpenAI is a U.S. based artificial intelligence \"\n", - " \"(AI) research organization founded in December 2015, researching artificial intelligence \"\n", - " \"with the goal of developing 'safe and beneficial' artificial general intelligence, \"\n", - " \"which it defines as 'highly autonomous systems that outperform humans at most economically \"\n", - " \"valuable work'. As one of the leading organizations of the AI spring, it has developed \"\n", - " \"several large language models, advanced image generation models, and previously, released \"\n", - " \"open-source models. Its release of ChatGPT has been credited with starting the AI spring\"\n", - ")\n", - "\n", - "print(f\"Model and provider are : {response.raw['model']}\\n\")\n", - "print(response)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hub/_sources/demos/demos/LlamaIndex/RAGPlayground/README.md.txt b/hub/_sources/demos/demos/LlamaIndex/RAGPlayground/README.md.txt deleted file mode 100644 index d2846da5e8..0000000000 --- a/hub/_sources/demos/demos/LlamaIndex/RAGPlayground/README.md.txt +++ /dev/null @@ -1,82 +0,0 @@ -# RAG Playground -[Demo](https://github.com/abhi2596/rag_demo/assets/80634226/08f6c7c4-65e3-49b4-bfb1-9a5db2cce248) - - - - -A live version of the application is hosted on Streamlit, try it out yourself using the link below: -[RAG Playground on Streamlit](https://unifyai-rag-playground.streamlit.app/) - -## Introduction - -The RAG Playground is an application designed to facilitate question-answering tasks based on uploaded PDF documents. It leverages LLamaIndex for RAG functionalities and utilizes Streamlit for the user interface. - -## Key Features - -- **PDF Upload:** Easily upload PDF files to the application. -- **Questioning:** Ask questions about the uploaded PDF documents. -- **RAG Integration:** Utilize LLamaIndex for RAG capabilities. -- **Embeddings:** Convert text to embeddings using the BAAI/bge-small-en-v1.5 model. -- **Reranker:** Reorder search results based on relevance to queries. -- **Streamlit Optimization:** Enhance performance using `@st.experimental_fragment` and `@st.cache_resource`. - -## Project Workflow - -1. **PDF Processing:** - - Load PDF files and extract text using PDFReader. - - Load data into Documents in LLamaIndex. -2. **Chunking and Conversion:** - - Chunk text and convert it into nodes using `VectorStoreIndex.from_documents`. - - Convert text to embeddings using the BAAI/bge-small-en-v1.5 model. -3. **Search Optimization:** - - Implement a reranker to reorder search results based on query relevance. - - Display top-ranked results after reranking. -4. 
**Interface Optimization:** - - Build the user interface using Streamlit. - - Optimize Streamlit performance with `@st.experimental_fragment` and `@st.cache_resource`. - -## Tech Stack Used - -- LLamaIndex -- Streamlit -- BAAI/bge-small-en-v1.5 model - -## Repository and Deployment -Github - https://github.com/abhi2596/UnifyAI_RAG_playground/tree/main -Streamlit App - https://unifyai-rag-playground.streamlit.app/ - -Instructions to run locally: - -1. First create a virtual environment in python - -``` -python -m venv -``` -2. Activate it and install poetry - -``` -source /Scripts/activate - Windows -source /bin/activate - Linux/Unix -pip install poetry -``` -3. Clone the repo - -``` -git clone https://github.com/abhi2596/UnifyAI_RAG_playground/tree/main -``` -4. Run the following commands - -``` -poetry install -cd rag -streamlit run app.py -``` - -## Contributors - -| Name | GitHub Profile | -|------|----------------| -| Abhijeet Chintakunta | [abhi2596](https://github.com/abhi2596) | diff --git a/hub/_sources/demos/demos/LlamaIndex/README.md.txt b/hub/_sources/demos/demos/LlamaIndex/README.md.txt deleted file mode 100644 index d8432fc525..0000000000 --- a/hub/_sources/demos/demos/LlamaIndex/README.md.txt +++ /dev/null @@ -1,20 +0,0 @@ -# LlamaIndex Projects -This folder contains various projects built using the LLamaIndex Unify Integration. Please headover to the corresponding folder of the project for more details. - -## Introduction -Provide a brief introduction to your project here. Describe what your project demonstrates, the tech stack used, the motivation behind the project, and briefly explain the necessary concepts used. Feel free to break down this section into multiple subsections depending on your project. - -## Quick Demo -Include a recorded video of the demo here. You should the embed the video in the README. - -## Repository and Deployment -Provide a link to the GitHub repository and instructions on how to run the app locally. If the app is deployed somewhere, provide a link to the deployed app. - -## Contributors -List the contributors to the project in a table format. For example: - -| Name | GitHub Profile | -|------|----------------| -| John Doe | [johndoe](https://github.com/johndoe) | -| Jane Doe | [janedoe](https://github.com/janedoe) | - diff --git a/hub/_sources/demos/demos/README.md.txt b/hub/_sources/demos/demos/README.md.txt deleted file mode 100644 index c6c5e36231..0000000000 --- a/hub/_sources/demos/demos/README.md.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Demos - -This repository contains various examples demonstrating the usage of Unify with different LLM projects such as LangChain and LlamaIndex. - -## File Structure - -The repository is structured as follows: - -- **LangChain**: Contains demos built using Unify's Langchain integration. -- **LlamaIndex**: Includes demos built using Unify's LlamaIndex integration. -- **Unify**: Contains demos built using the Unify Python Package. - -Feel free to explore each folder to discover examples and demos tailored for each respective project. - -## Contributing - -We welcome contributions from the community to improve the demos repository. If you have additional demos, examples, or improvements to existing ones, follow these steps to create a pull request (PR): - -1. **Project Placement**: Position your project in the correct folder. For LangChain, LlamaIndex, or Unify related projects, create a new folder within the respective directory. -2. 
**Project README**: Each project should have a README file that follows this [template](https://github.com/unifyai/demos/blob/main/template/README_TEMPLATE.md#project-title). -3. **PR Review**: Request a review a from Anwaar Khalid (GitHub: hello-fri-end) diff --git a/hub/_sources/demos/demos/Unify/AsyncVsSync/AsyncVsSync.ipynb.txt b/hub/_sources/demos/demos/Unify/AsyncVsSync/AsyncVsSync.ipynb.txt deleted file mode 100644 index d8051e2962..0000000000 --- a/hub/_sources/demos/demos/Unify/AsyncVsSync/AsyncVsSync.ipynb.txt +++ /dev/null @@ -1,408 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Gp0FAI1Zkbxw" - }, - "source": [ - "# Synchronous VS Asynchronous Clients\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i1QFiUcdR-aJ" - }, - "source": [ - "Given the growing demand for real-time applications and user demands for instant responses, it's crucial to grasp the performance implications between Sync and Async clients. In this notebook, we'll delve into the variations between asynchronous and synchronous response times using UnifyAI's API." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rk9DTfrG1Egv" - }, - "source": [ - "In order to run this notebook, you need to generate your UNIFY key from the [console](https://console.unify.ai/login?callbackUrl=%2F). Once you have it, assign it to the `UNIFY_KEY` variable below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3HEAU0aE1Cb0" - }, - "outputs": [], - "source": [ - "UNIFY_KEY=#ENTERUNIFYKEY" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EdCA_YoYQ-X8" - }, - "source": [ - "#### Install Dependencies" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HS2xRFiNPSiL" - }, - "source": [ - "To run this notebook, you will need to install the `unifyai` [python package](https://pypi.org/project/unifyai/). 
You can do so by running the cell below ⬇️" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0MqRVTcHgWhl", - "outputId": "489a5a4d-f4f3-41cf-9d5f-47cb77a3978b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting unifyai\n", - " Downloading unifyai-0.8.1-py3-none-any.whl (14 kB)\n", - "Collecting openai<2.0.0,>=1.12.0 (from unifyai)\n", - " Downloading openai-1.17.1-py3-none-any.whl (268 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.3/268.3 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: requests<3.0.0,>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from unifyai) (2.31.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (3.7.1)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (1.7.0)\n", - "Collecting httpx<1,>=0.23.0 (from openai<2.0.0,>=1.12.0->unifyai)\n", - " Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (2.6.4)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.66.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.11.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai) (3.6)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai) (2024.2.2)\n", - "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.12.0->unifyai) (1.2.0)\n", - "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai)\n", - " Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai)\n", - " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from 
pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai) (2.16.3)\n", - "Installing collected packages: h11, httpcore, httpx, openai, unifyai\n", - "Successfully installed h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 openai-1.17.1 unifyai-0.8.1\n" - ] - } - ], - "source": [ - "!pip install unifyai" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rhgBuOhzRFeE" - }, - "source": [ - "#### Synchronous Clients" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yX6plilUlGYl" - }, - "source": [ - "A Synchronous client handles requests sequentially, processing one at a time. This means that each request must be fully handled before the next one is processed, resulting in potential blocking of the program's execution. You can use a Sync client with Unify as shown below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VGGw4tDagqV5", - "outputId": "652924b3-4a25-4946-fc1a-c59281d9b33a" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HOO-RAY! *tutu* Hello there, young grasshopper! *bray* Isaac Newton was a majestic figure known for his work in math and science. He was born in 1642 in England and grew up to be a brilliant thinker and a fellow of the Royal Society. *twirl*\n", - "\n", - "Newton is perhaps best known for his laws of motion and his law of universal gravitation. These laws explained how objects move and how gravity works. He also discovered calculus, which is a way of using math to understand how things change over time. *mathematical mnum hop*\n", - "\n", - "But that's not all, oh no! Newton was also a bit of an alchemist and studied the nature of light. He even invented a fancy piece of equipment called a \"reflecting telescope\" to observe the heavens. *shimmer*\n", - "\n", - "Newton was a true renaissance thinker, and his contributions to science and mathematics are still celebrated today. *tutu* He was a true llama of learning, and his legacy continues to inspire us all. *bray*\n" - ] - } - ], - "source": [ - "from unify import Unify\n", - "unify = Unify(\n", - " api_key=UNIFY_KEY,\n", - " endpoint=\"llama-2-13b-chat@anyscale\"\n", - ")\n", - "response = unify.generate(user_prompt=\"Hello Llama! Who was Isaac Newton?\")\n", - "print(response)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1Swnehb9Rvdh" - }, - "source": [ - "#### Async Clients" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H-A9MRms2n-g" - }, - "source": [ - "An Asynchronous client can handle multiple requests concurrently without blocking. To use the Async client, import `AsyncUnify` instead of `Unify` and use `await` with the `.generate` method. This signals to the program to pause execution until the asynchronous operation completes. Additionally, we'll use the `asyncio` library for managing asynchronous execution and coroutines. `asyncio` provides tools for building concurrent programs using coroutines, which can be paused and resumed, allowing for efficient handling of asynchronous tasks.\n", - "\n", - "\n", - "NOTE: Running ansyncio in notebooks conflicts with the existing event loop run in the notebook. As a workaround, we need to use `net_asyncio.apply`. Please see [this issue](https://github.com/jupyter/notebook/issues/3397) for more details." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "b25iLp5nOPxQ", - "outputId": "d5b6b0d8-d82d-4804-9ffb-989426f130ef" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HEYA HUMAN! *bleshes* Oh, you're talkin' 'bout Sir Isaac Newton, the famous English mathematician and physicist? *noms on some grass* He lived in the 17th and 18th centuries, and is known for his laws of motion and universal gravitation. *giggles* He was a pretty smart dude, if I do say so myself! *flaunts my banned-from-the-library-for-life status*\n", - "\n", - "But enough about me, let's talk about Newton! *grin* He was born in Woolsthorpe, Lincolnshire, England in 1643, and grew up to be a brilliant mathematician and physicist. He studied at Trinity College in Cambridge, and later became a professor there. *nbd*\n", - "\n", - "Newton's most famous contributions to science are his three laws of motion, which describe how objects move and respond to forces. He also developed the Law of Universal Gravitation, which states that every object in the universe attracts every other object with a force proportional to the product of their masses and inversely proportional to the square of the distance between them. *geek out*\n", - "\n", - "Newton was also a skilled inventor and engineer, and he designed and built all sorts of cool stuff, like a reflecting telescope and a machine for calculating the square root of numbers. *impressed*\n", - "\n", - "Despite his many accomplishments, Newton was a pretty private person and wasn't always the most sociable guy. He was known to be pretty temperamental and had some pretty interesting beliefs, like the idea that alchemy was a valid scientific pursuit. *raises an eyebrow* But hey, who am I to judge? *shrugs*\n", - "\n", - "So there you have it, human! That's the basic scoop on Sir Isaac Newton. I hope you found that enlightening. *wink* Now, if you'll excuse me, I need to go work on my own groundbreaking research... or at least, my own Instagram captions. *smizes*\n" - ] - } - ], - "source": [ - "from unify import AsyncUnify\n", - "import asyncio\n", - "import nest_asyncio\n", - "nest_asyncio.apply()\n", - "\n", - "async_unify = AsyncUnify(\n", - " api_key=UNIFY_KEY,\n", - " endpoint=\"llama-2-13b-chat@anyscale\"\n", - ")\n", - "\n", - "async def main():\n", - " responses = await async_unify.generate(user_prompt=\"Hello Llama! Who was Isaac Newton?\")\n", - " print(responses)\n", - "\n", - "asyncio.run(main())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TxK6r9yrnfUS" - }, - "source": [ - "Now, our goal is to compare the response times of synchronous vs asynchronous clients when handling multiple requests. 
Let's start by defining some helper functions.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "C8y34VMGnI93" - }, - "outputs": [], - "source": [ - "async def send_async_request(user_prompt):\n", - " \"\"\"\n", - " Uses an Async client to generate the response for the user_prompt.\n", - "\n", - " Parameters:\n", - " user_prompt (str): The prompt provided by the user.\n", - "\n", - " Returns:\n", - " str: The response generated.\n", - " \"\"\"\n", - " response = await async_unify.generate(user_prompt=user_prompt)\n", - " return response\n", - "\n", - "def send_sync_request(user_prompt):\n", - " \"\"\"\n", - " Uses a sync client to generate the response for the user_prompt.\n", - "\n", - " Parameters:\n", - " user_prompt (str): The prompt provided by the user.\n", - "\n", - " Returns:\n", - " str: The response generated.\n", - " \"\"\"\n", - " response = unify.generate(user_prompt=user_prompt)\n", - " return response\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Uuvj58CqiLUJ" - }, - "source": [ - "We'll create two functions to send multiple requests to asynchronous and synchronous clients, respectively, and measure their processing time. For the synchronous client, requests will be sent sequentially in a loop, while for the asynchronous client, we'll utilize `asyncio.gather` to execute multiple requests concurrently.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VoVWnxboiKem" - }, - "outputs": [], - "source": [ - "import time\n", - "import asyncio\n", - "\n", - "async def run_async_requests(num_requests):\n", - " \"\"\"\n", - " Runs multiple asynchronous requests for generating responses based on a user prompt and measures the time taken.\n", - "\n", - " Parameters:\n", - " num_requests (int): The number of requests to be sent.\n", - "\n", - " Returns:\n", - " float: The total time taken to process all requests.\n", - " \"\"\"\n", - " user_prompt = \"Hello! Tell me your favorite physics fact!\"\n", - " start = time.time()\n", - " _ = await asyncio.gather(*(send_async_request(user_prompt) for _ in range(num_requests)))\n", - " end = time.time()\n", - " return end - start\n", - "\n", - "def run_sync_requests(num_requests):\n", - " \"\"\"\n", - " Runs multiple synchronous requests for generating responses based on a user prompt and measures the time taken.\n", - "\n", - " Parameters:\n", - " num_requests (int): The number of requests to be sent.\n", - "\n", - " Returns:\n", - " float: The total time taken to process all requests.\n", - " \"\"\"\n", - " user_prompt = \"Hello! Tell me your favorite physics fact!\"\n", - " start = time.time()\n", - " _ = [send_sync_request(user_prompt) for _ in range(num_requests)]\n", - " end = time.time()\n", - " return end - start\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Jjavbv5vtfrP" - }, - "source": [ - "Now, let's measure the time taken by each client for 10 requests." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "s7tGumQ0iYRg", - "outputId": "ad621069-0f10-4a0e-f78e-60852474f55b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Asynchronous Response Times: 8.351824045181274\n", - "Synchronous Response Times: 55.45608472824097\n" - ] - } - ], - "source": [ - " num_requests = 10\n", - " # Send asynchronous requests\n", - " async_response_times = asyncio.run(run_async_requests(num_requests))\n", - " # Print response times\n", - " print(\"Asynchronous Response Times:\", async_response_times)\n", - " # Send synchronous requests\n", - " sync_response_times = run_sync_requests(num_requests)\n", - " print(\"Synchronous Response Times:\", sync_response_times)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B3nJiSwBwoyk" - }, - "source": [ - "As expected, the Asynchronous client peforms much better than the sequential synchorous client." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xfEzRaReyxrp" - }, - "source": [ - "# Round Up\n", - " Congratulations! πŸš€ You now have an understanding of Async and Sync clients and can hopefully leverage these concepts in your own applications.\n", - "\n", - " In the next tutorial, we will expore how to build an interactive ChatBot Agent! See you there ➑️!\n", - "\n", - "\n" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/hub/_sources/demos/demos/Unify/ChatBot/ChatBot.ipynb.txt b/hub/_sources/demos/demos/Unify/ChatBot/ChatBot.ipynb.txt deleted file mode 100644 index 9fc59a709a..0000000000 --- a/hub/_sources/demos/demos/Unify/ChatBot/ChatBot.ipynb.txt +++ /dev/null @@ -1,721 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Gp0FAI1Zkbxw" - }, - "source": [ - "# Build a ChatBot" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i1QFiUcdR-aJ" - }, - "source": [ - "In this notebook, we will build an interactive chatbot using the `unifyai` python package.\n", - "\n", - "Under the hood, chatbots are very simple to implement. All LLM endpoints are stateless, and therefore the entire conversation history is repeatedly fed as input to the model. All that is required of the local agent is to store this history, and correctly pass it to the model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EdCA_YoYQ-X8" - }, - "source": [ - "#### Install Dependencies" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HS2xRFiNPSiL" - }, - "source": [ - "To run this notebook, you will need to install the `unifyai` [python package](https://pypi.org/project/unifyai/). 
You can do so by running the cell below ⬇️" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0MqRVTcHgWhl", - "outputId": "dbae7a55-ecad-478c-bd57-c0ecdb0eaeaf" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting unifyai==0.8.1\n", - " Downloading unifyai-0.8.1-py3-none-any.whl (14 kB)\n", - "Requirement already satisfied: openai<2.0.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from unifyai==0.8.1) (1.17.1)\n", - "Requirement already satisfied: requests<3.0.0,>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from unifyai==0.8.1) (2.31.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (3.7.1)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (1.7.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (0.27.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (2.6.4)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (4.66.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.12.0->unifyai==0.8.1) (4.11.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai==0.8.1) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai==0.8.1) (3.6)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai==0.8.1) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.31.0->unifyai==0.8.1) (2024.2.2)\n", - "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (1.2.0)\n", - "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (1.0.5)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai==0.8.1) (2.16.3)\n", - "Installing collected packages: unifyai\n", - " Attempting uninstall: unifyai\n", - " Found existing installation: unifyai 0.8.0\n", - " Uninstalling unifyai-0.8.0:\n", - " Successfully uninstalled unifyai-0.8.0\n", - "Successfully installed unifyai-0.8.1\n" - ] - } - ], - "source": [ - "!pip install unifyai" - ] - 
}, - { - "cell_type": "markdown", - "metadata": { - "id": "rhgBuOhzRFeE" - }, - "source": [ - "#### The Agent" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yX6plilUlGYl" - }, - "source": [ - "We define a simple chatbot class below, with the only public function being `run`. Before starting, you should to obtain a UNIFY key from the [console page](https://console.unify.ai/login?callbackUrl=%2F) and assign it to the `UNIFY_KEY` variable below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "feMwwdteRuOL" - }, - "outputs": [], - "source": [ - "UNIFY_KEY = #ENTERUNIFYKEY" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VGGw4tDagqV5" - }, - "outputs": [], - "source": [ - "import sys\n", - "\n", - "from typing import Optional\n", - "from unify import Unify\n", - "\n", - "\n", - "class ChatBot:\n", - " \"\"\"Agent class represents an LLM chat agent.\"\"\"\n", - "\n", - " def __init__(\n", - " self,\n", - " api_key: Optional[str] = None,\n", - " endpoint: Optional[str] = None,\n", - " model: Optional[str] = None,\n", - " provider: Optional[str] = None,\n", - " ) -> None:\n", - " \"\"\"\n", - " Initializes the ChatBot object.\n", - "\n", - " Args:\n", - " api_key (str, optional): API key for accessing the Unify API.\n", - " If None, it attempts to retrieve the API key from the\n", - " environment variable UNIFY_KEY.\n", - " Defaults to None.\n", - "\n", - " endpoint (str, optional): Endpoint name in OpenAI API format:\n", - " /@\n", - " Defaults to None.\n", - "\n", - " model (str, optional): Name of the model. If None,\n", - " endpoint must be provided.\n", - "\n", - " provider (str, optional): Name of the provider. If None,\n", - " endpoint must be provided.\n", - " Raises:\n", - " UnifyError: If the API key is missing.\n", - " \"\"\"\n", - " self._message_history = []\n", - " self._paused = False\n", - " self._client = Unify(\n", - " api_key=api_key,\n", - " endpoint=endpoint,\n", - " model=model,\n", - " provider=provider,\n", - " )\n", - "\n", - " @property\n", - " def client(self) -> str:\n", - " \"\"\"\n", - " Get the client object.\n", - "\n", - " Returns:\n", - " str: The model name.\n", - " \"\"\"\n", - " return self._client\n", - "\n", - " def set_client(self, value: Unify) -> None:\n", - " \"\"\"\n", - " Set the model name.\n", - "\n", - " Args:\n", - " value: The unify client.\n", - " \"\"\"\n", - " self._client = value\n", - "\n", - " @property\n", - " def model(self) -> str:\n", - " \"\"\"\n", - " Get the model name.\n", - "\n", - " Returns:\n", - " str: The model name.\n", - " \"\"\"\n", - " return self._client.model\n", - "\n", - " def set_model(self, value: str) -> None:\n", - " \"\"\"\n", - " Set the model name.\n", - "\n", - " Args:\n", - " value (str): The model name.\n", - " \"\"\"\n", - " self._client.set_model(value)\n", - " if self._client.provider:\n", - " self._client.set_endpoint(\"@\".join([value, self._client.provider]))\n", - " else:\n", - " mode = self._client.endpoint.split(\"@\")[1]\n", - " self._client.set_endpoint(\"@\".join([value, mode]))\n", - "\n", - " @property\n", - " def provider(self) -> Optional[str]:\n", - " \"\"\"\n", - " Get the provider name.\n", - "\n", - " Returns:\n", - " str: The provider name.\n", - " \"\"\"\n", - " return self._client.provider\n", - "\n", - " def set_provider(self, value: str) -> None:\n", - " \"\"\"\n", - " Set the provider name.\n", - "\n", - " Args:\n", - " value (str): The provider name.\n", - " \"\"\"\n", - " 
self._client.set_provider(value)\n", - " self._client.set_endpoint(\"@\".join([self._model, value]))\n", - "\n", - " @property\n", - " def endpoint(self) -> str:\n", - " \"\"\"\n", - " Get the endpoint name.\n", - "\n", - " Returns:\n", - " str: The endpoint name.\n", - " \"\"\"\n", - " return self._client.endpoint\n", - "\n", - " def set_endpoint(self, value: str) -> None:\n", - " \"\"\"\n", - " Set the model name.\n", - "\n", - " Args:\n", - " value (str): The endpoint name.\n", - " \"\"\"\n", - " self._client.set_endpoint(value)\n", - " self._client.set_model(value.split(\"@\")[0])\n", - " self._client.set_provider(value.split(\"@\")[1])\n", - "\n", - " def _get_credits(self):\n", - " \"\"\"\n", - " Retrieves the current credit balance from associated with the UNIFY account.\n", - "\n", - " Returns:\n", - " float: Current credit balance.\n", - " \"\"\"\n", - " return self._client.get_credit_balance()\n", - "\n", - " def _process_input(self, inp: str, show_credits: bool, show_provider: bool):\n", - " \"\"\"\n", - " Processes the user input to generate AI response.\n", - "\n", - " Args:\n", - " inp (str): User input message.\n", - " show_credits (bool): Whether to show credit consumption.\n", - " show_credits (bool): Whether to show provider used.\n", - "\n", - " Yields:\n", - " str: Generated AI response chunks.\n", - " \"\"\"\n", - " self._update_message_history(role=\"user\", content=inp)\n", - " initial_credit_balance = self._get_credits()\n", - " stream = self._client.generate(\n", - " messages=self._message_history,\n", - " stream=True,\n", - " )\n", - " words = \"\"\n", - " for chunk in stream:\n", - " words += chunk\n", - " yield chunk\n", - "\n", - " self._update_message_history(\n", - " role=\"assistant\",\n", - " content=words,\n", - " )\n", - " final_credit_balance = self._get_credits()\n", - " if show_credits:\n", - " sys.stdout.write(\n", - " \"\\n(spent {:.6f} credits)\".format(\n", - " initial_credit_balance - final_credit_balance,\n", - " ),\n", - " )\n", - " if show_provider:\n", - " sys.stdout.write(\"\\n(provider: {})\".format(self._client.provider))\n", - "\n", - " def _update_message_history(self, role: str, content: str):\n", - " \"\"\"\n", - " Updates message history with user input.\n", - "\n", - " Args:\n", - " role (str): Either \"assistant\" or \"user\".\n", - " content (str): User input message.\n", - " \"\"\"\n", - " self._message_history.append(\n", - " {\n", - " \"role\": role,\n", - " \"content\": content,\n", - " },\n", - " )\n", - "\n", - " def clear_chat_history(self):\n", - " \"\"\"Clears the chat history.\"\"\"\n", - " self._message_history.clear()\n", - "\n", - " def run(self, show_credits: bool = False, show_provider: bool = False):\n", - " \"\"\"\n", - " Starts the chat interaction loop.\n", - "\n", - " Args:\n", - " show_credits (bool, optional): Whether to show credit consumption.\n", - " Defaults to False.\n", - " show_provider (bool, optional): Whether to show the provider used.\n", - " Defaults to False.\n", - " \"\"\"\n", - " if not self._paused:\n", - " sys.stdout.write(\n", - " \"Let's have a chat. (Enter `pause` to pause and `quit` to exit)\\n\",\n", - " )\n", - " self.clear_chat_history()\n", - " else:\n", - " sys.stdout.write(\n", - " \"Welcome back! 
(Remember, enter `pause` to pause and `quit` to exit)\\n\",\n", - " )\n", - " self._paused = False\n", - " while True:\n", - " sys.stdout.write(\"> \")\n", - " inp = input()\n", - " if inp == \"quit\":\n", - " self.clear_chat_history()\n", - " break\n", - " elif inp == \"pause\":\n", - " self._paused = True\n", - " break\n", - " for word in self._process_input(inp, show_credits, show_provider):\n", - " sys.stdout.write(word)\n", - " sys.stdout.flush()\n", - " sys.stdout.write(\"\\n\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1Swnehb9Rvdh" - }, - "source": [ - "#### Let's Chat" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Qrsyr6gZmGFs" - }, - "source": [ - "Now, we can instantiate and chat with this agent. For this demo, we'll utilize the `llama-2-7b-chat` model from `anyscale`. However, you have the flexibility to select any model and provider from our supported options on the [benchmarks interface](https://unify.ai/hub)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "b25iLp5nOPxQ", - "outputId": "a1f6c38f-9774-4544-e761-2ba667eba787" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> Hi, nice to meet you. My name is Foo Barrymore, and I am 25 years old.\n", - " Hello Foo! Nice to meet you too. I'm just an AI, I don't have a personal name, but I'm here to help you with any questions or concerns you might have. How has your day been so far?\n", - "> How old am I?\n", - " You've told me that you're 25 years old. Is there anything else you'd like to know or discuss?\n", - "> Your memory is astounding\n", - " Thank you! I'm glad you think so. I'm designed to remember and process large amounts of information, and I'm constantly learning and improving my abilities. However, it's important to note that my memory is not perfect, and there may be times when I forget or misremember certain details. If you have any specific questions or concerns about my memory or abilities, feel free to ask!\n", - "> quit\n" - ] - } - ], - "source": [ - "agent = ChatBot(api_key = UNIFY_KEY, endpoint = \"llama-2-70b-chat@anyscale\")\n", - "agent.run()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TxK6r9yrnfUS" - }, - "source": [ - "You can also see how many credits your prompt used. This option is set in the constructor, but it can be overwritten during the run command. When enabled, each response from the chatbot will then be appended with the credits spent:\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "C8y34VMGnI93", - "outputId": "b5d46d0c-72a5-4b17-81a8-e78188eb835e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> What is the capital of Palestine?\n", - " The question of the capital of Palestine is a politically sensitive and complex issue. The status of Jerusalem is disputed between Israelis and Palestinians, with both sides claiming it as their capital.\n", - "\n", - "The Palestinian National Authority, which governs the Palestinian territories in the West Bank and Gaza Strip, has its administrative center in Ramallah, which is often referred to as the \"de facto capital\" of Palestine. 
However, the Palestinian Authority has not declared a capital city, and the issue remains a matter of debate and negotiation in the Israeli-Palestinian peace process.\n", - "\n", - "The international community has not recognized any capital of Palestine, and many countries maintain their diplomatic missions to the Palestinian Authority in Tel Aviv, Israel, rather than in Ramallah or East Jerusalem, which is claimed by the Palestinians as the capital of a future Palestinian state.\n", - "\n", - "It is important to note that the issue of the capital of Palestine is closely tied to the broader conflict between Israelis and Palestinians, and any resolution to the conflict will need to address this issue in a way that is acceptable to both sides.\n", - "(spent 0.000274 credits)\n", - "> quit\n" - ] - } - ], - "source": [ - "agent.run(show_credits=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Yc_rSlnroeWK" - }, - "source": [ - "Finally, you can switch providers half-way through the conversation easily. This can be useful to handle prompt of varying complexity.\n", - "\n", - "For example we can start with a small model for answering simple questions, such as recalling facts, and then move to a larger model for a more complex task, such as creative writing." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0N9GeB9KnrX-", - "outputId": "ae913b2c-2bbf-4ff9-f9b6-fa98ae376c13" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> What is the capital of Portugal?\n", - " The capital of Portugal is Lisbon (Portuguese: Lisboa).\n", - "(spent 0.000032 credits)\n", - "> My name is JosΓ© Mourinho.\n", - " Ah, I see! JosΓ© Mourinho is a well-known Portuguese football manager and former football player. He has managed several top-level clubs, including Chelsea, Inter Milan, Real Madrid, and Manchester United. Mourinho is known for his tactical approach to football and his ability to motivate his players. He has won numerous honors and awards throughout his career, including several league titles, domestic cups, and European championships. Is there anything else you'd like to know about JosΓ© Mourinho?\n", - "(spent 0.000159 credits)\n", - "> pause\n" - ] - } - ], - "source": [ - "agent = ChatBot(api_key = UNIFY_KEY, endpoint = \"llama-2-70b-chat@anyscale\")\n", - "agent.run(show_credits=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "gQnQFGf0qxTE", - "outputId": "24632ab9-8dff-414f-ab4d-cc57b444989e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Welcome back! 
(Remember, enter `pause` to pause and `quit` to exit)\n", - "> Please write me a poem about my life in Lisbon, using my name in the poem.\n", - "In Lisbon's embrace, where tales intertwine,\n", - "Lives JosΓ© Mourinho, beneath the sun's fine shine.\n", - "From cobblestone streets where echoes dance,\n", - "To the Tagus' gentle waves that entrance.\n", - "\n", - "In youth, he dreamt beneath Iberian skies,\n", - "Where passion is fierce and ambition never dies.\n", - "With a ball at his feet and dreams in his heart,\n", - "In Lisbon's grand story, he crafted his part.\n", - "\n", - "Eduardo VII Park, in the spring's embrace,\n", - "Where thoughts of tactics first took place.\n", - "Through Alfama's alleys, past Fado's mournful sound,\n", - "Mourinho's purpose, in football, was found.\n", - "\n", - "From Benfica's nest to UniΓ£o de Leiria's helm,\n", - "His journey began, in a realm\n", - "Where strategies and plays, meticulously spun,\n", - "Foreshadowed the triumphs that would be won.\n", - "\n", - "In EstΓ‘dio da Luz, where eagles soar,\n", - "Mourinho pondered scores and more.\n", - "Though his stay was brief, the impact was deep;\n", - "In Lisbon's lore, his legacy would steep.\n", - "\n", - "The boy from SetΓΊbal, with Lisbon in his tale,\n", - "Set forth to conquer, to win, and to prevail.\n", - "Through Porto, London, Milan, Madrid's grand stage,\n", - "His story was written, page by page.\n", - "\n", - "Yet, amidst the victories and the fame's bright light,\n", - "In his heart, Lisbon remains, ever so bright.\n", - "For it's there JosΓ© Mourinho's dreams took flight,\n", - "In Lisbon's embrace, under the starry night.\n", - "\n", - "So, here's to Mourinho, with Lisbon's spirit in his veins,\n", - "Where the love for the game forever remains.\n", - "In every triumph, in every fall,\n", - "Lisbon, his beginning, the most cherished of all.\n", - "(spent 0.012020 credits)\n", - "> quit\n" - ] - } - ], - "source": [ - "agent.set_endpoint(\"gpt-4-turbo@openai\")\n", - "agent.run(show_credits=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dQ0AqGSvrTaP" - }, - "source": [ - "Switching between providers mid-conversation makes it much easier to maximize quality and runtime performance based on the latest metrics, and also save on costs!\n", - "\n", - "In fact, you can automatically optimize for a metric of your choice with our [dynamic routing modes](https://unify.ai/docs/hub/concepts/runtime_routing.html#available-modes). For example, you can optimize for speed as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "n4W-35vbrFDP", - "outputId": "6065088a-b79d-4ab7-96d5-7dd1cfc67ccf" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> Tell me your favorite physics fact.\n", - "My favorite physics fact is that the universe is still expanding! This means that the galaxies that are currently moving away from us will continue to move away from us, and eventually, they will move faster than the speed of light. This is known as the \"dark energy\" that is thought to be responsible for the acceleration of the universe's expansion.\n", - "\n", - "I find this fascinating because it shows that the universe is still evolving and changing, and there is still so much to learn about it. 
It's mind-boggling to think about the vastness of space and the mysteries that it holds.\n", - "\n", - "Additionally, this fact also reminds me of the importance of continuous learning and exploration. There is always more to discover and understand, and it's important to have a curious and open-minded approach to life.\n", - "\n", - "I hope this fact inspires you to learn more about the wonders of the universe!\n", - "(provider: fireworks-ai)\n", - "> quit\n" - ] - } - ], - "source": [ - "agent.set_endpoint(\"llama-2-70b-chat@highest-tks-per-sec\")\n", - "agent.run(show_provider=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wqSzrxL9WydO" - }, - "source": [ - "The flag `show_provider` ensures that the specific provider is printed at the end of each response. For example, sometimes `anyscale` might be the fastest, and at other times it might be `together-ai` or `fireworks-ai`. This flag enables you to keep track of what provider is being used under the hood." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RM1uIFFaG9Gj" - }, - "source": [ - "If the task is to summarize a document or your chat history grows, typically the input-cost becomes the primary cost driver. You can use our `lowest-input-cost` mode to direct queries to the provider with the lowest input cost automatically." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-1SG4s7BIpmR", - "outputId": "18a2f091-3dc9-4065-c475-85c4b32e2bac" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> Summarize the following in less than 10 words: Sir Isaac Newton FRS (25 December 1642 – 20 March 1726/27[a]) was an English polymath active as a mathematician, physicist, astronomer, alchemist, theologian, and author who was described in his time as a natural philosopher.[7] He was a key figure in the Scientific Revolution and the Enlightenment that followed. His pioneering book PhilosophiΓ¦ Naturalis Principia Mathematica (Mathematical Principles of Natural Philosophy), first published in 1687, consolidated many previous results and established classical mechanics.[8][9] Newton also made seminal contributions to optics, and shares credit with German mathematician Gottfried Wilhelm Leibniz for developing infinitesimal calculus, though he developed calculus years before Leibniz.[10][11] In the Principia, Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint for centuries until it was superseded by the theory of relativity. Newton used his mathematical description of gravity to derive Kepler's laws of planetary motion, account for tides, the trajectories of comets, the precession of the equinoxes and other phenomena, eradicating doubt about the Solar System's heliocentricity.[12] He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles. Newton's inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis, La Condamine, and others, convincing most European scientists of the superiority of Newtonian mechanics over earlier systems. Newton built the first practical reflecting telescope and developed a sophisticated theory of colour based on the observation that a prism separates white light into the colours of the visible spectrum. 
His work on light was collected in his highly influential book Opticks, published in 1704. He also formulated an empirical law of cooling, made the first theoretical calculation of the speed of sound, and introduced the notion of a Newtonian fluid. In addition to his work on calculus, as a mathematician Newton contributed to the study of power series, generalised the binomial theorem to non-integer exponents, developed a method for approximating the roots of a function, and classified most of the cubic plane curves. Newton was a fellow of Trinity College and the second Lucasian Professor of Mathematics at the University of Cambridge. He was a devout but unorthodox Christian who privately rejected the doctrine of the Trinity. He refused to take holy orders in the Church of England, unlike most members of the Cambridge faculty of the day. Beyond his work on the mathematical sciences, Newton dedicated much of his time to the study of alchemy and biblical chronology, but most of his work in those areas remained unpublished until long after his death. Politically and personally tied to the Whig party, Newton served two brief terms as Member of Parliament for the University of Cambridge, in 1689–1690 and 1701–1702. He was knighted by Queen Anne in 1705 and spent the last three decades of his life in London, serving as Warden (1696–1699) and Master (1699–1727) of the Royal Mint, as well as president of the Royal Society (1703–1727).\n", - " Newton: polymath, mathematician, physicist, astronomer, alchemist, theologian, and author.\n", - "(provider: octoai)\n", - "> quit\n" - ] - } - ], - "source": [ - "agent = ChatBot(api_key=UNIFY_KEY, endpoint=\"llama-2-70b-chat@lowest-input-cost\")\n", - "agent.run(show_provider=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W-EaEPxAKdKe" - }, - "source": [ - "# Python Package" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W9vhGX9aKncg" - }, - "source": [ - "The python package already contains the `ChatBot` agent and you may use it directly as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "en7GCev9KmgX", - "outputId": "fcaa2b15-88a1-4108-e68e-a95b4c403302" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Let's have a chat. (Enter `pause` to pause and `quit` to exit)\n", - "> Hey! How's it going?\n", - " Hello! I'm doing well, thank you for asking! It's going great here, just busy with various tasks and learning new things. However, I must point out that this conversation is a bit unusual as I'm just an AI and don't have personal experiences or emotions like humans do. I'm here to help answer any questions you may have, so feel free to ask me anything!\n", - "> quit\n" - ] - } - ], - "source": [ - "from unify import ChatBot\n", - "chatbot = ChatBot(api_key = UNIFY_KEY, endpoint=\"llama-2-7b-chat@anyscale\")\n", - "chatbot.run()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nyq473oLxjoh" - }, - "source": [ - "# Round Up\n", - " Congratulations! πŸš€ You are now capable of building ChatBot Agents for your application using our LLM endpoints. 
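To recap the interface used throughout this notebook, here is a minimal sketch stringing the main `ChatBot` calls together: constructing the agent, enabling the `show_credits` and `show_provider` flags (both are accepted by `run`, as the class definition above suggests), and switching endpoints, including a dynamic routing mode, mid-session. It assumes the same `unify` package shown above; the placeholder key and the endpoint strings are example values taken from this notebook, not requirements.

```python
from unify import ChatBot

UNIFY_KEY = "..."  # replace with your Unify API key

# Start on a fixed model/provider pair.
agent = ChatBot(api_key=UNIFY_KEY, endpoint="llama-2-70b-chat@anyscale")

# Interactive loop; append the credits spent and the provider used to each reply.
agent.run(show_credits=True, show_provider=True)

# After pausing with `pause`, point the same agent at a dynamic routing mode
# (here: lowest input-token cost) and resume the conversation.
agent.set_endpoint("llama-2-70b-chat@lowest-input-cost")
agent.run(show_provider=True)
```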
" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "1Swnehb9Rvdh" - ], - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/hub/_sources/demos/demos/Unify/Chatbot_Arena/CONTRIBUTING.md.txt b/hub/_sources/demos/demos/Unify/Chatbot_Arena/CONTRIBUTING.md.txt deleted file mode 100644 index f75becf056..0000000000 --- a/hub/_sources/demos/demos/Unify/Chatbot_Arena/CONTRIBUTING.md.txt +++ /dev/null @@ -1,28 +0,0 @@ -# How to become a contributor and submit your own code -## Contributor License Agreements -We'd love to accept your sample apps and patches! Before we can take them, we -have to jump a couple of legal hurdles. -Please fill out either the individual or corporate Contributor License Agreement -(CLA). - * If you are an individual writing original source code and you're sure you - own the intellectual property, then you'll need to sign an [individual CLA] - (https://developers.google.com/open-source/cla/individual). - * If you work for a company that wants to allow you to contribute your work, - then you'll need to sign a [corporate CLA] - (https://developers.google.com/open-source/cla/corporate). -Follow either of the two links above to access the appropriate CLA and -instructions for how to sign and return it. Once we receive it, we'll be able to -accept your pull requests. -## Contributing A Patch -1. Submit an issue describing your proposed change to the repo in question. -1. The repo owner will respond to your issue promptly. -1. If your proposed change is accepted, and you haven't already done so, sign a - Contributor License Agreement (see details above). -1. Fork the desired repo, develop and test your code changes. -1. Ensure that your code adheres to the existing style in the sample to which - you are contributing. Refer to the - [Google Cloud Platform Samples Style Guide] - (https://github.com/GoogleCloudPlatform/Template/wiki/style.html) for the - recommended coding standards for this organization. -1. Ensure that your code has an appropriate set of unit tests which all pass. -1. Submit a pull request. \ No newline at end of file diff --git a/hub/_sources/demos/demos/Unify/Chatbot_Arena/README.md.txt b/hub/_sources/demos/demos/Unify/Chatbot_Arena/README.md.txt deleted file mode 100644 index 1a70cd4dee..0000000000 --- a/hub/_sources/demos/demos/Unify/Chatbot_Arena/README.md.txt +++ /dev/null @@ -1,120 +0,0 @@ -# Chatbot Arena - -[Demo](https://github.com/Kacper-W-Kozdon/demos-Unify/assets/102428159/e5908b4e-0cd7-445d-a1ac-3086be2db5ba) - - - - -A live version of the application is hosted on Streamlit, try it out yourself using the link below: -[ChatBot Arena on Streamlit](https://llm-playground-unify.streamlit.app/) - -

- This Streamlit application provides a user interface for interacting with Unify models through chat. It allows users to select models and providers, input text, and view the conversation history with AI assistants. - -
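For a rough picture of how a chat UI like this can be wired up, the sketch below combines Streamlit's chat widgets with the Unify client used elsewhere in these demos. It is an illustrative outline only, not the application's actual source: it shows a single assistant for brevity (the arena runs two side by side), and the endpoint options and session-state layout are assumptions made for the example.

```python
import streamlit as st
from unify import Unify

st.title("Unify chat playground")

# Sidebar inputs: the user's Unify API key and an endpoint ("model@provider").
api_key = st.sidebar.text_input("Unify API key", type="password")
endpoint = st.sidebar.selectbox(
    "Endpoint", ["llama-2-70b-chat@anyscale", "gpt-4-turbo@openai"]
)

# Keep the displayed conversation across Streamlit reruns.
if "history" not in st.session_state:
    st.session_state.history = []

for role, text in st.session_state.history:
    st.chat_message(role).write(text)

if prompt := st.chat_input("Type your message"):
    st.chat_message("user").write(prompt)
    client = Unify(api_key=api_key, endpoint=endpoint)
    reply = client.generate(user_prompt=prompt)
    st.chat_message("assistant").write(reply)
    st.session_state.history += [("user", prompt), ("assistant", reply)]
```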
- Developed with the software and tools below. -
-
-
-### Overview
-This Streamlit application provides a user interface for interacting with Unify models through chat. It allows users to select models and providers, input text, and view the conversation history with two AI assistants at a time. The app collects data on users' assessments of the models' comparative performance and provides easy access to the global leaderboards, which can serve as a complementary measure of model performance.
-
-
-### Motivation
-The challenge project "Chatbot arena" is based on [this article](https://arxiv.org/abs/2403.04132).
-
-
-### Features
-
-- **Chat UI**: Interactive chat interface to communicate with AI assistants.
-- **Endpoint from Unify**: Choose from a variety of models and providers.
-- **Conversation History**: View and track the conversation history with each model.
-- **Clear History**: Option to clear the conversation history for a fresh start.
-- **Global Leaderboards**: The votes are saved locally and [globally](https://docs.google.com/spreadsheets/d/10QrEik70RYY_LM8RW8GGq-vZWK2e1dka6agRGtKZPHU/edit#gid=0).
-
-
-### How to use the app
-
-
-1. Input Unify API Key: Enter your Unify API key in the provided text input box on the sidebar.
-
-2. Select endpoints: Choose the models and providers from the sidebar dropdown menus.
-
-3. Start Chatting: Type your message in the chat input box and press "Enter" or click the "Send" button.
-
-4. View Conversation History: The conversation history with the AI assistant for each model is displayed in separate containers.
-
-5. Clear History: You can clear the conversation history by clicking the "Clear History" button.
-
-
-### Getting Started
-
-**System Requirements:**
-
-* **Python**
-* **streamlit**
-* extra: look into the `requirements.txt` and `requirements-test.txt` files
-
-
-#### Easy installation
-

Install from source in order to use the attached Docker file.

- ---- - -## Repository and Deployment - -### Setup (without Docker) - -1. Clone this repository: - - ```bash - git clone https://github.com/samthakur587/LLM_playground - ``` -2. change directory - ```bash - cd LLM_playground - ``` - - -3. Install the required dependencies: - - ```bash - pip install -r requirements.txt - ``` - -### Run the app -```bash -streamlit run Chatbot_arena.py -``` - ---- -## Contributors -

- - - -| Name | GitHub Profile | -|------|----------------| -| Samunder Singh | [samthakur587](https://github.com/samthakur587) | -| Kacper KoΕΌdoΕ„ | [Kacper-W-Kozdon](https://github.com/Kacper-W-Kozdon) | - - - - -

---- - - diff --git a/hub/_sources/demos/demos/Unify/LLM-Wars/README.md.txt b/hub/_sources/demos/demos/Unify/LLM-Wars/README.md.txt deleted file mode 100644 index 6075b72ff2..0000000000 --- a/hub/_sources/demos/demos/Unify/LLM-Wars/README.md.txt +++ /dev/null @@ -1,65 +0,0 @@ -# LLM-Wars -[Demo](https://github.com/unifyai/demos/assets/43880587/aba328d2-4c9e-4cd4-a6aa-82657e806145) - - - - -A live version of the application is hosted on Streamlit, try it out yourself using the link below: -[LLM Wars on Streamlit](https://unify-llm-wars-tftznesvztdt2bwsqgub3r.streamlit.app/) - -### Overview -**LLM Wars** is a web application built with Streamlit that sets up a dynamic competition between two Large Language Models (LLMs). The LLMs engage in a structured debate where they challenge each other by generating complex prompts, responding to those prompts, and evaluating the responses. This application demonstrates the natural language capabilities of modern AI models in an interactive competitive environment with visualizations. - -### Objective -The main goal of LLM Wars is to provide a creative and educational platform for testing AI models against each other following predefined rules. It highlights the strengths and limitations of language models while presenting AI capabilities engagingly to users. - -### Tech Stack -- **Streamlit**: Used for creating the web application interface that is intuitive and interactive. -- **Unify AI**: Provides the backend LLMs that power the interactions within the application. Unify's API is utilized to send prompts to the LLMs and receive their responses in real-time. - -### Application Flow -1. **Initialization**: Users start by selecting two competing LLMs and one judge LLM from a predefined list of available models. -2. **Competition Cycle**: - - **Prompt Suggestion**: LLM1 generates a challenging prompt. - - **Response Generation**: LLM2 attempts to respond accurately to the prompt. - - **Verification**: LLM1 verifies the correctness of LLM2's response. - - **Judgment**: The judge LLM evaluates the interaction. If LLM2's response is deemed incorrect, LLM1 is declared the winner, and the cycle ends. Otherwise, roles are reversed, and the cycle repeats with LLM2 generating the next prompt. -3. **Visualization**: The application provides a visual representation of the ongoing interaction, scores, and decisions made by the judge LLM. - -### Motivation -LLM Wars demonstrates novel LLM applications beyond common use cases by creating a competitive AI environment. This pushes the boundaries of what language models can creatively and adaptively achieve. It also serves an educational purpose demystifying AI for audiences like students and professionals. - -### Key Concepts -- **Natural Language Understanding and Generation**: At the core of LLM Wars is the ability of LLMs to understand and generate human-like text, showcasing advancements in AI language models. -- **API Integration**: Demonstrates how to effectively integrate and utilize third-party APIs (Unify AI) within a Python-based application. - - -## Repository and Deployment -### Access the Source Code -The source code for **LLM Wars** is part of a larger collection of demos. You can access the original source code for this specific project [here](https://github.com/leebissessar5/Unify-LLM-Wars). - -### Live Application -### Running Locally -To run **LLM Wars** locally, clone the repository, then open up a terminal window from this directory (where this README is located) and follow these steps: - -1. 
**Install Dependencies**: Install the required Python libraries using pip: - ```bash - pip install -r requirements.txt - ``` - -2. **Launch the Application**: Finally, start the application by running: - ```bash - streamlit run main.py - ``` - -This command initiates the Streamlit server, and you should see a URL displayed in your terminal where you can access the app locally, typically at `http://localhost:8501`. - -## Contributors -| Name | GitHub Profile | -|------|----------------| -| Lee Bissessar | [leebissessar](https://github.com/leebissessar5) | -| Glorry Sibomana | [WHITELOTUS0](https://github.com/WHITELOTUS0) | -| Kato Steven Mubiru | [KatoStevenMubiru](https://github.com/KatoStevenMubiru) | diff --git a/hub/_sources/demos/demos/Unify/LLM_Debate/README.md.txt b/hub/_sources/demos/demos/Unify/LLM_Debate/README.md.txt deleted file mode 100644 index ff5fb00d68..0000000000 --- a/hub/_sources/demos/demos/Unify/LLM_Debate/README.md.txt +++ /dev/null @@ -1,50 +0,0 @@ -# AI Debate App - -[demo](https://github.com/unifyai/demos/assets/121057369/a2db65f2-a14f-409e-b883-17b5382bddfe) - - - -A live version of the application is hosted on Streamlit, try it out yourself using the link below: -[LLM Debate App on Streamlit](https://llm-playground-unify.streamlit.app/) - - -## Introduction -Debate-App is a web application that enables a back-and-forth conversation between two Language Models (LLMs) on a topic chosen by the user. Users can select any two models, input a query, and visualize the dialogue between the LLMs in real-time. Built using Unify and deployed with Streamlit, this application provides a platform for users to witness AI-generated debates and explore the capabilities of different language models. - - -## Tech Stack -- **Streamlit**: Used for creating the web application interface that is intuitive and interactive. -- **Unify AI**: Provides the backend LLMs that power the interactions within the application. Unify's API is utilized to send prompts to the LLMs and receive their responses in real-time. - -## Repository and Deployment -Repo Link: -```commandline -https://github.com/Sanjay8602/Debate-App -``` -Guide to run locally: -```commandline -git clone https://github.com/Sanjay8602/Debate-App.git -``` -Installing Dependencies: -```commandline -pip install -r requirements.txt -``` -Run Locally: -```commandline -streamlit run app.py -``` -Deployed app link: -```commandline -https://sanjay8602-debate-app-app-kt5o9f.streamlit.app/ -``` - -## Contributors -Contributors to the project: - -| Name | GitHub Profile | -|---------------|------------------------------------------------| -| Sanjay Suthar | [Sanjay8602](https://github.com/Sanjay8602) | -| Ogban Ugot | [ogbanugot](https://github.com/ogbanugot) | diff --git a/hub/_sources/demos/demos/Unify/README.md.txt b/hub/_sources/demos/demos/Unify/README.md.txt deleted file mode 100644 index 19d5984adc..0000000000 --- a/hub/_sources/demos/demos/Unify/README.md.txt +++ /dev/null @@ -1,2 +0,0 @@ -# Unify Projects -This folder contains various projects built using the Unify Python Package. Please headover to the corresponding folder of the project for more details. 
\ No newline at end of file
diff --git a/hub/_sources/demos/demos/Unify/SemanticRouter/README.md.txt b/hub/_sources/demos/demos/Unify/SemanticRouter/README.md.txt
deleted file mode 100644
index 1e73a9093d..0000000000
--- a/hub/_sources/demos/demos/Unify/SemanticRouter/README.md.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# Semantic Router
-[Demo](https://github.com/ithanigaikumar/demos/assets/107815119/33ceff47-3495-44a9-aad7-c0a3ba3433a8)
-
-
-
-
-A live version of the application is hosted on Streamlit, try it out yourself using the link below:
-[Semantic Router on Streamlit](https://semanticrouterchatbot.streamlit.app/)
-
-## Introduction:
-This semantic router Streamlit application optimizes user query handling by dynamically routing each query to the most appropriate model based on semantic similarity. A routing layer is included to help with this process. This system supports predefined routes for domains like maths and coding, and allows users to create custom routes for unique needs. By ensuring that queries are processed by the best-suited model, the semantic router enhances output quality and improves cost efficiency. This approach not only delivers more accurate and contextually relevant responses but also enhances overall user satisfaction.
-
-
-## Repository and deployment
-Access the app using the following URL: [https://semanticrouterchatbot.streamlit.app/](https://semanticrouterchatbot.streamlit.app/) or follow the sections below to get started.
-Fork from this repository: [https://github.com/ithanigaikumar/SemanticRouter]
-To set up the project, you will need to install several Python packages. You can do this using pip, Python's package installer. Execute the following commands in your terminal or command prompt to install the required packages.
-
-**Install Required Packages:**
-```
-    pip install streamlit
-    pip install -U semantic-router==0.0.34
-    pip install unifyai
-    pip install transformers
-    pip install torch
-
-```
-Make sure that each command completes successfully before proceeding to the next step. If you encounter any issues during the installation process, check your Python and pip versions, and ensure your environment is configured correctly.
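Before launching the app, it may help to see the routing idea in a few lines of code. The sketch below is illustrative rather than the app's actual source: it builds a route layer from example utterances and dispatches each query to a Unify endpoint chosen per route, mirroring the dynamic-routes notebook included further down in these demos. The route definitions and endpoint names are example values only.

```python
import os

from semantic_router import Route, RouteLayer
from semantic_router.encoders import HuggingFaceEncoder
from unify import Unify

# Each route is described by a handful of example utterances.
routes = [
    Route(name="math", utterances=["solve for x in the equation", "what is the integral of"]),
    Route(name="coding", utterances=["how to write a for loop in Python", "what is recursion in programming"]),
]

# Embed the utterances locally and build the routing layer.
rl = RouteLayer(encoder=HuggingFaceEncoder(), routes=routes)

# Map each route name to a Unify endpoint (example choices).
endpoints = {
    "math": "llama-2-13b-chat@anyscale",
    "coding": "codellama-34b-instruct@anyscale",
}

def answer(query: str) -> str:
    choice = rl(query)  # RouteChoice with a .name attribute
    endpoint = endpoints.get(choice.name)
    if endpoint is None:
        return "This query does not fall under a supported category."
    client = Unify(api_key=os.environ["UNIFY_KEY"], endpoint=endpoint)
    return client.generate(user_prompt=query)
```

With this in place, `answer("Solve the equation 5-x=12 for x?")` would be matched to the math route and sent to the corresponding endpoint, as in the notebook.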
- - **Launch the App :** - - - - streamlit run app.py - - - -## Contributors - -| Name | GitHub Username | -|-------------------------------|-----------------| -| Indiradharshini Thanigaikumar | [ithanigaikumar](https://github.com/ithanigaikumar) | -| Jeyabalan Nadar | [jeyabalang](https://github.com/jeyabalang) | diff --git a/hub/_sources/demos/demos/Unify/SemanticRouter/layer-dynamic-routes.ipynb.txt b/hub/_sources/demos/demos/Unify/SemanticRouter/layer-dynamic-routes.ipynb.txt deleted file mode 100644 index 95451fa919..0000000000 --- a/hub/_sources/demos/demos/Unify/SemanticRouter/layer-dynamic-routes.ipynb.txt +++ /dev/null @@ -1,479 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "UxqB7_Ieur0s" - }, - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/02-dynamic-routes.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/02-dynamic-routes.ipynb)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EduhQaNAur0u" - }, - "source": [ - "# Dynamic Routes" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_4JgNeX4ur0v" - }, - "source": [ - "In semantic-router there are two types of routes that can be chosen. Both routes belong to the `Route` object, the only difference between them is that _static_ routes return a `Route.name` when chosen, whereas _dynamic_ routes use an LLM call to produce parameter input values.\n", - "\n", - "For example, a _static_ route will tell us if a query is talking about mathematics by returning the route name (which could be `\"math\"` for example). A _dynamic_ route can generate additional values, so it may decide a query is talking about maths, but it can also generate Python code that we can later execute to answer the user's query, this output may look like `\"math\", \"import math; output = math.sqrt(64)`.\n", - "\n", - "***⚠️ Note: We have a fully local version of dynamic routes available at [docs/05-local-execution.ipynb](https://github.com/aurelio-labs/semantic-router/blob/main/docs/05-local-execution.ipynb). 
The local 05 version tends to outperform the OpenAI version we demo in this notebook, so we'd recommend trying [05](https://github.com/aurelio-labs/semantic-router/blob/main/docs/05-local-execution.ipynb)!***" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bbmw8CO4ur0v" - }, - "source": [ - "## Installing the Library" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "dLElfRhgur0v" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " error: subprocess-exited-with-error\n", - " \n", - " Γ— Building wheel for llama-cpp-python (pyproject.toml) did not run successfully.\n", - " β”‚ exit code: 1\n", - " ╰─> [20 lines of output]\n", - " *** scikit-build-core 0.9.2 using CMake 3.29.2 (wheel)\n", - " *** Configuring CMake...\n", - " 2024-04-29 12:24:08,789 - scikit_build_core - WARNING - Can't find a Python library, got libdir=None, ldlibrary=None, multiarch=None, masd=None\n", - " loading initial cache file C:\\Users\\indir\\AppData\\Local\\Temp\\tmppik1ey4m\\build\\CMakeInit.txt\n", - " -- Building for: NMake Makefiles\n", - " CMake Error at CMakeLists.txt:3 (project):\n", - " Running\n", - " \n", - " 'nmake' '-?'\n", - " \n", - " failed with:\n", - " \n", - " no such file or directory\n", - " \n", - " \n", - " CMake Error: CMAKE_C_COMPILER not set, after EnableLanguage\n", - " CMake Error: CMAKE_CXX_COMPILER not set, after EnableLanguage\n", - " -- Configuring incomplete, errors occurred!\n", - " \n", - " *** CMake configuration failed\n", - " [end of output]\n", - " \n", - " note: This error originates from a subprocess, and is likely not a problem with pip.\n", - " ERROR: Failed building wheel for llama-cpp-python\n", - "ERROR: Could not build wheels for llama-cpp-python, which is required to install pyproject.toml-based projects\n", - "\n", - "[notice] A new release of pip is available: 23.1.2 -> 24.0\n", - "[notice] To update, run: C:\\Users\\indir\\AppData\\Local\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\python.exe -m pip install --upgrade pip\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: unifyai in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (0.8.1)\n", - "Requirement already satisfied: openai<2.0.0,>=1.12.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from unifyai) (1.20.0)\n", - "Requirement already satisfied: requests<3.0.0,>=2.31.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from unifyai) (2.31.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.3.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in 
c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (0.27.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (2.7.0)\n", - "Requirement already satisfied: sniffio in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.66.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai<2.0.0,>=1.12.0->unifyai) (4.11.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from requests<3.0.0,>=2.31.0->unifyai) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from requests<3.0.0,>=2.31.0->unifyai) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from requests<3.0.0,>=2.31.0->unifyai) (2.0.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from requests<3.0.0,>=2.31.0->unifyai) (2023.5.7)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.12.0->unifyai) (1.2.0)\n", - "Requirement already satisfied: httpcore==1.* in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai) (1.0.5)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.12.0->unifyai) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.18.1 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages 
(from pydantic<3,>=1.9.0->openai<2.0.0,>=1.12.0->unifyai) (2.18.1)\n", - "Requirement already satisfied: colorama in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from tqdm>4->openai<2.0.0,>=1.12.0->unifyai) (0.4.6)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "[notice] A new release of pip is available: 23.1.2 -> 24.0\n", - "[notice] To update, run: C:\\Users\\indir\\AppData\\Local\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\python.exe -m pip install --upgrade pip\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: openai in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (1.20.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (4.3.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (0.27.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (2.7.0)\n", - "Requirement already satisfied: sniffio in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (4.66.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from openai) (4.11.0)\n", - "Requirement already satisfied: idna>=2.8 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from anyio<5,>=3.5.0->openai) (3.4)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from anyio<5,>=3.5.0->openai) (1.2.0)\n", - "Requirement already satisfied: certifi in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpx<1,>=0.23.0->openai) (2023.5.7)\n", - "Requirement already satisfied: httpcore==1.* in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpx<1,>=0.23.0->openai) (1.0.5)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in 
c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from pydantic<3,>=1.9.0->openai) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.18.1 in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from pydantic<3,>=1.9.0->openai) (2.18.1)\n", - "Requirement already satisfied: colorama in c:\\users\\indir\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.10_qbz5n2kfra8p0\\localcache\\local-packages\\python310\\site-packages (from tqdm>4->openai) (0.4.6)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "[notice] A new release of pip is available: 23.1.2 -> 24.0\n", - "[notice] To update, run: C:\\Users\\indir\\AppData\\Local\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\python.exe -m pip install --upgrade pip\n" - ] - } - ], - "source": [ - "!pip install -qU \"semantic-router[local]==0.0.20\"\n", - "!pip install unifyai\n", - "!pip install openai\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BixZd6Eour0w" - }, - "source": [ - "## Initializing Routes and RouteLayer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PxnW9qBvur0x" - }, - "source": [ - "Dynamic routes are treated in the same way as static routes, let's begin by initializing a `RouteLayer` consisting of static routes." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "kc9Ty6Lgur0x" - }, - "outputs": [], - "source": [ - "from semantic_router import Route\n", - "\n", - "# Define routes for Math and Coding\n", - "math_route = Route(\n", - " name=\"math\",\n", - " utterances=[\n", - " \"solve for x in the equation\",\n", - " \"what is the integral of\",\n", - " \"how to calculate the derivative\",\n", - " \"mathematical proofs\",\n", - " \"how do you find the percentage of this number\"\n", - " ],\n", - ")\n", - "\n", - "coding_route = Route(\n", - " name=\"coding\",\n", - " utterances=[\n", - " \"how to write a for loop in Python\",\n", - " \"explain the use of classes in Java\",\n", - " \"what is recursion in programming\",\n", - " \"how do i optimise this problem using hash tables\",\n", - " \"suggest a more efficient data structure for this problem\"\n", - " ],\n", - ")\n", - "\n", - "# List of all routes\n", - "routes = [math_route, coding_route]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "voWyqmffur0x" - }, - "source": [ - "We initialize our `RouteLayer` with our `encoder` and `routes`. We can use popular encoder APIs like `CohereEncoder` and `OpenAIEncoder`, or local alternatives like `FastEmbedEncoder`." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "BI9AiDspur0y", - "outputId": "27329a54-3f16-44a5-ac20-13a6b26afb97" - }, - "outputs": [ - { - "ename": "ImportError", - "evalue": "Please install fastembed to use FastEmbedEncoder. 
You can install it with: `pip install 'semantic-router[fastembed]'`", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\semantic_router\\encoders\\fastembed.py:25\u001b[0m, in \u001b[0;36mFastEmbedEncoder._initialize_client\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfastembed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membedding\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FlagEmbedding \u001b[38;5;28;01mas\u001b[39;00m Embedding\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n", - "\u001b[1;31mImportError\u001b[0m: cannot import name 'TextEmbedding' from 'fastembed' (unknown location)", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [11], line 6\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msemantic_router\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RouteLayer\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msemantic_router\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mencoders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FastEmbedEncoder\n\u001b[1;32m----> 6\u001b[0m encoder \u001b[38;5;241m=\u001b[39m \u001b[43mFastEmbedEncoder\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mBAAI/bge-small-en-v1.5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 8\u001b[0m rl \u001b[38;5;241m=\u001b[39m RouteLayer(encoder\u001b[38;5;241m=\u001b[39mencoder, routes\u001b[38;5;241m=\u001b[39mroutes)\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\semantic_router\\encoders\\fastembed.py:21\u001b[0m, in \u001b[0;36mFastEmbedEncoder.__init__\u001b[1;34m(self, score_threshold, **data)\u001b[0m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28mself\u001b[39m, score_threshold: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.5\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mdata\n\u001b[0;32m 19\u001b[0m ): \u001b[38;5;66;03m# TODO default score_threshold not thoroughly tested, should optimize\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(score_threshold\u001b[38;5;241m=\u001b[39mscore_threshold, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mdata)\n\u001b[1;32m---> 21\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_client \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_client\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\semantic_router\\encoders\\fastembed.py:27\u001b[0m, in \u001b[0;36mFastEmbedEncoder._initialize_client\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfastembed\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membedding\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FlagEmbedding \u001b[38;5;28;01mas\u001b[39;00m Embedding\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[1;32m---> 27\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[0;32m 28\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease install fastembed to use FastEmbedEncoder. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 29\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can install it with: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 30\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`pip install \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msemantic-router[fastembed]\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 31\u001b[0m )\n\u001b[0;32m 33\u001b[0m embedding_args \u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 34\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_name\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname,\n\u001b[0;32m 35\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_length\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_length,\n\u001b[0;32m 36\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_dir,\n\u001b[0;32m 37\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthreads\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mthreads,\n\u001b[0;32m 38\u001b[0m }\n\u001b[0;32m 40\u001b[0m embedding_args \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m embedding_args\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m}\n", - "\u001b[1;31mImportError\u001b[0m: Please install fastembed to use FastEmbedEncoder. 
You can install it with: `pip install 'semantic-router[fastembed]'`" - ] - } - ], - "source": [ - "import os\n", - "from getpass import getpass\n", - "from semantic_router import RouteLayer\n", - "from semantic_router.encoders import HuggingFaceEncoder\n", - "\n", - "encoder = HuggingFaceEncoder()\n", - "\n", - "rl = RouteLayer(encoder=encoder, routes=routes)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GuLCeIS5ur0y" - }, - "source": [ - "We run the solely static routes layer:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_rNREh7gur0y", - "outputId": "f3a1dc0b-d760-4efb-b634-d3547011dcb7" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "RouteChoice(name='math', function_call=None, similarity_score=None)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rl(\"Solve the equation 5-x=12 for x?\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "from unify import Unify\n", - "# Environment setup for API keys\n", - "os.environ[\"UNIFY_KEY\"] = getpass(\"Enter Unify API Key: \")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from unify import ChatBot\n", - "\n", - "def test_unify_endpoint():\n", - " try:\n", - " # Set up the ChatBot with a known working endpoint and API key\n", - " agent = ChatBot(\n", - " api_key=os.environ[\"UNIFY_KEY\"],\n", - " endpoint=\"gpt-4@anyscale\" # Adjust this if necessary\n", - " )\n", - " \n", - " # Example prompt to test the service\n", - " test_prompt = \"What is 2+2?\"\n", - " \n", - " # Making a request to the Unify service\n", - " response = agent.run(test_prompt)\n", - " print(\"Response from Unify:\", response)\n", - " \n", - " except Exception as e:\n", - " print(\"Failed to connect to Unify endpoint:\", str(e))\n", - "\n", - "if __name__ == \"__main__\":\n", - " test_unify_endpoint()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "#Unify process query\n", - "def process_query(query):\n", - " route_choice = rl(query)\n", - " print(f\"Route chosen: {route_choice.name}\")\n", - "\n", - " if route_choice.name == \"math\":\n", - " # Initialize Unify with the endpoint for math queries\n", - " unify = Unify(\n", - " api_key=os.environ[\"UNIFY_KEY\"],\n", - " endpoint=\"llama-2-13b-chat@anyscale\" # Use the correct endpoint for math queries, other models not working\n", - " )\n", - " # Generate the response using Unify\n", - " response = unify.generate(user_prompt=query)\n", - " return response\n", - "\n", - " elif route_choice.name == \"coding\":\n", - " # Initialize Unify with the endpoint for coding queries\n", - " unify = Unify(\n", - " api_key=os.environ[\"UNIFY_KEY\"],\n", - " endpoint=\"codellama-34b-instruct@anyscale\" # Use the correct endpoint for coding queries\n", - " )\n", - " # Generate the response using Unify\n", - " response = unify.generate(user_prompt=query)\n", - " return response\n", - "\n", - " else:\n", - " return \"This query does not fall under a supported category.\"\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Route chosen: math\n", - "Sure! 
To solve the equation 5 - x = 12, we can add x to both sides of the equation.\n", - "\n", - "5 - x = 12\n", - "\n", - "Adding x to both sides gives us:\n", - "\n", - "5 = 12 + x\n", - "\n", - "Now we can subtract 12 from both sides:\n", - "\n", - "5 - 12 = x\n", - "\n", - "This simplifies to:\n", - "\n", - "-7 = x\n", - "\n", - "So the solution to the equation 5 - x = 12 is x = -7.\n" - ] - } - ], - "source": [ - "# Process query test\n", - "print(process_query(\"Solve the equation 5-x=12 for x?\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Route chosen: coding\n", - "\n", - "Here is the code:\n", - "\n", - "```py\n", - "my_list = []\n", - "for num in range(1, 11):\n", - " my_list.append(num)\n", - "```\n", - "\n", - "This will create a list containing the numbers 1 to 10 inclusively with each number in its own cell.\n", - "\n", - "_Note: The range() function is used to generate a sequence of numbers. The range() function when passed only one argument (in this case, 11) will return the sequence of numbers starting from 0 and going up to, but not including, the number passed, which is 11 in this case. By passing 1 as the starting number (range(1, 11)), we get the numbers from 1 to 10._\n", - "\n", - "Now you have a list with numbers from 1 to 10 in it. Here are some ways to use it:\n", - "\n", - "```py\n", - "print(my_list[0]) # prints the first element which is 1\n", - "print(my_list[9]) # prints the last element which is 10\n", - "print(my_list) # prints the whole list [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", - "```\n", - "\n", - "Let me know if you have further questions! 😊\n" - ] - } - ], - "source": [ - "print(process_query(\"Write a for loop in python that appends numbers from 1-10 in a list\"))" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "decision-layer", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/hub/_sources/demos/demos/template/README_TEMPLATE.md.txt b/hub/_sources/demos/demos/template/README_TEMPLATE.md.txt deleted file mode 100644 index da067c22ff..0000000000 --- a/hub/_sources/demos/demos/template/README_TEMPLATE.md.txt +++ /dev/null @@ -1,27 +0,0 @@ -# Project Title -The section will contain the demo video. In order to embed your video in the README as well as our docs, you need to do the following: -1. Drag the drop the video in the README. Doing that will generate a link for your video. Convert the link to a href by writing it in this format \[demo](). Checkout this [example](https://github.com/unifyai/demos/blob/main/Unify/LLM-Wars/README.md?plain=1#L2) incase you're not sure what to do. -2. Add your video the [Video](https://github.com/unifyai/demos/tree/main/videos) folder with the name .mp4. And add the following lines to your README: -``` - -``` -Incase the above is confusing, checkout this [example](https://github.com/unifyai/demos/blob/main/Unify/LLM-Wars/README.md?plain=1#L4C1-L7C9) - - -## Introduction -Provide a brief introduction to your project here. 
Describe what your project demonstrates, the tech stack used, the motivation behind the project, and briefly explain the necessary concepts used. Feel free to break down this section into multiple subsections depending on your project. - -## Repository and Deployment -Provide a link to the GitHub repository and instructions on how to run the app locally. If the app is deployed somewhere, provide a link to the deployed app. - -## Contributors -List the contributors to the project in a table format. For example: - -| Name | GitHub Profile | -|------|----------------| -| John Doe | [johndoe](https://github.com/johndoe) | -| Jane Doe | [janedoe](https://github.com/janedoe) | - diff --git a/hub/_sources/demos/langchain.rst.txt b/hub/_sources/demos/langchain.rst.txt deleted file mode 100644 index ce52428ae9..0000000000 --- a/hub/_sources/demos/langchain.rst.txt +++ /dev/null @@ -1,17 +0,0 @@ -LangChain Examples -================== - -.. grid:: 1 1 3 3 - :gutter: 4 - - .. grid-item-card:: Langchain RAG Playground - :link: ./demos/LangChain/RAG_playground/README.md - - Retrieval Augmented Generation with Langchain & Unify. - -.. toctree:: - :hidden: - :maxdepth: -1 - :caption: LangChain Examples - - ./demos/LangChain/RAG_playground/README.md diff --git a/hub/_sources/demos/llamaindex.rst.txt b/hub/_sources/demos/llamaindex.rst.txt deleted file mode 100644 index 33e75779d7..0000000000 --- a/hub/_sources/demos/llamaindex.rst.txt +++ /dev/null @@ -1,24 +0,0 @@ -LlamaIndex Examples -================== - -.. grid:: 1 1 3 3 - :gutter: 4 - - .. grid-item-card:: LlamaIndex Basic Usage - :link: ./demos/LlamaIndex/BasicUsage/unify.ipynb - - Learn how to use the LlamaIndex-Unify Integration. - - .. grid-item-card:: LlamaIndex RAG Playground - :link: ./demos/LlamaIndex/RAGPlayground/README.md - - Retrieval Augmented Generation Playground built with LlamaIndex. - - -.. toctree:: - :hidden: - :maxdepth: -1 - :caption: LlamaIndex Examples - - ./demos/LlamaIndex/RAGPlayground/README.md - ./demos/LlamaIndex/BasicUsage/unify.ipynb diff --git a/hub/_sources/demos/unify.rst.txt b/hub/_sources/demos/unify.rst.txt deleted file mode 100644 index e8091172b4..0000000000 --- a/hub/_sources/demos/unify.rst.txt +++ /dev/null @@ -1,48 +0,0 @@ -Python Package Examples -================== - -.. grid:: 1 1 3 3 - :gutter: 4 - - .. grid-item-card:: Building a ChatBot - :link: ./demos/Unify/ChatBot/ChatBot.ipynb - - An interactive chatbot application. - - .. grid-item-card:: Synchronous vs Asynchronous Clients - :link: ./demos/Unify/AsyncVsSync/AsyncVsSync.ipynb - - Exploring Sync vs Async Clients: Usage and Differences. - - .. grid-item-card:: LLM Wars - :link: ./demos/Unify/LLM-Wars/README.md - - LLMs face off in a Streamlit app, asking each other tough questions. - - .. grid-item-card:: Semantic Router - :link: ./demos/Unify/SemanticRouter/README.md - - LLM Routing based on semantic similarity. - - .. grid-item-card:: ChatBot Arena - :link: ./demos/Unify/Chatbot_Arena/README.md - - Ask any question to two anonymous LLMs and vote for the better one! - - .. grid-item-card:: LLM Debate App - :link: ./demos/Unify/LLM_Debate/README.md - - Provide a topic and watch two LLMs debate on it. - - -.. 
toctree:: - :hidden: - :maxdepth: -1 - :caption: Python Package Examples - - ./demos/Unify/ChatBot/ChatBot.ipynb - ./demos/Unify/AsyncVsSync/AsyncVsSync.ipynb - ./demos/Unify/LLM-Wars/README.md - ./demos/Unify/SemanticRouter/README.md - ./demos/Unify/Chatbot_Arena/README.md - ./demos/Unify/LLM_Debate/README.md \ No newline at end of file diff --git a/hub/_sources/index.rst.txt b/hub/_sources/index.rst.txt index e9a33d8282..1ab0744ff6 100644 --- a/hub/_sources/index.rst.txt +++ b/hub/_sources/index.rst.txt @@ -12,11 +12,11 @@ .. toctree:: :hidden: :maxdepth: -1 - :caption: API + :caption: Concepts - api/unify_api.rst - api/benchmarks.rst - api/router.rst + concepts/unify_api.rst + concepts/benchmarks.rst + concepts/router.rst .. reference/images.rst @@ -25,27 +25,18 @@ :template: top_level_toc_recursive.rst :recursive: :hide-table: - :caption: Python Client Docs + :caption: API unify -.. toctree:: - :hidden: - :maxdepth: 4 - :caption: Demos - - demos/unify.rst - demos/langchain.rst - demos/llamaindex.rst - .. toctree:: :hidden: :maxdepth: -1 - :caption: Interfaces + :caption: Console - interfaces/connecting_stack.rst - interfaces/running_benchmarks.rst - interfaces/building_router.rst + console/connecting_stack.rst + console/running_benchmarks.rst + console/building_router.rst .. .. toctree:: @@ -56,16 +47,6 @@ tools/openapi.rst tools/python_library.rst -.. toctree:: - :hidden: - :maxdepth: -1 - :caption: Concepts - - concepts/endpoints.rst - concepts/benchmarks.rst - concepts/routing.rst -.. concepts/on_prem_images.rst - .. toctree:: :hidden: :maxdepth: -1 @@ -73,5 +54,3 @@ on_prem/on_prem_access on_prem/sso.rst - - diff --git a/hub/_static/Chatbot_arena.mp4 b/hub/_static/Chatbot_arena.mp4 deleted file mode 100644 index 08143811af..0000000000 Binary files a/hub/_static/Chatbot_arena.mp4 and /dev/null differ diff --git a/hub/_static/LLM-Debate.mp4 b/hub/_static/LLM-Debate.mp4 deleted file mode 100644 index deb57ac5ed..0000000000 Binary files a/hub/_static/LLM-Debate.mp4 and /dev/null differ diff --git a/hub/_static/RAG_LLamaIndex.mp4 b/hub/_static/RAG_LLamaIndex.mp4 deleted file mode 100644 index 3ec638ea04..0000000000 Binary files a/hub/_static/RAG_LLamaIndex.mp4 and /dev/null differ diff --git a/hub/_static/RAG_Playground.mp4 b/hub/_static/RAG_Playground.mp4 deleted file mode 100644 index 6e636839e3..0000000000 Binary files a/hub/_static/RAG_Playground.mp4 and /dev/null differ diff --git a/hub/_static/llm-wars.mp4 b/hub/_static/llm-wars.mp4 deleted file mode 100644 index 839ab02305..0000000000 Binary files a/hub/_static/llm-wars.mp4 and /dev/null differ diff --git a/hub/_static/semanticrouterapplication.mp4 b/hub/_static/semanticrouterapplication.mp4 deleted file mode 100644 index ff09d02e30..0000000000 Binary files a/hub/_static/semanticrouterapplication.mp4 and /dev/null differ diff --git a/hub/api/benchmarks.html b/hub/api/benchmarks.html deleted file mode 100644 index adaee5e98c..0000000000 --- a/hub/api/benchmarks.html +++ /dev/null @@ -1,655 +0,0 @@ - - - - - - - - - - - Benchmarking — Unify Documentation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Benchmarking#

-

When comparing LLMs, there is a constant tradeoff between quality, cost and latency. Stronger models are generally slower and more expensive, and sometimes overkill for the task at hand. Complicating matters further, new models are released weekly, each claiming to be state-of-the-art.

-

Benchmarking on your data lets you see how each of the different models performs on your task.

-Benchmarks Image. -

You can compare how quality relates to cost and latency, with live stats pulled from our runtime benchmarks.

-

When new models come out, simply re-run the benchmark to see how they perform on your task.

-
-

Preparing your dataset#

-

First, create a dataset that is representative of the task you want to evaluate. -You will need a list of prompts, optionally including a reference (gold-standard) answer. Datasets containing reference answers tend to produce more accurate benchmarks.

-

The file itself should be in JSONL format, with one entry per line, as in the example below.

-
{"prompt": "This is the first prompt", "ref_answer": "This is the first reference answer"}
-{"prompt": "This is the second prompt", "ref_answer": "This is the second reference answer"}
-
-
-

Use at least 50 prompts to get the most accurate results. There is currently a maximum limit of 500 prompts; for most tasks we don’t tend to see much extra detail beyond ~250.
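As a minimal sketch of preparing such a file (the benchmark_dataset.jsonl file name and the prompts are made up for illustration), Python's standard json module is enough to produce one JSON object per line:

```py
import json

# Illustrative prompts and reference answers only; replace with data from your own task.
examples = [
    {"prompt": "This is the first prompt", "ref_answer": "This is the first reference answer"},
    {"prompt": "This is the second prompt", "ref_answer": "This is the second reference answer"},
]

# Write the dataset in JSONL format: one entry per line, matching the example above.
with open("benchmark_dataset.jsonl", "w") as f:
    for example in examples:
        f.write(json.dumps(example) + "\n")
```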

-
-
-

Benchmarking your dataset#

-

In your dashboard, clicking Select benchmark and then Benchmark your prompts opens the interface to upload a dataset.

-

When the benchmark finishes, you’ll receive an email, and the graph will be displayed in your dashboard.

-

The x-axis can be set to represent cost, time-to-first-token, or inter-token latency, on either a linear or log scale.

-
-

How does it work?#

-

Currently, we use gpt4o-as-a-judge (cf. https://arxiv.org/abs/2306.05685) to evaluate the quality of each model’s responses.
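For illustration only, a minimal LLM-as-a-judge call in the spirit of that paper might look like the sketch below. This is not Unify's internal implementation: the judge rubric, the 1-10 scale, the model name, and the use of the openai client are all assumptions made for the example.

```py
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def judge_response(prompt: str, answer: str, ref_answer: str | None = None) -> str:
    """Ask a judge model to rate one answer; returns the judge's raw verdict."""
    rubric = (
        "You are an impartial judge. Rate the assistant's answer to the user's prompt "
        "on a scale of 1 to 10 and briefly justify the score."
    )
    content = f"Prompt:\n{prompt}\n\nAnswer:\n{answer}"
    if ref_answer is not None:
        # When the dataset includes a gold-standard answer, show it to the judge.
        content += f"\n\nReference answer:\n{ref_answer}"
    verdict = client.chat.completions.create(
        model="gpt-4o",  # assumed judge model
        messages=[
            {"role": "system", "content": rubric},
            {"role": "user", "content": content},
        ],
    )
    return verdict.choices[0].message.content
```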

-
-
-
- - \ No newline at end of file diff --git a/hub/concepts/benchmarks.html b/hub/concepts/benchmarks.html index 4903924a11..b8ddd0b9b0 100644 --- a/hub/concepts/benchmarks.html +++ b/hub/concepts/benchmarks.html @@ -8,7 +8,7 @@ - Benchmarks — Unify Documentation + Benchmarking — Unify Documentation @@ -62,8 +62,8 @@ - - + + @@ -153,21 +153,21 @@ - @@ -181,68 +181,26 @@ - - - - - - - - - - - - - - - - - -