diff --git a/rag-playground/playground.ipynb b/rag-playground/playground.ipynb new file mode 100644 index 0000000..847f748 --- /dev/null +++ b/rag-playground/playground.ipynb @@ -0,0 +1,586 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "19011569-f522-4ece-a8f6-e5e046d9dda9", + "metadata": {}, + "source": [ + "## Simple RAG playground" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "d65192b5-ddbc-4091-a410-3cd15f7e1ccb", + "metadata": {}, + "outputs": [], + "source": [ + "import instructor\n", + "from openai import AsyncOpenAI\n", + "import os\n", + "from pydantic import BaseModel\n", + "from dataclasses import dataclass" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "b21ecce8-2f0e-467f-ae3f-77e7fb131735", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"OPENAI_API_KEY\"] = \"your-api-key\"\n", + "PROJECT_ID = \"1b35d9bf94ff801792bfd1824fac0c96\"\n", + "NOTION_TOKEN = 'your-notion-token'" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "a7b3537a-484d-40df-bfe7-f09e6fc0c388", + "metadata": {}, + "outputs": [], + "source": [ + "openai_client = instructor.from_openai(AsyncOpenAI())" + ] + }, + { + "cell_type": "markdown", + "id": "55c2f926-37ec-439a-9b25-ab0d463768c0", + "metadata": {}, + "source": [ + "## Simple RAG app" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "14f7975b-b7d2-482f-8747-91c8d94c79f8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'ragas-airline-dataset'...\n", + "remote: Enumerating objects: 14, done.\u001b[K\n", + "remote: Total 14 (delta 0), reused 0 (delta 0), pack-reused 14 (from 1)\u001b[K\n", + "Unpacking objects: 100% (14/14), 16.16 KiB | 719.00 KiB/s, done.\n" + ] + } + ], + "source": [ + "! git clone https://huggingface.co/datasets/explodinggradients/ragas-airline-dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "d4e605c9-45f6-47bf-bc60-81aada653912", + "metadata": {}, + "outputs": [], + "source": [ + "class Ragoutput(BaseModel):\n", + " response:str\n", + "\n", + "\n", + "\n", + "@dataclass\n", + "class Myllmapp:\n", + " prompt: str = \"Your are expert document chatbot. Answer from following documents\"\n", + "\n", + " def _read_markdown_file(self, file_path):\n", + " with open(file_path, 'r', encoding='utf-8') as file:\n", + " content = file.read()\n", + " return content\n", + "\n", + " def __post_init__(self):\n", + " files = [os.path.join(\"ragas-airline-dataset\",file) for file in os.listdir(\"ragas-airline-dataset\") if file.endswith(\".md\")]\n", + " self.content = [f\"{self._read_markdown_file(file)}---\\n\\n--{file}\" for file in files]\n", + " self.content = '\\n\\n'.join(self.content)\n", + " self.client = openai_client\n", + "\n", + "\n", + " async def ask(self, query:str):\n", + " response = await self.client.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " response_model=Ragoutput,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": self.prompt},\n", + " {\"role\": \"user\", \"content\": self.content},\n", + " {\"role\": \"user\", \"content\": query}\n", + " ],\n", + " )\n", + "\n", + " return response.response\n", + " \n", + " \n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "b4231384-a1eb-4de9-9b8e-4aaef9c03958", + "metadata": {}, + "outputs": [], + "source": [ + "my_rag = Myllmapp()" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "cdd184fd-ce81-451f-a618-ccc07bf2c49a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'If you miss your flight, whether you can receive a refund depends on the fare conditions of your ticket:\\n\\n1. **Refundable Tickets (Flexible Fares)**:\\n - If you have a refundable ticket, you can cancel your flight and request a full refund, even if you missed the flight.\\n\\n2. **Non-Refundable Tickets (Standard Fares)**:\\n - Typically, non-refundable tickets do not allow for refunds if you miss your flight. However, you may be eligible for a partial refund or travel credit, but cancellation fees may apply.\\n\\n3. **Basic Economy & Promotional Fares**:\\n - These tickets usually do not allow any refunds or modifications if you miss your flight.\\n\\nTo proceed with a refund request:\\n- Check your ticket conditions via \"Manage My Booking.\" \\n- If eligible, submit a refund request through the same platform.'" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await my_rag.ask(\"Can i get a refund for my missed flight?\")" + ] + }, + { + "cell_type": "markdown", + "id": "f4e4efdf-9068-44c6-9897-07bd95259c23", + "metadata": {}, + "source": [ + "## Setup sdk" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "f70f1a17-688e-4cd0-b449-1400076658a6", + "metadata": {}, + "outputs": [], + "source": [ + "from ragas_annotator.project.core import Project" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "id": "e41474dc-cc11-4eb1-9473-6686857bc14b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Project(name='Customer support RAG', root_page_id=1b35d9bf94ff801792bfd1824fac0c96)" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "project = Project(\n", + " name=\"Customer support RAG\", \n", + " notion_api_key=NOTION_TOKEN, \n", + " notion_root_page_id=PROJECT_ID,\n", + ")\n", + "project" + ] + }, + { + "cell_type": "markdown", + "id": "a762ea33-4d54-477b-8920-138cc0ec8444", + "metadata": {}, + "source": [ + "## Read dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "d26d45b5-697b-47b5-b65e-a82a69c9d594", + "metadata": {}, + "outputs": [], + "source": [ + "from ragas_annotator.model.notion_model import NotionModel\n", + "from ragas_annotator import nmt\n", + "\n", + "class Dataset(NotionModel):\n", + " id: str = nmt.ID()\n", + " query: str = nmt.Title()\n", + " expected_answer: str = nmt.Text()" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "9989e56e-86d5-44a9-82d4-904977c75e00", + "metadata": {}, + "outputs": [], + "source": [ + "dataset = project.get_dataset(\n", + " name=\"RAG Dataset\",\n", + " model=Dataset,\n", + ")\n", + "dataset.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "193d8366-b527-42ee-96d3-b51f70b59060", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "15" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "49092454-0dca-437d-9fcf-e17cd71a4ae5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset(id=15 query='Are there any special policies for traveling with infants?' expected_answer='Infants under 2 years can travel on an adult’s lap for free or with a discounted seat. Free checked baggage allowance may apply depending on the airline.')" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset[0]" + ] + }, + { + "cell_type": "markdown", + "id": "4d9163c2-bbd2-4425-a683-d5b5d67b76c6", + "metadata": {}, + "source": [ + "## LLM as judge" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "c6f70157-77a1-4d0d-90b4-431f932c828d", + "metadata": {}, + "outputs": [], + "source": [ + "import typing as t\n", + "@ragas.metrics\n", + "async def judge(x,y):\n", + " class Judge(BaseModel):\n", + " verdict: t.Literal[\"pass\",\"fail\"]\n", + " \n", + " response = await openai_client.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " response_model=Judge,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"check if response and expected answer are the same.\"},\n", + " {\"role\": \"user\", \"content\": f\"response:{x}, expected {y}\"},\n", + " ],\n", + " ) \n", + " return response.verdict\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "034bf580-93ab-4b90-b9ae-2b53d428e0c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'fail'" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await judge(\"aftab is designer\",\"aftab is UX designer\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d8a9610-b18a-4ab4-ae3b-77c783fc92e4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf4596b6-47cf-47cc-998a-bab130ef6006", + "metadata": {}, + "outputs": [], + "source": [ + "judge.train(\"fourth-experiment\")\n", + "judge.save('my-name')" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "id": "490f98c7-3668-44e7-8abe-fa14eb55b038", + "metadata": {}, + "outputs": [], + "source": [ + "import typing as t\n", + "async def tone_judge(x,y):\n", + " class Judge(BaseModel):\n", + " verdict: int\n", + " \n", + " response = await openai_client.chat.completions.create(\n", + " model=\"gpt-4o-mini\",\n", + " response_model=Judge,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"check if the tone is arrogant. and give score from 0 to 10 depending on the arrogance level.\"},\n", + " {\"role\": \"user\", \"content\": f\"response:{x}, expected {y}\"},\n", + " ],\n", + " ) \n", + " return response.verdict\n", + "\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "7244f410-0292-4384-bce8-bb0358457e40", + "metadata": {}, + "source": [ + "## Exp - 1" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "id": "ae42189d-17e5-4dec-8e81-9e1ca53399e3", + "metadata": {}, + "outputs": [], + "source": [ + "class Experiment(Dataset):\n", + " response: str = nmt.Text()\n", + " # correctness: str = nmt.Select(options=[\"pass\",\"fail\"])\n", + " tone_score: str = nmt.Text()" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "146f2227-82b5-44b5-aaf4-4f8b35917002", + "metadata": {}, + "outputs": [], + "source": [ + "@project.langfuse_experiment(Experiment, name_prefix=\"Workshop\")\n", + "async def run_experiment(row: SupermeDataset):\n", + " response = await my_rag.ask(row.query)\n", + " tone_score = await judge(response, row.expected_answer)\n", + " experiment_view = Experiment(\n", + " id=row.id,\n", + " query=row.query,\n", + " expected_answer=row.expected_answer,\n", + " response=response,\n", + " )\n", + " return experiment_view" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "93d3dfee-5c8d-4d87-bd4a-459b785b9c8b", + "metadata": {}, + "outputs": [], + "source": [ + "my_rag.prompt = \"You are a specialized document chatbot. Provide answers based on the following documents.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "4426d7b6-28b3-49ce-92c5-74e468c51165", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████| 15/15 [00:06<00:00, 2.39it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "Experiment(name=fifth-experiment, model=Experiment)" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await run_experiment.run_async(\n", + " name=\"fifth-experiment\",\n", + " dataset=dataset\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a787be25-5563-4019-bdbb-ff10febb96ab", + "metadata": {}, + "source": [ + "## Exp - 1" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "4cfe8012-59a5-4d15-bf9e-45535e28bd93", + "metadata": {}, + "outputs": [], + "source": [ + "my_rag.prompt = \"You are a specialized document chatbot, answer shortly like a customer support guy.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "3ae61192-8047-41fd-8a1b-a639a1419024", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████| 15/15 [00:05<00:00, 2.84it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "Experiment(name=third-experiment, model=Experiment)" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await run_experiment.run_async(\n", + " name=\"third-experiment\",\n", + " dataset=dataset\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0f3e2d61-9be0-46f2-827a-c88897574d8f", + "metadata": {}, + "source": [ + "## Compare" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "802821c5-098f-4507-ab6c-ecf40e39edae", + "metadata": {}, + "outputs": [], + "source": [ + "exp_one = project.get_experiment('first-experiment',Experiment)\n", + "exp_two = project.get_experiment('second-experiment',Experiment)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "ed7e96cc-1c98-400a-85f0-2fbf648a842e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Experiments have different models: <class '__main__.Experiment'> and <class '__main__.Experiment'>\n", + "Experiments have different models: <class '__main__.Experiment'> and <class '__main__.Experiment'>\n", + "Uploading to Notion: 100%|██████████████████████████████| 15/15 [00:43<00:00, 2.88s/it]\n" + ] + }, + { + "data": { + "text/plain": [ + "'https://www.notion.so/1ba5d9bf94ff81e2ab94c2586a1ed315'" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "project.compare_experiments(exp_one,exp_two)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b1e5d32-b779-4b3a-8ac7-70c54500d8b8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (random)", + "language": "python", + "name": "random" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}