Skip to content

Commit b30245d

Browse files
authored
add annotation notebook (#15)
1 parent 9736412 commit b30245d

File tree

2 files changed

+249
-0
lines changed

2 files changed

+249
-0
lines changed
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "37727c5a-0b9b-45b8-9494-0d41dc4e16d9",
6+
"metadata": {},
7+
"source": [
8+
"## API Key"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": null,
14+
"id": "df16f79c-dd92-402f-b5d2-aa72703d6c9a",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import os\n",
19+
"os.environ[\"OPENAI_API_KEY\"] = \"your-api-key\"  # placeholder — never commit a real key"
20+
]
21+
},
22+
{
23+
"cell_type": "markdown",
24+
"id": "87af68f7-b973-4338-a168-c541f7f2dbc7",
25+
"metadata": {},
26+
"source": [
27+
"## Imports"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 10,
33+
"id": "22d60861-5bb3-48b0-a6a1-c3008c63c14b",
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"from datasets import load_dataset\n",
38+
"from ragas import evaluate, EvaluationDataset\n",
39+
"from ragas.metrics import AspectCritic\n",
40+
"from langchain_openai.chat_models import ChatOpenAI\n",
41+
"from ragas.llms import LangchainLLMWrapper\n"
42+
]
43+
},
44+
{
45+
"cell_type": "markdown",
46+
"id": "0dab6de2-680a-4178-823f-8889a144a5d4",
47+
"metadata": {},
48+
"source": [
49+
"## Dataset"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": 13,
55+
"id": "a2e9130c-b934-4331-a0c1-ce63089dcdf8",
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"dataset = load_dataset(\"explodinggradients/aspect_critic_answer_correctness\",split=\"train\")\n",
60+
"eval_dataset = EvaluationDataset.from_hf_dataset(dataset)"
61+
]
62+
},
63+
{
64+
"cell_type": "markdown",
65+
"id": "ffe5b8b9-8b1b-4ce3-95ce-51dab58458d0",
66+
"metadata": {},
67+
"source": [
68+
"## Set Model"
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"execution_count": 7,
74+
"id": "2c0d7d0c-d7e7-4c50-b2a5-a7336744288e",
75+
"metadata": {},
76+
"outputs": [
77+
{
78+
"data": {
79+
"text/plain": [
80+
"EvaluationDataset(features=['user_input', 'response', 'reference'], len=50)"
81+
]
82+
},
83+
"execution_count": 7,
84+
"metadata": {},
85+
"output_type": "execute_result"
86+
}
87+
],
88+
"source": [
89+
"\n",
90+
"llm_4o = LangchainLLMWrapper(ChatOpenAI(model=\"gpt-4o\"))"
91+
]
92+
},
93+
{
94+
"cell_type": "markdown",
95+
"id": "cb91f37f-3eb0-425a-8a47-7ca6729e498e",
96+
"metadata": {},
97+
"source": [
98+
"## Evaluate"
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": null,
104+
"id": "2e52e40c-10c6-4cb0-8815-c01614225b2e",
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"critic = AspectCritic(name=\"answer_correctness\", definition=\"Given the user_input, reference and response, is the response correct compared with the reference?\")\n",
109+
"results = evaluate(eval_dataset,metrics=[critic],llm=llm_4o)\n"
110+
]
111+
}
112+
],
113+
"metadata": {
114+
"kernelspec": {
115+
"display_name": "ragas",
116+
"language": "python",
117+
"name": "ragas"
118+
},
119+
"language_info": {
120+
"codemirror_mode": {
121+
"name": "ipython",
122+
"version": 3
123+
},
124+
"file_extension": ".py",
125+
"mimetype": "text/x-python",
126+
"name": "python",
127+
"nbconvert_exporter": "python",
128+
"pygments_lexer": "ipython3",
129+
"version": "3.9.20"
130+
}
131+
},
132+
"nbformat": 4,
133+
"nbformat_minor": 5
134+
}

evaluation_annotation.ipynb

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "87af68f7-b973-4338-a168-c541f7f2dbc7",
6+
"metadata": {},
7+
"source": [
8+
"## Imports"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": 10,
14+
"id": "22d60861-5bb3-48b0-a6a1-c3008c63c14b",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"from datasets import load_dataset\n",
19+
"from ragas import evaluate, EvaluationDataset\n",
20+
"from ragas.metrics import AspectCritic\n",
21+
"from langchain_openai.chat_models import ChatOpenAI\n",
22+
"from ragas.llms import LangchainLLMWrapper\n"
23+
]
24+
},
25+
{
26+
"cell_type": "markdown",
27+
"id": "0dab6de2-680a-4178-823f-8889a144a5d4",
28+
"metadata": {},
29+
"source": [
30+
"## Dataset"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": 13,
36+
"id": "a2e9130c-b934-4331-a0c1-ce63089dcdf8",
37+
"metadata": {},
38+
"outputs": [],
39+
"source": [
40+
"dataset = load_dataset(\"explodinggradients/aspect_critic_answer_correctness\",split=\"train\")\n",
41+
"eval_dataset = EvaluationDataset.from_hf_dataset(dataset)"
42+
]
43+
},
44+
{
45+
"cell_type": "markdown",
46+
"id": "ffe5b8b9-8b1b-4ce3-95ce-51dab58458d0",
47+
"metadata": {},
48+
"source": [
49+
"## Set Model"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": 7,
55+
"id": "2c0d7d0c-d7e7-4c50-b2a5-a7336744288e",
56+
"metadata": {},
57+
"outputs": [
58+
{
59+
"data": {
60+
"text/plain": [
61+
"EvaluationDataset(features=['user_input', 'response', 'reference'], len=50)"
62+
]
63+
},
64+
"execution_count": 7,
65+
"metadata": {},
66+
"output_type": "execute_result"
67+
}
68+
],
69+
"source": [
70+
"\n",
71+
"llm_4o = LangchainLLMWrapper(ChatOpenAI(model=\"gpt-4o\"))"
72+
]
73+
},
74+
{
75+
"cell_type": "markdown",
76+
"id": "cb91f37f-3eb0-425a-8a47-7ca6729e498e",
77+
"metadata": {},
78+
"source": [
79+
"## Evaluate"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"id": "2e52e40c-10c6-4cb0-8815-c01614225b2e",
86+
"metadata": {},
87+
"outputs": [],
88+
"source": [
89+
"critic = AspectCritic(name=\"answer_correctness\", definition=\"Given the user_input, reference and response, is the response correct compared with the reference?\")\n",
90+
"results = evaluate(eval_dataset,metrics=[critic],llm=llm_4o)\n"
91+
]
92+
}
93+
],
94+
"metadata": {
95+
"kernelspec": {
96+
"display_name": "ragas",
97+
"language": "python",
98+
"name": "ragas"
99+
},
100+
"language_info": {
101+
"codemirror_mode": {
102+
"name": "ipython",
103+
"version": 3
104+
},
105+
"file_extension": ".py",
106+
"mimetype": "text/x-python",
107+
"name": "python",
108+
"nbconvert_exporter": "python",
109+
"pygments_lexer": "ipython3",
110+
"version": "3.9.20"
111+
}
112+
},
113+
"nbformat": 4,
114+
"nbformat_minor": 5
115+
}

0 commit comments

Comments
 (0)