Skip to content

Commit b30245d

Browse files
authored
add annotation notebook (#15)
1 parent 9736412 commit b30245d

File tree

2 files changed

+249
-0
lines changed

2 files changed

+249
-0
lines changed
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "37727c5a-0b9b-45b8-9494-0d41dc4e16d9",
6+
"metadata": {},
7+
"source": [
8+
"## API Key"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": null,
14+
"id": "df16f79c-dd92-402f-b5d2-aa72703d6c9a",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"import os\n",
19+
"os.environ[\"OPENAI_API_KEY\"] = \"your-api-key\"  # placeholder — never commit a real key"
20+
]
21+
},
22+
{
23+
"cell_type": "markdown",
24+
"id": "87af68f7-b973-4338-a168-c541f7f2dbc7",
25+
"metadata": {},
26+
"source": [
27+
"## Imports"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 10,
33+
"id": "22d60861-5bb3-48b0-a6a1-c3008c63c14b",
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"from datasets import load_dataset\n",
38+
"from ragas import evaluate, EvaluationDataset\n",
39+
"from ragas.metrics import AspectCritic\n",
40+
"from langchain_openai.chat_models import ChatOpenAI\n",
41+
"from ragas.llms import LangchainLLMWrapper\n"
42+
]
43+
},
44+
{
45+
"cell_type": "markdown",
46+
"id": "0dab6de2-680a-4178-823f-8889a144a5d4",
47+
"metadata": {},
48+
"source": [
49+
"## Dataset"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": 13,
55+
"id": "a2e9130c-b934-4331-a0c1-ce63089dcdf8",
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"dataset = load_dataset(\"explodinggradients/aspect_critic_answer_correctness\",split=\"train\")\n",
60+
"eval_dataset = EvaluationDataset.from_hf_dataset(dataset)"
61+
]
62+
},
63+
{
64+
"cell_type": "markdown",
65+
"id": "ffe5b8b9-8b1b-4ce3-95ce-51dab58458d0",
66+
"metadata": {},
67+
"source": [
68+
"## Set Model"
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"execution_count": 7,
74+
"id": "2c0d7d0c-d7e7-4c50-b2a5-a7336744288e",
75+
"metadata": {},
76+
"outputs": [
77+
{
78+
"data": {
79+
"text/plain": [
80+
"EvaluationDataset(features=['user_input', 'response', 'reference'], len=50)"
81+
]
82+
},
83+
"execution_count": 7,
84+
"metadata": {},
85+
"output_type": "execute_result"
86+
}
87+
],
88+
"source": [
89+
"\n",
90+
"llm_4o = LangchainLLMWrapper(ChatOpenAI(model=\"gpt-4o\"))"
91+
]
92+
},
93+
{
94+
"cell_type": "markdown",
95+
"id": "cb91f37f-3eb0-425a-8a47-7ca6729e498e",
96+
"metadata": {},
97+
"source": [
98+
"## Evaluate"
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": null,
104+
"id": "2e52e40c-10c6-4cb0-8815-c01614225b2e",
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"critic = AspectCritic(name=\"answer_correctness\", definition=\"Given the user_input, reference and response, is the response correct compared with the reference?\")\n",
109+
"results = evaluate(eval_dataset,metrics=[critic],llm=llm_4o)\n"
110+
]
111+
}
112+
],
113+
"metadata": {
114+
"kernelspec": {
115+
"display_name": "ragas",
116+
"language": "python",
117+
"name": "ragas"
118+
},
119+
"language_info": {
120+
"codemirror_mode": {
121+
"name": "ipython",
122+
"version": 3
123+
},
124+
"file_extension": ".py",
125+
"mimetype": "text/x-python",
126+
"name": "python",
127+
"nbconvert_exporter": "python",
128+
"pygments_lexer": "ipython3",
129+
"version": "3.9.20"
130+
}
131+
},
132+
"nbformat": 4,
133+
"nbformat_minor": 5
134+
}

evaluation_annotation.ipynb

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "87af68f7-b973-4338-a168-c541f7f2dbc7",
6+
"metadata": {},
7+
"source": [
8+
"## Imports"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": 10,
14+
"id": "22d60861-5bb3-48b0-a6a1-c3008c63c14b",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"from datasets import load_dataset\n",
19+
"from ragas import evaluate, EvaluationDataset\n",
20+
"from ragas.metrics import AspectCritic\n",
21+
"from langchain_openai.chat_models import ChatOpenAI\n",
22+
"from ragas.llms import LangchainLLMWrapper\n"
23+
]
24+
},
25+
{
26+
"cell_type": "markdown",
27+
"id": "0dab6de2-680a-4178-823f-8889a144a5d4",
28+
"metadata": {},
29+
"source": [
30+
"## Dataset"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": 13,
36+
"id": "a2e9130c-b934-4331-a0c1-ce63089dcdf8",
37+
"metadata": {},
38+
"outputs": [],
39+
"source": [
40+
"dataset = load_dataset(\"explodinggradients/aspect_critic_answer_correctness\",split=\"train\")\n",
41+
"eval_dataset = EvaluationDataset.from_hf_dataset(dataset)"
42+
]
43+
},
44+
{
45+
"cell_type": "markdown",
46+
"id": "ffe5b8b9-8b1b-4ce3-95ce-51dab58458d0",
47+
"metadata": {},
48+
"source": [
49+
"## Set Model"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": 7,
55+
"id": "2c0d7d0c-d7e7-4c50-b2a5-a7336744288e",
56+
"metadata": {},
57+
"outputs": [
58+
{
59+
"data": {
60+
"text/plain": [
61+
"EvaluationDataset(features=['user_input', 'response', 'reference'], len=50)"
62+
]
63+
},
64+
"execution_count": 7,
65+
"metadata": {},
66+
"output_type": "execute_result"
67+
}
68+
],
69+
"source": [
70+
"\n",
71+
"llm_4o = LangchainLLMWrapper(ChatOpenAI(model=\"gpt-4o\"))"
72+
]
73+
},
74+
{
75+
"cell_type": "markdown",
76+
"id": "cb91f37f-3eb0-425a-8a47-7ca6729e498e",
77+
"metadata": {},
78+
"source": [
79+
"## Evaluate"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"id": "2e52e40c-10c6-4cb0-8815-c01614225b2e",
86+
"metadata": {},
87+
"outputs": [],
88+
"source": [
89+
"critic = AspectCritic(name=\"answer_correctness\", definition=\"Given the user_input, reference and response, is the response correct compared with the reference?\")\n",
90+
"results = evaluate(eval_dataset,metrics=[critic],llm=llm_4o)\n"
91+
]
92+
}
93+
],
94+
"metadata": {
95+
"kernelspec": {
96+
"display_name": "ragas",
97+
"language": "python",
98+
"name": "ragas"
99+
},
100+
"language_info": {
101+
"codemirror_mode": {
102+
"name": "ipython",
103+
"version": 3
104+
},
105+
"file_extension": ".py",
106+
"mimetype": "text/x-python",
107+
"name": "python",
108+
"nbconvert_exporter": "python",
109+
"pygments_lexer": "ipython3",
110+
"version": "3.9.20"
111+
}
112+
},
113+
"nbformat": 4,
114+
"nbformat_minor": 5
115+
}

0 commit comments

Comments
 (0)