Skip to content

Commit 540741b

Browse files
authored
Merge pull request #7 from shahules786/gitlab-test
test set generation - Atomicworks
2 parents f4d8e72 + 971aa20 commit 540741b

File tree

2 files changed

+585
-0
lines changed

2 files changed

+585
-0
lines changed
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "c2589bb3",
7+
"metadata": {},
8+
"outputs": [
9+
{
10+
"name": "stdout",
11+
"output_type": "stream",
12+
"text": [
13+
"\u001b[1;36m./data/handbook/communication/\u001b[00m\r\n",
14+
"├── _index.md\r\n",
15+
"├── ally-resources.md\r\n",
16+
"├── ask-me-anything.md\r\n",
17+
"├── chat.md\r\n",
18+
"├── confidentiality-levels.md\r\n",
19+
"├── deep-dives.md\r\n",
20+
"├── power-of-the-pause.md\r\n",
21+
"├── top-misused-terms.md\r\n",
22+
"├── video-playbook.md\r\n",
23+
"└── \u001b[1;36mzoom\u001b[00m\r\n",
24+
" ├── _index.md\r\n",
25+
" └── webinars.md\r\n",
26+
"\r\n",
27+
"1 directory, 11 files\r\n"
28+
]
29+
}
30+
],
31+
"source": [
32+
"!tree ./data/handbook/communication/"
33+
]
34+
},
35+
{
36+
"cell_type": "markdown",
37+
"id": "dbc74482",
38+
"metadata": {},
39+
"source": [
40+
"## Read documents"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": 2,
46+
"id": "333097c5",
47+
"metadata": {},
48+
"outputs": [
49+
{
50+
"data": {
51+
"text/plain": [
52+
"205"
53+
]
54+
},
55+
"execution_count": 2,
56+
"metadata": {},
57+
"output_type": "execute_result"
58+
}
59+
],
60+
"source": [
61+
"from llama_index import SimpleDirectoryReader\n",
62+
"\n",
63+
"reader = SimpleDirectoryReader('./data/handbook/communication/', recursive=True)\n",
64+
"docs = reader.load_data()\n",
65+
"\n",
66+
"len(docs)"
67+
]
68+
},
69+
{
70+
"cell_type": "markdown",
71+
"id": "423ae9be",
72+
"metadata": {},
73+
"source": [
74+
"## Generate the test set: pass the documents and the desired test size"
75+
]
76+
},
77+
{
78+
"cell_type": "code",
79+
"execution_count": 3,
80+
"id": "4f605077",
81+
"metadata": {},
82+
"outputs": [
83+
{
84+
"name": "stderr",
85+
"output_type": "stream",
86+
"text": [
87+
"/opt/anaconda3/envs/ragas/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
88+
" from .autonotebook import tqdm as notebook_tqdm\n"
89+
]
90+
}
91+
],
92+
"source": [
93+
"from ragas.testset.testset_generator import TestsetGenerator"
94+
]
95+
},
96+
{
97+
"cell_type": "code",
98+
"execution_count": 4,
99+
"id": "e1d2b53b",
100+
"metadata": {},
101+
"outputs": [],
102+
"source": [
103+
"test_genertor = TestsetGenerator.from_default()"
104+
]
105+
},
106+
{
107+
"cell_type": "code",
108+
"execution_count": 5,
109+
"id": "a7cfbc59",
110+
"metadata": {},
111+
"outputs": [
112+
{
113+
"name": "stderr",
114+
"output_type": "stream",
115+
"text": [
116+
"15it [01:47, 7.20s/it] \n"
117+
]
118+
}
119+
],
120+
"source": [
121+
"test_dataset = test_genertor.generate(docs,test_size=5)"
122+
]
123+
},
124+
{
125+
"cell_type": "code",
126+
"execution_count": 6,
127+
"id": "731adb20",
128+
"metadata": {},
129+
"outputs": [
130+
{
131+
"data": {
132+
"text/html": [
133+
"<div>\n",
134+
"<style scoped>\n",
135+
" .dataframe tbody tr th:only-of-type {\n",
136+
" vertical-align: middle;\n",
137+
" }\n",
138+
"\n",
139+
" .dataframe tbody tr th {\n",
140+
" vertical-align: top;\n",
141+
" }\n",
142+
"\n",
143+
" .dataframe thead th {\n",
144+
" text-align: right;\n",
145+
" }\n",
146+
"</style>\n",
147+
"<table border=\"1\" class=\"dataframe\">\n",
148+
" <thead>\n",
149+
" <tr style=\"text-align: right;\">\n",
150+
" <th></th>\n",
151+
" <th>question</th>\n",
152+
" <th>context</th>\n",
153+
" <th>answer</th>\n",
154+
" <th>question_type</th>\n",
155+
" <th>episode_done</th>\n",
156+
" </tr>\n",
157+
" </thead>\n",
158+
" <tbody>\n",
159+
" <tr>\n",
160+
" <th>0</th>\n",
161+
" <td>How does having a positive attitude affect you...</td>\n",
162+
" <td>- A shift in perspective or change in attitude...</td>\n",
163+
" <td>Having a positive attitude can greatly affect ...</td>\n",
164+
" <td>reasoning</td>\n",
165+
" <td>False</td>\n",
166+
" </tr>\n",
167+
" <tr>\n",
168+
" <th>1</th>\n",
169+
" <td>In what ways does a change in attitude influen...</td>\n",
170+
" <td>- A shift in perspective or change in attitude...</td>\n",
171+
" <td>A change in attitude can influence the respons...</td>\n",
172+
" <td>reasoning</td>\n",
173+
" <td>True</td>\n",
174+
" </tr>\n",
175+
" <tr>\n",
176+
" <th>2</th>\n",
177+
" <td>What is the purpose of Project Channels and Wo...</td>\n",
178+
" <td>- These channels are temporary and used to foc...</td>\n",
179+
" <td>The purpose of Project Channels and Working Gr...</td>\n",
180+
" <td>simple</td>\n",
181+
" <td>True</td>\n",
182+
" </tr>\n",
183+
" <tr>\n",
184+
" <th>3</th>\n",
185+
" <td>What types of information are restricted under...</td>\n",
186+
" <td>- Content that would violate confidentiality f...</td>\n",
187+
" <td>The types of information that are restricted u...</td>\n",
188+
" <td>simple</td>\n",
189+
" <td>True</td>\n",
190+
" </tr>\n",
191+
" <tr>\n",
192+
" <th>4</th>\n",
193+
" <td>What is the significance of using Simple Langu...</td>\n",
194+
" <td>Simple Language is meant to encourage everyone...</td>\n",
195+
" <td>The significance of using Simple Language in c...</td>\n",
196+
" <td>conditional</td>\n",
197+
" <td>True</td>\n",
198+
" </tr>\n",
199+
" <tr>\n",
200+
" <th>5</th>\n",
201+
" <td>What is the purpose of the location-specific c...</td>\n",
202+
" <td>These channels are used to help GitLab team-me...</td>\n",
203+
" <td>The purpose of the location-specific channels ...</td>\n",
204+
" <td>simple</td>\n",
205+
" <td>True</td>\n",
206+
" </tr>\n",
207+
" </tbody>\n",
208+
"</table>\n",
209+
"</div>"
210+
],
211+
"text/plain": [
212+
" question \\\n",
213+
"0 How does having a positive attitude affect you... \n",
214+
"1 In what ways does a change in attitude influen... \n",
215+
"2 What is the purpose of Project Channels and Wo... \n",
216+
"3 What types of information are restricted under... \n",
217+
"4 What is the significance of using Simple Langu... \n",
218+
"5 What is the purpose of the location-specific c... \n",
219+
"\n",
220+
" context \\\n",
221+
"0 - A shift in perspective or change in attitude... \n",
222+
"1 - A shift in perspective or change in attitude... \n",
223+
"2 - These channels are temporary and used to foc... \n",
224+
"3 - Content that would violate confidentiality f... \n",
225+
"4 Simple Language is meant to encourage everyone... \n",
226+
"5 These channels are used to help GitLab team-me... \n",
227+
"\n",
228+
" answer question_type \\\n",
229+
"0 Having a positive attitude can greatly affect ... reasoning \n",
230+
"1 A change in attitude can influence the respons... reasoning \n",
231+
"2 The purpose of Project Channels and Working Gr... simple \n",
232+
"3 The types of information that are restricted u... simple \n",
233+
"4 The significance of using Simple Language in c... conditional \n",
234+
"5 The purpose of the location-specific channels ... simple \n",
235+
"\n",
236+
" episode_done \n",
237+
"0 False \n",
238+
"1 True \n",
239+
"2 True \n",
240+
"3 True \n",
241+
"4 True \n",
242+
"5 True "
243+
]
244+
},
245+
"execution_count": 6,
246+
"metadata": {},
247+
"output_type": "execute_result"
248+
}
249+
],
250+
"source": [
251+
"test_dataset.to_pandas()\n"
252+
]
253+
},
254+
{
255+
"cell_type": "code",
256+
"execution_count": null,
257+
"id": "9dad1ca7",
258+
"metadata": {},
259+
"outputs": [],
260+
"source": []
261+
}
262+
],
263+
"metadata": {
264+
"kernelspec": {
265+
"display_name": "ragas",
266+
"language": "python",
267+
"name": "ragas"
268+
},
269+
"language_info": {
270+
"codemirror_mode": {
271+
"name": "ipython",
272+
"version": 3
273+
},
274+
"file_extension": ".py",
275+
"mimetype": "text/x-python",
276+
"name": "python",
277+
"nbconvert_exporter": "python",
278+
"pygments_lexer": "ipython3",
279+
"version": "3.10.8"
280+
}
281+
},
282+
"nbformat": 4,
283+
"nbformat_minor": 5
284+
}

0 commit comments

Comments
 (0)