|
188 | 188 | },
|
189 | 189 | {
|
190 | 190 | "cell_type": "code",
|
191 |
| - "execution_count": 4, |
| 191 | + "execution_count": null, |
192 | 192 | "id": "553148f5",
|
193 | 193 | "metadata": {},
|
194 | 194 | "outputs": [
|
|
207 | 207 | "pipe = ov_genai.LLMPipeline(target_model_path, device.value)\n",
|
208 | 208 | "\n",
|
209 | 209 | "config = ov_genai.GenerationConfig()\n",
|
210 |
| - "config.max_new_tokens = 100\n", |
211 |
| - "\n", |
212 |
| - "\n", |
| 210 | + "config.max_new_tokens = 330\n", |
| 211 | + "prompt = '''<s>\n", |
| 212 | + "\n", |
| 213 | + "def prime_fib(n: int):\n", |
| 214 | + " \"\"\"\n", |
| 215 | + " prime_fib returns n-th number that is a Fibonacci number and it's also prime.\n", |
| 216 | + " >>> prime_fib(1)\n", |
| 217 | + " 2\n", |
| 218 | + " >>> prime_fib(2)\n", |
| 219 | + " 3\n", |
| 220 | + " >>> prime_fib(3)\n", |
| 221 | + " 5\n", |
| 222 | + " >>> prime_fib(4)\n", |
| 223 | + " 13\n", |
| 224 | + " >>> prime_fib(5)\n", |
| 225 | + " 89\n", |
| 226 | + " \"\"\"'''\n", |
213 | 227 | "def streamer(subword):\n",
|
214 | 228 | " print(subword, end=\"\", flush=True)\n",
|
215 | 229 | " # The returned flag indicates whether generation should be stopped.\n",
|
|
218 | 232 | "\n",
|
219 | 233 | "\n",
|
220 | 234 | "start_time = time.perf_counter()\n",
|
221 |
| - "pipe.generate([\"Sun is yellow because\"], config, streamer=streamer)\n", |
| 235 | + "pipe.generate(prompt, config, streamer=streamer)\n", |
222 | 236 | "end_time = time.perf_counter()"
|
223 | 237 | ]
|
224 | 238 | },
|
225 | 239 | {
|
226 | 240 | "cell_type": "code",
|
227 |
| - "execution_count": 5, |
| 241 | + "execution_count": null, |
228 | 242 | "id": "c40d9901-ceb2-4c4c-a686-303590292ab3",
|
229 | 243 | "metadata": {},
|
230 | 244 | "outputs": [
|
|
241 | 255 | "\n",
|
242 | 256 | "print(f\"Generation time: {end_time - start_time:.2f}s\")\n",
|
243 | 257 | "del pipe\n",
|
244 |
| - "gc.collect();" |
| 258 | + "gc.collect()" |
245 | 259 | ]
|
246 | 260 | },
|
247 | 261 | {
|
|
263 | 277 | },
|
264 | 278 | {
|
265 | 279 | "cell_type": "code",
|
266 |
| - "execution_count": 6, |
| 280 | + "execution_count": null, |
267 | 281 | "id": "9fde1b3c",
|
268 | 282 | "metadata": {},
|
269 | 283 | "outputs": [
|
|
278 | 292 | "source": [
|
279 | 293 | "scheduler_config = ov_genai.SchedulerConfig()\n",
|
280 | 294 | "# cache params\n",
|
281 |
| - "scheduler_config.cache_size = 2\n", |
| 295 | + "scheduler_config.cache_size = 0\n", |
| 296 | + "scheduler_config.num_kv_blocks = 2048 // 8\n", |
| 297 | + "scheduler_config.max_num_batched_tokens = 2048\n", |
282 | 298 | "\n",
|
283 | 299 | "draft_model = ov_genai.draft_model(draft_model_path, device.value)\n",
|
284 | 300 | "\n",
|
285 | 301 | "pipe = ov_genai.LLMPipeline(target_model_path, device.value, draft_model=draft_model, scheduler_config=scheduler_config)\n",
|
286 | 302 | "\n",
|
287 | 303 | "config = ov_genai.GenerationConfig()\n",
|
288 |
| - "config.max_new_tokens = 100\n", |
289 |
| - "config.num_assistant_tokens = 3\n", |
| 304 | + "config.max_new_tokens = 330\n", |
| 305 | + "config.num_assistant_tokens = 5\n", |
290 | 306 | "start_time = time.perf_counter()\n",
|
291 |
| - "result = pipe.generate([\"Sun is yellow because\"], config, streamer=streamer)\n", |
| 307 | + "result = pipe.generate(prompt, config, streamer=streamer)\n", |
292 | 308 | "end_time = time.perf_counter()"
|
293 | 309 | ]
|
294 | 310 | },
|
|
0 commit comments