|
243 | 243 | "source": [
|
244 | 244 | "import IPython.display as ipd\n",
|
245 | 245 | "\n",
|
246 |
| - "ipd.Audio(tts_output.audio[0].numpy(), rate=tts_output.sr)" |
| 246 | + "\n", |
| 247 | + "def play(data, rate=None):\n", |
| 248 | + " ipd.display(ipd.Audio(data, rate=rate))\n", |
| 249 | + "\n", |
| 250 | + "\n", |
| 251 | + "play(tts_output.audio[0].numpy(), rate=tts_output.sr)" |
247 | 252 | ]
|
248 | 253 | },
|
249 | 254 | {
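
A note on the change above: a bare `ipd.Audio(...)` renders only because Jupyter auto-displays the last expression of a cell. Wrapping it in `ipd.display()` inside a `play()` helper makes the audio widget render from anywhere, including loops and functions. A minimal runnable sketch (the sine-wave clips are illustrative stand-ins for the TTS output):

```python
import numpy as np
import IPython.display as ipd

# Without an explicit display call, nothing inside this loop would
# render: auto-display only applies to a cell's final expression.
sr = 24000
clips = [np.sin(2 * np.pi * f * np.arange(sr) / sr) for f in (440.0, 660.0)]
for data in clips:
    ipd.display(ipd.Audio(data, rate=sr))
```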
|
|
277 | 282 | "metadata": {},
|
278 | 283 | "outputs": [],
|
279 | 284 | "source": [
|
280 |
| - "ipd.Audio(file_path)" |
| 285 | + "play(file_path)" |
281 | 286 | ]
|
282 | 287 | },
|
283 | 288 | {
|
|
310 | 315 | "metadata": {},
|
311 | 316 | "outputs": [],
|
312 | 317 | "source": [
|
313 |
| - "ipd.Audio(cloned_output.audio[0].numpy(), rate=cloned_output.sr)" |
| 318 | + "play(cloned_output.audio[0].numpy(), rate=cloned_output.sr)" |
314 | 319 | ]
|
315 | 320 | },
|
316 | 321 | {
|
|
338 | 343 | "metadata": {},
|
339 | 344 | "outputs": [],
|
340 | 345 | "source": [
|
| 346 | + "from notebook_utils import quantization_widget\n", |
| 347 | + "\n", |
| 348 | + "to_quantize = quantization_widget()\n", |
| 349 | + "\n", |
| 350 | + "to_quantize" |
| 351 | + ] |
| 352 | + }, |
| 353 | + { |
| 354 | + "cell_type": "code", |
| 355 | + "execution_count": null, |
| 356 | + "metadata": {}, |
| 357 | + "outputs": [], |
| 358 | + "source": [ |
| 359 | + "# Fetch `skip_kernel_extension` module\n", |
| 360 | + "import requests\n", |
| 361 | + "\n", |
| 362 | + "r = requests.get(\n", |
| 363 | + " url=\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py\",\n", |
| 364 | + ")\n", |
| 365 | + "open(\"skip_kernel_extension.py\", \"w\").write(r.text)\n", |
| 366 | + "\n", |
| 367 | + "ov_quantized_model = None\n", |
| 368 | + "quantized_ov_pipe = None\n", |
| 369 | + "\n", |
| 370 | + "%load_ext skip_kernel_extension" |
| 371 | + ] |
| 372 | + }, |
| 373 | + { |
| 374 | + "cell_type": "code", |
| 375 | + "execution_count": null, |
| 376 | + "metadata": {}, |
| 377 | + "outputs": [], |
| 378 | + "source": [ |
| 379 | + "%%skip not $to_quantize.value\n", |
| 380 | + "\n", |
341 | 381 | "hf_model = OVHFModel(model_dir, device.value).model"
|
342 | 382 | ]
|
343 | 383 | },
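
The cells added above gate quantization behind `%%skip not $to_quantize.value`, where `to_quantize` is the checkbox returned by `quantization_widget()` and IPython substitutes `$to_quantize.value` into the magic line before the magic runs. The fetched `skip_kernel_extension.py` itself is not shown in this diff; a minimal sketch of how such an extension can work (the real module's details may differ):

```python
# Sketch of a %%skip cell magic, assuming skip_kernel_extension.py
# behaves along these lines; the actual module may differ.
from IPython.core.magic import register_cell_magic


@register_cell_magic
def skip(line, cell):
    # `line` arrives after IPython's $-variable expansion, e.g. "not True".
    # Evaluate it and skip the cell body when the condition is truthy.
    if eval(line):
        return
    get_ipython().run_cell(cell)  # get_ipython() is defined inside IPython


def load_ipython_extension(shell):
    # Hook called by %load_ext; the magic was registered at import time.
    pass
```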
|
|
357 | 397 | "metadata": {},
|
358 | 398 | "outputs": [],
|
359 | 399 | "source": [
|
| 400 | + "%%skip not $to_quantize.value\n", |
| 401 | + "\n", |
360 | 402 | "from datasets import load_dataset\n",
|
361 | 403 | "\n",
|
362 | 404 | "libritts = load_dataset(\"parler-tts/libritts_r_filtered\", \"clean\", split=\"test.clean\")"
|
|
378 | 420 | "metadata": {},
|
379 | 421 | "outputs": [],
|
380 | 422 | "source": [
|
| 423 | + "%%skip not $to_quantize.value\n", |
| 424 | + "\n", |
381 | 425 | "import nncf\n",
|
382 | 426 | "from functools import partial\n",
|
383 | 427 | "import numpy as np\n",
|
|
403 | 447 | "\n",
|
404 | 448 | "\n",
|
405 | 449 | "dataset = nncf.Dataset(libritts, partial(transform_fn, interface=interface))\n",
|
406 |
| - "\n", |
407 |
| - "quantized_model = nncf.quantize(hf_model.model, dataset, preset=nncf.QuantizationPreset.MIXED, model_type=nncf.ModelType.TRANSFORMER)" |
| 450 | + "quantized_model = nncf.quantize(hf_model.model, dataset, preset=nncf.QuantizationPreset.MIXED, model_type=nncf.ModelType.TRANSFORMER, ignored_scope=nncf.IgnoredScope(patterns=[\"__module.model.layers.*.self_attn/aten::scaled_dot_product_attention/ScaledDotProductAttention\"]))" |
408 | 451 | ]
|
409 | 452 | },
|
410 | 453 | {
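
Two details of the quantization call are worth noting. First, `nncf.Dataset` pairs the raw LibriTTS-R split with a transform function that converts one record into the model's input dict; the notebook's actual `transform_fn` is defined in lines elided from this hunk. A hypothetical minimal version, assuming the exported language model takes the usual `input_ids`/`attention_mask` inputs:

```python
# Hypothetical sketch of the transform_fn contract used by nncf.Dataset;
# the notebook's real transform_fn differs. The input names are an
# assumption about the exported OpenVINO model.
def transform_fn_sketch(sample, interface):
    inputs = interface.prompt_processor.tokenizer(
        sample["text_normalized"], return_tensors="np"
    )
    return {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
    }
```

Second, the new `ignored_scope` keeps the `scaled_dot_product_attention` nodes of each attention block un-quantized, a common way to trade a little compression for accuracy in transformer models.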
|
|
425 | 468 | "metadata": {},
|
426 | 469 | "outputs": [],
|
427 | 470 | "source": [
|
| 471 | + "%%skip not $to_quantize.value\n", |
| 472 | + "\n", |
428 | 473 | "hf_model.model = quantized_model\n",
|
| 474 | + "# int8_path = Path(f\"{model_dir}_compressed\")\n", |
429 | 475 | "int8_path = Path(f\"{model_dir}_int8\")\n",
|
430 | 476 | "hf_model.save_pretrained(int8_path)\n",
|
431 | 477 | "interface.prompt_processor.tokenizer.save_pretrained(int8_path)\n",
|
|
439 | 485 | "metadata": {},
|
440 | 486 | "outputs": [],
|
441 | 487 | "source": [
|
| 488 | + "%%skip not $to_quantize.value\n", |
| 489 | + "\n", |
442 | 490 | "tts_output_int8 = interface_int8.generate(text=\"Hello, I'm working!\", temperature=0.1, repetition_penalty=1.1, max_length=4096)"
|
443 | 491 | ]
|
444 | 492 | },
|
|
448 | 496 | "metadata": {},
|
449 | 497 | "outputs": [],
|
450 | 498 | "source": [
|
| 499 | + "%%skip not $to_quantize.value\n", |
451 | 500 | "# Non-quantized model output:\n",
|
452 |
| - "ipd.Audio(tts_output.audio[0].numpy(), rate=tts_output.sr)" |
| 501 | + "play(tts_output.audio[0].numpy(), rate=tts_output.sr)" |
453 | 502 | ]
|
454 | 503 | },
|
455 | 504 | {
|
|
459 | 508 | "outputs": [],
|
460 | 509 | "source": [
|
461 | 510 | "# Quantized model output:\n",
|
462 |
| - "ipd.Audio(tts_output_int8.audio[0].numpy(), rate=tts_output_int8.sr)" |
| 511 | + "play(tts_output_int8.audio[0].numpy(), rate=tts_output_int8.sr)" |
463 | 512 | ]
|
464 | 513 | },
|
465 | 514 | {
|
|
468 | 517 | "metadata": {},
|
469 | 518 | "outputs": [],
|
470 | 519 | "source": [
|
| 520 | + "%%skip not $to_quantize.value\n", |
| 521 | + "\n", |
471 | 522 | "speaker = interface_int8.load_speaker(\"speaker.pkl\")\n",
|
472 | 523 | "cloned_output_int8 = interface_int8.generate(\n",
|
473 | 524 | " text=\"This is a cloned voice speaking\",\n",
|
|
484 | 535 | "metadata": {},
|
485 | 536 | "outputs": [],
|
486 | 537 | "source": [
|
| 538 | + "%%skip not $to_quantize.value\n", |
487 | 539 | "# Non-quantized model output:\n",
|
488 |
| - "ipd.Audio(cloned_output.audio[0].numpy(), rate=cloned_output.sr)" |
| 540 | + "play(cloned_output.audio[0].numpy(), rate=cloned_output.sr)" |
489 | 541 | ]
|
490 | 542 | },
|
491 | 543 | {
|
|
494 | 546 | "metadata": {},
|
495 | 547 | "outputs": [],
|
496 | 548 | "source": [
|
| 549 | + "%%skip not $to_quantize.value\n", |
497 | 550 | "# Quantized model output:\n",
|
498 |
| - "ipd.Audio(cloned_output_int8.audio[0].numpy(), rate=cloned_output_int8.sr)" |
| 551 | + "play(cloned_output_int8.audio[0].numpy(), rate=cloned_output_int8.sr)" |
499 | 552 | ]
|
500 | 553 | },
|
501 | 554 | {
|
|
512 | 565 | "metadata": {},
|
513 | 566 | "outputs": [],
|
514 | 567 | "source": [
|
| 568 | + "%%skip not $to_quantize.value\n", |
| 569 | + "\n", |
515 | 570 | "import time\n",
|
516 | 571 | "import tqdm\n",
|
517 | 572 | "\n",
|
|
521 | 576 | " for text in tqdm.tqdm(dataset[\"text_normalized\"]):\n",
|
522 | 577 | " additional_gen_config = {\"pad_token_id\": interface.prompt_processor.tokenizer.eos_token_id}\n",
|
523 | 578 | " start = time.perf_counter()\n",
|
524 |
| - " _ = interface.generate(text=text, additional_gen_config=additional_gen_config)\n", |
| 579 | + " _ = interface.generate(text=text, max_length=256, additional_gen_config=additional_gen_config)\n", |
525 | 580 | " end = time.perf_counter()\n",
|
526 | 581 | " delta = end - start\n",
|
527 | 582 | " inference_time.append(delta)\n",
|
528 | 583 | " return np.median(inference_time)\n",
|
529 | 584 | "\n",
|
530 | 585 | "\n",
|
531 |
| - "subset_size = 25\n", |
532 | 586 | "interface = InterfaceOV(model_dir, device.value)\n",
|
| 587 | + "dataset_size = 25\n", |
533 | 588 | "\n",
|
534 |
| - "fp_inference_time = calculate_inference_time(interface, libritts[:subset_size])\n", |
| 589 | + "fp_inference_time = calculate_inference_time(interface, libritts[:dataset_size])\n", |
535 | 590 | "print(f\"FP model generate time: {fp_inference_time}\")\n",
|
536 | 591 | "\n",
|
537 | 592 | "interface_int8 = InterfaceOV(int8_path, device.value)\n",
|
538 |
| - "int_inference_time = calculate_inference_time(interface_int8, libritts[:subset_size])\n", |
| 593 | + "int_inference_time = calculate_inference_time(interface_int8, libritts[:dataset_size])\n", |
539 | 594 | "print(f\"INT model generate time: {int_inference_time}\")"
|
540 | 595 | ]
|
541 | 596 | },
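
A natural follow-up, not part of this commit, is to summarize the two medians as a speedup ratio:

```python
# Hypothetical extra cell: report how much faster the INT8 pipeline is,
# using the fp_inference_time and int_inference_time computed above.
print(f"INT8 speedup over FP: {fp_inference_time / int_inference_time:.2f}x")
```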
|
|