Skip to content

Commit 39e8e22

Browse files
Update with ignored scope
1 parent 8ff0d5a commit 39e8e22

File tree

1 file changed

+68
-13
lines changed

1 file changed

+68
-13
lines changed

notebooks/outetts-text-to-speech/outetts-text-to-speech.ipynb

Lines changed: 68 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,12 @@
243243
"source": [
244244
"import IPython.display as ipd\n",
245245
"\n",
246-
"ipd.Audio(tts_output.audio[0].numpy(), rate=tts_output.sr)"
246+
"\n",
247+
"def play(data, rate=None):\n",
248+
" ipd.display(ipd.Audio(data, rate=rate))\n",
249+
"\n",
250+
"\n",
251+
"play(tts_output.audio[0].numpy(), rate=tts_output.sr)"
247252
]
248253
},
249254
{
@@ -277,7 +282,7 @@
277282
"metadata": {},
278283
"outputs": [],
279284
"source": [
280-
"ipd.Audio(file_path)"
285+
"play(file_path)"
281286
]
282287
},
283288
{
@@ -310,7 +315,7 @@
310315
"metadata": {},
311316
"outputs": [],
312317
"source": [
313-
"ipd.Audio(cloned_output.audio[0].numpy(), rate=cloned_output.sr)"
318+
"play(cloned_output.audio[0].numpy(), rate=cloned_output.sr)"
314319
]
315320
},
316321
{
@@ -338,6 +343,41 @@
338343
"metadata": {},
339344
"outputs": [],
340345
"source": [
346+
"from notebook_utils import quantization_widget\n",
347+
"\n",
348+
"to_quantize = quantization_widget()\n",
349+
"\n",
350+
"to_quantize"
351+
]
352+
},
353+
{
354+
"cell_type": "code",
355+
"execution_count": null,
356+
"metadata": {},
357+
"outputs": [],
358+
"source": [
359+
"# Fetch `skip_kernel_extension` module\n",
360+
"import requests\n",
361+
"\n",
362+
"r = requests.get(\n",
363+
" url=\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py\",\n",
364+
")\n",
365+
"open(\"skip_kernel_extension.py\", \"w\").write(r.text)\n",
366+
"\n",
367+
"ov_quantized_model = None\n",
368+
"quantized_ov_pipe = None\n",
369+
"\n",
370+
"%load_ext skip_kernel_extension"
371+
]
372+
},
373+
{
374+
"cell_type": "code",
375+
"execution_count": null,
376+
"metadata": {},
377+
"outputs": [],
378+
"source": [
379+
"%%skip not $to_quantize.value\n",
380+
"\n",
341381
"hf_model = OVHFModel(model_dir, device.value).model"
342382
]
343383
},
@@ -357,6 +397,8 @@
357397
"metadata": {},
358398
"outputs": [],
359399
"source": [
400+
"%%skip not $to_quantize.value\n",
401+
"\n",
360402
"from datasets import load_dataset\n",
361403
"\n",
362404
"libritts = load_dataset(\"parler-tts/libritts_r_filtered\", \"clean\", split=\"test.clean\")"
@@ -378,6 +420,8 @@
378420
"metadata": {},
379421
"outputs": [],
380422
"source": [
423+
"%%skip not $to_quantize.value\n",
424+
"\n",
381425
"import nncf\n",
382426
"from functools import partial\n",
383427
"import numpy as np\n",
@@ -403,8 +447,7 @@
403447
"\n",
404448
"\n",
405449
"dataset = nncf.Dataset(libritts, partial(transform_fn, interface=interface))\n",
406-
"\n",
407-
"quantized_model = nncf.quantize(hf_model.model, dataset, preset=nncf.QuantizationPreset.MIXED, model_type=nncf.ModelType.TRANSFORMER)"
450+
"quantized_model = nncf.quantize(hf_model.model, dataset, preset=nncf.QuantizationPreset.MIXED, model_type=nncf.ModelType.TRANSFORMER, ignored_scope=nncf.IgnoredScope(patterns=[\"__module.model.layers.*.self_attn/aten::scaled_dot_product_attention/ScaledDotProductAttention\"]))"
408451
]
409452
},
410453
{
@@ -425,7 +468,10 @@
425468
"metadata": {},
426469
"outputs": [],
427470
"source": [
471+
"%%skip not $to_quantize.value\n",
472+
"\n",
428473
"hf_model.model = quantized_model\n",
474+
"# int8_path = Path(f\"{model_dir}_compressed\")\n",
429475
"int8_path = Path(f\"{model_dir}_int8\")\n",
430476
"hf_model.save_pretrained(int8_path)\n",
431477
"interface.prompt_processor.tokenizer.save_pretrained(int8_path)\n",
@@ -439,6 +485,8 @@
439485
"metadata": {},
440486
"outputs": [],
441487
"source": [
488+
"%%skip not $to_quantize.value\n",
489+
"\n",
442490
"tts_output_int8 = interface_int8.generate(text=\"Hello, I'm working!\", temperature=0.1, repetition_penalty=1.1, max_length=4096)"
443491
]
444492
},
@@ -448,8 +496,9 @@
448496
"metadata": {},
449497
"outputs": [],
450498
"source": [
499+
"%%skip not $to_quantize.value\n",
451500
"# Non-quantized model output:\n",
452-
"ipd.Audio(tts_output.audio[0].numpy(), rate=tts_output.sr)"
501+
"play(tts_output.audio[0].numpy(), rate=tts_output.sr)"
453502
]
454503
},
455504
{
@@ -459,7 +508,7 @@
459508
"outputs": [],
460509
"source": [
461510
"# Quantized model output:\n",
462-
"ipd.Audio(tts_output_int8.audio[0].numpy(), rate=tts_output_int8.sr)"
511+
"play(tts_output_int8.audio[0].numpy(), rate=tts_output_int8.sr)"
463512
]
464513
},
465514
{
@@ -468,6 +517,8 @@
468517
"metadata": {},
469518
"outputs": [],
470519
"source": [
520+
"%%skip not $to_quantize.value\n",
521+
"\n",
471522
"speaker = interface_int8.load_speaker(\"speaker.pkl\")\n",
472523
"cloned_output_int8 = interface_int8.generate(\n",
473524
" text=\"This is a cloned voice speaking\",\n",
@@ -484,8 +535,9 @@
484535
"metadata": {},
485536
"outputs": [],
486537
"source": [
538+
"%%skip not $to_quantize.value\n",
487539
"# Non-quantized model output:\n",
488-
"ipd.Audio(cloned_output.audio[0].numpy(), rate=cloned_output.sr)"
540+
"play(cloned_output.audio[0].numpy(), rate=cloned_output.sr)"
489541
]
490542
},
491543
{
@@ -494,8 +546,9 @@
494546
"metadata": {},
495547
"outputs": [],
496548
"source": [
549+
"%%skip not $to_quantize.value\n",
497550
"# Quantized model output:\n",
498-
"ipd.Audio(cloned_output_int8.audio[0].numpy(), rate=cloned_output_int8.sr)"
551+
"play(cloned_output_int8.audio[0].numpy(), rate=cloned_output_int8.sr)"
499552
]
500553
},
501554
{
@@ -512,6 +565,8 @@
512565
"metadata": {},
513566
"outputs": [],
514567
"source": [
568+
"%%skip not $to_quantize.value\n",
569+
"\n",
515570
"import time\n",
516571
"import tqdm\n",
517572
"\n",
@@ -521,21 +576,21 @@
521576
" for text in tqdm.tqdm(dataset[\"text_normalized\"]):\n",
522577
" additional_gen_config = {\"pad_token_id\": interface.prompt_processor.tokenizer.eos_token_id}\n",
523578
" start = time.perf_counter()\n",
524-
" _ = interface.generate(text=text, additional_gen_config=additional_gen_config)\n",
579+
" _ = interface.generate(text=text, max_length=256, additional_gen_config=additional_gen_config)\n",
525580
" end = time.perf_counter()\n",
526581
" delta = end - start\n",
527582
" inference_time.append(delta)\n",
528583
" return np.median(inference_time)\n",
529584
"\n",
530585
"\n",
531-
"subset_size = 25\n",
532586
"interface = InterfaceOV(model_dir, device.value)\n",
587+
"dataset_size = 25\n",
533588
"\n",
534-
"fp_inference_time = calculate_inference_time(interface, libritts[:subset_size])\n",
589+
"fp_inference_time = calculate_inference_time(interface, libritts[:dataset_size])\n",
535590
"print(f\"FP model generate time: {fp_inference_time}\")\n",
536591
"\n",
537592
"interface_int8 = InterfaceOV(int8_path, device.value)\n",
538-
"int_inference_time = calculate_inference_time(interface_int8, libritts[:subset_size])\n",
593+
"int_inference_time = calculate_inference_time(interface_int8, libritts[:dataset_size])\n",
539594
"print(f\"INT model generate time: {int_inference_time}\")"
540595
]
541596
},

0 commit comments

Comments
 (0)