|
366 | 366 | ")\n",
|
367 | 367 | "open(\"skip_kernel_extension.py\", \"w\").write(r.text)\n",
|
368 | 368 | "\n",
|
369 |
| - "ov_quantized_model = None\n", |
370 |
| - "quantized_ov_pipe = None\n", |
371 |
| - "\n", |
372 | 369 | "%load_ext skip_kernel_extension"
|
373 | 370 | ]
|
374 | 371 | },
|
375 |
| - { |
376 |
| - "cell_type": "code", |
377 |
| - "execution_count": null, |
378 |
| - "metadata": {}, |
379 |
| - "outputs": [], |
380 |
| - "source": [ |
381 |
| - "%%skip not $to_quantize.value\n", |
382 |
| - "\n", |
383 |
| - "hf_model = OVHFModel(model_dir, device.value).model" |
384 |
| - ] |
385 |
| - }, |
386 | 372 | {
|
387 | 373 | "cell_type": "markdown",
|
388 | 374 | "metadata": {},
|
|
447 | 433 | "\n",
|
448 | 434 | " return inputs\n",
|
449 | 435 | "\n",
|
450 |
| - "\n", |
| 436 | + "hf_model = OVHFModel(model_dir, device.value).model\n", |
451 | 437 | "dataset = nncf.Dataset(libritts, partial(transform_fn, interface=interface))\n",
|
452 |
| - "quantized_model = nncf.quantize(hf_model.model, dataset, preset=nncf.QuantizationPreset.MIXED, model_type=nncf.ModelType.TRANSFORMER, ignored_scope=nncf.IgnoredScope(patterns=[\"__module.model.layers.*.self_attn/aten::scaled_dot_product_attention/ScaledDotProductAttention\"]))" |
| 438 | + "\n", |
| 439 | + "quantized_model = nncf.quantize(\n", |
| 440 | + " hf_model.model,\n", |
| 441 | + " dataset,\n", |
| 442 | + " preset=nncf.QuantizationPreset.MIXED,\n", |
| 443 | + " model_type=nncf.ModelType.TRANSFORMER,\n", |
| 444 | + " ignored_scope=nncf.IgnoredScope(\n", |
| 445 | + " patterns=[\n", |
| 446 | + " \"__module.model.layers.*.self_attn/aten::scaled_dot_product_attention/ScaledDotProductAttention\"\n", |
| 447 | + " ]\n", |
| 448 | + " )\n", |
| 449 | + ")\n", |
| 450 | + "\n", |
| 451 | + "hf_model.model = quantized_model\n", |
| 452 | + "int8_path = Path(f\"{model_dir}_int8_ignored\")\n", |
| 453 | + "hf_model.save_pretrained(int8_path)\n", |
| 454 | + "interface.prompt_processor.tokenizer.save_pretrained(int8_path)" |
453 | 455 | ]
|
454 | 456 | },
|
455 | 457 | {
|
|
472 | 474 | "source": [
|
473 | 475 | "%%skip not $to_quantize.value\n",
|
474 | 476 | "\n",
|
475 |
| - "hf_model.model = quantized_model\n", |
476 |
| - "# int8_path = Path(f\"{model_dir}_compressed\")\n", |
477 |
| - "int8_path = Path(f\"{model_dir}_int8\")\n", |
478 |
| - "hf_model.save_pretrained(int8_path)\n", |
479 |
| - "interface.prompt_processor.tokenizer.save_pretrained(int8_path)\n", |
480 |
| - "\n", |
481 | 477 | "interface_int8 = InterfaceOV(int8_path, device.value)"
|
482 | 478 | ]
|
483 | 479 | },
|
|
0 commit comments