|
75 | 75 | "\n",
|
76 | 76 | "\n",
|
77 | 77 | "%pip install -q \"torch>=2.3\" \"torchvision>=0.18.1\" --extra-index-url https://download.pytorch.org/whl/cpu\n",
|
78 |
| - "%pip install -q \"transformers>=4.45\" \"git+https://github.com/huggingface/optimum-intel.git\" --extra-index-url https://download.pytorch.org/whl/cpu\n", |
79 |
| - "%pip install -q -U \"openvino>=2024.5.0\" \"openvino-tokenizers>=2024.5.0\" \"openvino-genai>=2024.5.0\"\n", |
| 78 | + "%pip install -q -U \"transformers>=4.45\" --extra-index-url https://download.pytorch.org/whl/cpu\n", |
| 79 | + "%pip install -q \"git+https://github.com/huggingface/optimum-intel.git\" --extra-index-url https://download.pytorch.org/whl/cpu\n", |
| 80 | + "%pip install --pre -q -U \"openvino>=2024.5.0\" \"openvino-tokenizers>=2024.5.0\" \"openvino-genai>=2024.5.0\"\n", |
80 | 81 | "%pip install -q datasets \"gradio>=4.0\" \"soundfile>=0.12\" \"librosa\" \"python-ffmpeg<=1.0.16\"\n",
|
81 | 82 | "%pip install -q \"nncf>=2.14.0\" \"jiwer\" \"typing_extensions>=4.9\"\n",
|
82 | 83 | "if platform.system() == \"Darwin\":\n",
|
|
995 | 996 | " encoder_calibration_data = []\n",
|
996 | 997 | " decoder_calibration_data = []\n",
|
997 | 998 | " ov_model.encoder.request = InferRequestWrapper(ov_model.encoder.request, encoder_calibration_data, apply_caching=True)\n",
|
998 |
| - " ov_model.decoder_with_past.request = InferRequestWrapper(ov_model.decoder_with_past.request,\n", |
| 999 | + " ov_model.decoder.request = InferRequestWrapper(ov_model.decoder.request,\n", |
999 | 1000 | " decoder_calibration_data,\n",
|
1000 | 1001 | " apply_caching=True)\n",
|
1001 | 1002 | "\n",
|
|
1012 | 1013 | " pipe(sample[\"audio\"], return_timestamps=True)\n",
|
1013 | 1014 | " finally:\n",
|
1014 | 1015 | " ov_model.encoder.request = ov_model.encoder.request.request\n",
|
1015 |
| - " ov_model.decoder_with_past.request = ov_model.decoder_with_past.request.request\n", |
| 1016 | + " ov_model.decoder.request = ov_model.decoder.request.request\n", |
1016 | 1017 | "\n",
|
1017 | 1018 | " return encoder_calibration_data, decoder_calibration_data"
|
1018 | 1019 | ]
|
|
1070 | 1071 | " del encoder_calibration_data\n",
|
1071 | 1072 | " gc.collect()\n",
|
1072 | 1073 | "\n",
|
1073 |
| - " print(\"Quantizing decoder with past\")\n", |
1074 |
| - " quantized_decoder_with_past = nncf.quantize(\n", |
1075 |
| - " ov_model.decoder_with_past.model,\n", |
| 1074 | + " print(\"Quantizing decoder\")\n", |
| 1075 | + " quantized_decoder = nncf.quantize(\n", |
| 1076 | + " ov_model.decoder.model,\n", |
1076 | 1077 | " nncf.Dataset(decoder_calibration_data),\n",
|
1077 | 1078 | " subset_size=len(decoder_calibration_data),\n",
|
1078 | 1079 | " model_type=nncf.ModelType.TRANSFORMER,\n",
|
1079 | 1080 | " # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search\n",
|
1080 | 1081 | " advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.96)\n",
|
1081 | 1082 | " )\n",
|
1082 |
| - " ov.save_model(quantized_decoder_with_past, quantized_model_path / \"openvino_decoder_with_past_model.xml\")\n", |
1083 |
| - " del quantized_decoder_with_past\n", |
| 1083 | + " ov.save_model(quantized_decoder, quantized_model_path / \"openvino_decoder_model.xml\")\n", |
| 1084 | + " del quantized_decoder\n", |
1084 | 1085 | " del decoder_calibration_data\n",
|
1085 | 1086 | " gc.collect()\n",
|
1086 | 1087 | "\n",
|
|
0 commit comments