From 894d859ac0a32963335c76f05dd7e869d87ad4e2 Mon Sep 17 00:00:00 2001
From: Ekaterina Aidova
Date: Mon, 20 Jan 2025 19:23:14 +0400
Subject: [PATCH] distil whisper fix quantization (#2674)

---
 notebooks/distil-whisper-asr/distil-whisper-asr.ipynb      | 4 ++--
 notebooks/outetts-text-to-speech/ov_outetts_helper.py      | 6 +++++-
 notebooks/phi-3-vision/phi-3-vision.ipynb                  | 2 ++
 .../sparsity-optimization/sparsity-optimization.ipynb      | 4 +++-
 notebooks/stable-audio/stable-audio.ipynb                  | 2 +-
 .../stable-diffusion-v3/stable-diffusion-v3-torch-fx.ipynb | 3 ++-
 notebooks/whisper-asr-genai/whisper-asr-genai.ipynb        | 7 ++++++-
 7 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/notebooks/distil-whisper-asr/distil-whisper-asr.ipynb b/notebooks/distil-whisper-asr/distil-whisper-asr.ipynb
index fe07fd701dc..e055bb81486 100644
--- a/notebooks/distil-whisper-asr/distil-whisper-asr.ipynb
+++ b/notebooks/distil-whisper-asr/distil-whisper-asr.ipynb
@@ -1015,7 +1015,7 @@
     "### Quantize Distil-Whisper encoder and decoder models\n",
     "[back to top ⬆️](#Table-of-contents:)\n",
     "\n",
-    "Below we run the `quantize` function which calls `nncf.quantize` on Distil-Whisper encoder and decoder-with-past models. We don't quantize first-step-decoder because its share in whole inference time is negligible."
+    "Below we run the `quantize` function, which calls `nncf.quantize` on the Distil-Whisper encoder and decoder models. We don't quantize the first-step decoder because its share of the whole inference time is negligible."
    ]
   },
   {
@@ -1154,7 +1154,7 @@
     "        # Smooth Quant algorithm reduces activation quantization error; optimal alpha value was obtained through grid search\n",
     "        advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.95)\n",
     "    )\n",
-    "    ov.save_model(quantized_decoder_with_past, quantized_model_path / \"openvino_decoder_model.xml\")\n",
+    "    ov.save_model(quantized_decoder, quantized_model_path / \"openvino_decoder_model.xml\")\n",
     "    del quantized_decoder\n",
     "    del decoder_calibration_data\n",
     "    gc.collect()\n",
diff --git a/notebooks/outetts-text-to-speech/ov_outetts_helper.py b/notebooks/outetts-text-to-speech/ov_outetts_helper.py
index e2c8af93261..e451020ce09 100644
--- a/notebooks/outetts-text-to-speech/ov_outetts_helper.py
+++ b/notebooks/outetts-text-to-speech/ov_outetts_helper.py
@@ -5,7 +5,11 @@
 try:
     from outetts.version.v1.interface import InterfaceHF
     from outetts.version.v1.prompt_processor import PromptProcessor
-    from outetts.version.v1.model import HFModel
+
+    try:
+        from outetts.version.v1.model import HFModel
+    except ImportError:
+        from outetts.models.hf_model import HFModel
     from outetts.wav_tokenizer.audio_codec import AudioCodec
 
     updated_version = True
diff --git a/notebooks/phi-3-vision/phi-3-vision.ipynb b/notebooks/phi-3-vision/phi-3-vision.ipynb
index 97e53331a54..82a27fc0396 100644
--- a/notebooks/phi-3-vision/phi-3-vision.ipynb
+++ b/notebooks/phi-3-vision/phi-3-vision.ipynb
@@ -51,6 +51,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import platform\n",
+    "\n",
     "%pip install -q -U \"torch>=2.1\" \"torchvision\" \"transformers>=4.45\" \"protobuf>=3.20\" \"gradio>=4.26\" \"Pillow\" \"accelerate\" \"tqdm\" --extra-index-url https://download.pytorch.org/whl/cpu\n",
     "%pip install --pre -qU \"openvino>=2024.6.0\" \"openvino-tokenizers>=2024.6.0\" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly\n",
     "%pip install -q -U \"nncf>=2.14.0\"\n",
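Note: the distil-whisper hunks above fix a stale variable name. `nncf.quantize` returns the quantized decoder into `quantized_decoder`, but the old save call still referenced `quantized_decoder_with_past`, so it either failed with a `NameError` or saved the wrong model. A minimal sketch of the intended flow, assuming the notebook's `decoder_model`, `decoder_calibration_data`, and `quantized_model_path` objects built earlier:

    import gc

    import nncf
    import openvino as ov

    quantized_decoder = nncf.quantize(
        decoder_model,
        calibration_dataset=nncf.Dataset(decoder_calibration_data),
        model_type=nncf.ModelType.TRANSFORMER,
        # Smooth Quant reduces activation quantization error; alpha found by grid search
        advanced_parameters=nncf.AdvancedQuantizationParameters(smooth_quant_alpha=0.95),
    )
    # Save the object nncf.quantize actually returned, not a stale name
    ov.save_model(quantized_decoder, quantized_model_path / "openvino_decoder_model.xml")
    del quantized_decoder, decoder_calibration_data
    gc.collect()
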
diff --git a/notebooks/sparsity-optimization/sparsity-optimization.ipynb b/notebooks/sparsity-optimization/sparsity-optimization.ipynb
index f032c5d7e65..683ff2f4b01 100644
--- a/notebooks/sparsity-optimization/sparsity-optimization.ipynb
+++ b/notebooks/sparsity-optimization/sparsity-optimization.ipynb
@@ -124,6 +124,8 @@
    },
    "outputs": [],
    "source": [
+    "import torch\n",
+    "\n",
     "# The following model has been quantized, sparsified using Optimum-Intel 1.7 which is enabled by OpenVINO and NNCF\n",
     "# for reproducibility, refer https://huggingface.co/OpenVINO/bert-base-uncased-sst2-int8-unstructured80\n",
     "model_id = \"OpenVINO/bert-base-uncased-sst2-int8-unstructured80\"\n",
@@ -133,7 +135,7 @@
     "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
     "\n",
     "# Let's take the model for a spin!\n",
-    "sentiment_classifier = pipeline(\"text-classification\", model=ov_model, tokenizer=tokenizer)\n",
+    "sentiment_classifier = pipeline(\"text-classification\", model=ov_model, tokenizer=tokenizer, device=torch.device(\"cpu\"))\n",
     "\n",
     "text = \"He's a dreadful magician.\"\n",
     "outputs = sentiment_classifier(text)\n",
diff --git a/notebooks/stable-audio/stable-audio.ipynb b/notebooks/stable-audio/stable-audio.ipynb
index b807823726c..e32435c5ac2 100644
--- a/notebooks/stable-audio/stable-audio.ipynb
+++ b/notebooks/stable-audio/stable-audio.ipynb
@@ -64,7 +64,7 @@
    "source": [
     "import platform\n",
     "\n",
-    "%pip install -q \"torch>=2.2\" torchaudio einops einops-exts huggingface-hub k-diffusion pytorch_lightning alias-free-torch ema-pytorch transformers>=4.45 \"gradio>=4.19 --extra-index-url https://download.pytorch.org/whl/cpu\n",
+    "%pip install -q \"torch>=2.2\" \"torchaudio\" \"einops\" \"einops-exts\" \"huggingface-hub\" \"k-diffusion\" \"pytorch_lightning\" \"alias-free-torch\" \"ema-pytorch\" \"transformers>=4.45\" \"gradio>=4.19\" --extra-index-url https://download.pytorch.org/whl/cpu\n",
     "%pip install -q --no-deps \"stable-audio-tools\"\n",
     "%pip install -q \"nncf>=2.12.0\"\n",
     "if platform.system() == \"Darwin\":\n",
diff --git a/notebooks/stable-diffusion-v3/stable-diffusion-v3-torch-fx.ipynb b/notebooks/stable-diffusion-v3/stable-diffusion-v3-torch-fx.ipynb
index 4fed1daafed..0298e07882f 100644
--- a/notebooks/stable-diffusion-v3/stable-diffusion-v3-torch-fx.ipynb
+++ b/notebooks/stable-diffusion-v3/stable-diffusion-v3-torch-fx.ipynb
@@ -450,7 +450,8 @@
     "    ).shuffle(seed=42)\n",
     "\n",
     "    transformer_config = dict(pipe.transformer.config)\n",
-    "    del transformer_config[\"model\"]\n",
+    "    if \"model\" in transformer_config:\n",
+    "        del transformer_config[\"model\"]\n",
     "    wrapped_unet = UNetWrapper(pipe.transformer.model, transformer_config)\n",
     "    pipe.transformer = wrapped_unet\n",
     "    # Run inference for data collection\n",
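Note: the stable-diffusion-v3 hunk above guards the `del` so a config without a "model" entry no longer raises `KeyError`. `dict.pop` with a default is an equivalent, exception-free alternative; a self-contained sketch with a toy config, not the notebook's actual object:

    # Toy stand-in for the transformer config dict
    config = {"sample_size": 128, "model": "fx-wrapped module"}

    config.pop("model", None)  # removes the key when present
    config.pop("model", None)  # silently a no-op when absent, unlike `del`
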
\"openvino-genai>=2024.5.0\" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly\n", "%pip install -q datasets \"gradio>=4.0\" \"soundfile>=0.12\" \"librosa\" \"python-ffmpeg<=1.0.16\"\n", "%pip install -q \"nncf>=2.14.0\" \"jiwer\" \"typing_extensions>=4.9\"\n", "if platform.system() == \"Darwin\":\n", + " %pip install -q \"numpy<2.0\"\n", + "\n", + "from transformers.utils.import_utils import is_tf_available\n", + "\n", + "if is_tf_available():\n", " %pip install -q \"numpy<2.0\"" ] },