
Commit 4e70504

FP16 Calibration Script Upgrade (#1602)
* Added improvements to the fp16 calibration script * Added default value for half type * tqdm tweak * Removed unneseccary dels * Added note regarding fp16 weights * Updated deepfloyd notebook calibration * deepfloyd tweak * Added calibration to llm-chatbot * Added calibration to tiny-sd-unet * Updated OV version for tiny-sd notebook * Improved chat-llm calibration logic * Fix the case when SNR is near-equal for many nodes * Revert calibration code for red pajama; add inference precision hint instead * Disable calibration verbosity for tiny-sd * Tweaks * Fix rag-chatbot * Tweak spelling * Update batch size for deepfloyd * Add ov uninstall command * Removed commented ops * Removed pickled UNet input. Made it downloadable.
1 parent ad04ef6 commit 4e70504
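
The calibration utility these notebooks call (partially_upcast_nodes_to_fp32 from notebooks/utils/model_upcast_utils.py) selectively keeps the most accuracy-sensitive operations in FP32; judging by the calls in the diffs below, it takes an example input plus upcast_ratio, operation_types, and batch_size arguments. The snippet below is only an illustrative sketch of the SNR-ranking selection idea the commit message hints at ("Fix the case when SNR is near-equal for many nodes"); the helper name and the name-based tie-break are assumptions, not the actual script.

# Illustrative sketch only -- not the actual model_upcast_utils implementation.
# Rank nodes by the signal-to-noise ratio of their FP16 output vs. the FP32
# reference and upcast the lowest-SNR fraction, breaking ties deterministically
# by name so near-equal SNR values do not make the selection order-dependent.
from typing import Dict, List


def select_nodes_to_upcast(snr_by_node: Dict[str, float], upcast_ratio: float) -> List[str]:
    ranked = sorted(snr_by_node.items(), key=lambda item: (item[1], item[0]))
    num_to_upcast = int(len(ranked) * upcast_ratio)
    return [name for name, _ in ranked[:num_to_upcast]]


# Example: with four MatMul nodes and upcast_ratio=0.5, the two worst are selected.
snrs = {"MatMul_12": 3.1, "MatMul_7": 48.0, "MatMul_3": 3.1, "MatMul_9": 120.5}
print(select_nodes_to_upcast(snrs, upcast_ratio=0.5))  # ['MatMul_12', 'MatMul_3']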


5 files changed (+339, -158 lines)


notebooks/238-deepfloyd-if/238-deep-floyd-if-convert.ipynb

Lines changed: 7 additions & 5 deletions
@@ -116,11 +116,12 @@
 }
 ],
 "source": [
-"# # Set up requirements\n",
+"# Set up requirements\n",
 "\n",
+"%pip uninstall -q -y openvino-dev openvino openvino-nightly\n",
 "%pip install -q --upgrade pip\n",
 "%pip install -q transformers \"diffusers>=0.16.1\" accelerate safetensors sentencepiece huggingface_hub --extra-index-url https://download.pytorch.org/whl/cpu\n",
-"%pip install -q \"openvino>=2023.2.0\" opencv-python\n",
+"%pip install -q \"openvino-nightly\" opencv-python\n",
 "%pip install -q gradio"
 ]
 },
@@ -151,7 +152,7 @@
 "checkpoint_variant = 'fp16'\n",
 "model_dtype = torch.float32\n",
 "ir_input_type = ov.Type.f32\n",
-"compress_to_fp16 = False\n",
+"compress_to_fp16 = True\n",
 "\n",
 "models_dir = Path('./models')\n",
 "models_dir.mkdir(exist_ok=True)\n",
@@ -872,9 +873,10 @@
 "if 'GPU' in core.available_devices and not is_model_partially_upcasted(encoder_ov_model):\n",
 "    example_input_prompt = 'ultra close color photo portrait of rainbow owl with deer horns in the woods'\n",
 "    text_inputs = stage_1.tokenizer(example_input_prompt, max_length=77, padding=\"max_length\", return_tensors=\"np\")\n",
-"    upcasted_ov_model = partially_upcast_nodes_to_fp32(encoder_ov_model, text_inputs.input_ids)\n",
+"    upcasted_ov_model = partially_upcast_nodes_to_fp32(encoder_ov_model, text_inputs.input_ids, upcast_ratio=0.05,\n",
+"                                                       operation_types=[\"MatMul\"], batch_size=10)\n",
 "    del encoder_ov_model\n",
-"    gc.collect();\n",
+"    gc.collect()\n",
 "\n",
 "    import os\n",
 "    os.remove(encoder_ir_path)\n",
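
For reference, the same upcasting flow as a standalone sketch (the IR path and the dummy token ids are placeholders; the upcast_ratio, operation_types, and batch_size values are the ones introduced above):

# Standalone sketch of the text-encoder upcasting flow applied in this hunk.
# "text_encoder.xml" is a placeholder IR path; the dummy token ids stand in for
# the tokenized example prompt the notebook builds with stage_1.tokenizer.
import numpy as np
import openvino as ov
from model_upcast_utils import is_model_partially_upcasted, partially_upcast_nodes_to_fp32

core = ov.Core()
encoder_ov_model = core.read_model("text_encoder.xml")
if not is_model_partially_upcasted(encoder_ov_model):
    example_input_ids = np.ones((1, 77), dtype=np.int64)  # max_length=77, as in the notebook
    upcasted_ov_model = partially_upcast_nodes_to_fp32(
        encoder_ov_model, example_input_ids,
        upcast_ratio=0.05, operation_types=["MatMul"], batch_size=10,
    )
    ov.save_model(upcasted_ov_model, "text_encoder.xml")  # overwrite the IR, as the notebook does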

notebooks/251-tiny-sd-image-generation/251-tiny-sd-image-generation.ipynb

Lines changed: 50 additions & 2 deletions
@@ -54,7 +54,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision \"openvino>=2023.1.0\" \"diffusers>=0.18.0\" \"transformers>=4.30.2\" \"gradio\" "
+"%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu torch torchvision \"openvino-nightly\" \"diffusers>=0.18.0\" \"transformers>=4.30.2\" \"gradio\""
 ]
 },
 {
@@ -907,13 +907,61 @@
 "text_enc = core.compile_model(TEXT_ENCODER_OV_PATH, device.value)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {
+"collapsed": false
+},
+"source": [
+"### Calibrate UNet for GPU inference\n",
+"\n",
+"On a GPU device a model is executed in FP16 precision. For the Tiny-SD UNet model there are known accuracy issues caused by this. Therefore, a special calibration procedure is used to selectively mark some operations to be executed in full precision."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": false
+},
+"outputs": [],
+"source": [
+"import pickle\n",
+"import urllib.request\n",
+"\n",
+"# Fetch `model_upcast_utils` which helps to restore accuracy when inferred on GPU\n",
+"urllib.request.urlretrieve(\n",
+"    url='https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/main/notebooks/utils/model_upcast_utils.py',\n",
+"    filename='model_upcast_utils.py'\n",
+")\n",
+"\n",
+"# Fetch an example input for UNet model needed for upcasting calibration process\n",
+"urllib.request.urlretrieve(\n",
+"    url='https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/pkl/unet_calibration_example_input.pkl',\n",
+"    filename='unet_calibration_example_input.pkl'\n",
+")\n",
+"from model_upcast_utils import is_model_partially_upcasted, partially_upcast_nodes_to_fp32\n",
+"\n",
+"unet_model = core.read_model(UNET_OV_PATH)\n",
+"if 'GPU' in core.available_devices and not is_model_partially_upcasted(unet_model):\n",
+"    with open(\"unet_calibration_example_input.pkl\", \"rb\") as f:\n",
+"        example_input = pickle.load(f)\n",
+"    unet_model = partially_upcast_nodes_to_fp32(unet_model, example_input, upcast_ratio=0.7,\n",
+"                                                operation_types=[\"Convolution\"])\n",
+"\n",
+"    import os\n",
+"    os.remove(UNET_OV_PATH)\n",
+"    os.remove(str(UNET_OV_PATH).replace(\".xml\", \".bin\"))\n",
+"    ov.save_model(unet_model, UNET_OV_PATH)"
+]
+},
 {
 "cell_type": "code",
 "execution_count": 10,
 "metadata": {},
 "outputs": [],
 "source": [
-"unet_model = core.compile_model(UNET_OV_PATH, device.value)"
+"unet_model = core.compile_model(unet_model, device.value)"
 ]
 },
 {
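
A quick follow-up check for the new cell above, reusing only names it already defines (core, UNET_OV_PATH, is_model_partially_upcasted): after ov.save_model the re-read IR should report as already upcasted, which keeps the calibration cell a no-op on later runs.

# Sanity check: the saved UNet IR now carries the partial-upcast marking,
# so re-running the calibration cell skips the (slow) upcasting pass.
assert is_model_partially_upcasted(core.read_model(UNET_OV_PATH))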

notebooks/254-llm-chatbot/254-llm-chatbot.ipynb

Lines changed: 7 additions & 2 deletions
@@ -736,10 +736,15 @@
 "    model_dir = fp16_model_dir\n",
 "print(f\"Loading model from {model_dir}\")\n",
 "\n",
-"model_name = model_configuration[\"model_id\"]\n",
-"class_key = model_id.value.split(\"-\")[0]\n",
 "ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n",
 "\n",
+"# On a GPU device a model is executed in FP16 precision. For the red-pajama-3b-chat model there are known accuracy\n",
+"# issues caused by this, which we avoid by setting the inference precision hint to \"f32\".\n",
+"if model_id.value == \"red-pajama-3b-chat\" and \"GPU\" in core.available_devices and device.value in [\"GPU\", \"AUTO\"]:\n",
+"    ov_config[\"INFERENCE_PRECISION_HINT\"] = \"f32\"\n",
+"\n",
+"model_name = model_configuration[\"model_id\"]\n",
+"class_key = model_id.value.split(\"-\")[0]\n",
 "tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
 "\n",
 "model_class = (\n",

notebooks/254-llm-chatbot/254-rag-chatbot.ipynb

Lines changed: 7 additions & 2 deletions
@@ -939,11 +939,16 @@
 "    model_dir = fp16_model_dir\n",
 "print(f\"Loading model from {model_dir}\")\n",
 "\n",
+"ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n",
+"\n",
+"# On a GPU device a model is executed in FP16 precision. For the red-pajama-3b-chat model there are known accuracy\n",
+"# issues caused by this, which we avoid by setting the inference precision hint to \"f32\".\n",
+"if llm_model_id.value == \"red-pajama-3b-chat\" and \"GPU\" in core.available_devices and llm_device.value in [\"GPU\", \"AUTO\"]:\n",
+"    ov_config[\"INFERENCE_PRECISION_HINT\"] = \"f32\"\n",
+"\n",
 "model_name = llm_model_configuration[\"model_id\"]\n",
 "stop_tokens = llm_model_configuration.get(\"stop_tokens\")\n",
 "class_key = llm_model_id.value.split(\"-\")[0]\n",
-"ov_config = {\"PERFORMANCE_HINT\": \"LATENCY\", \"NUM_STREAMS\": \"1\", \"CACHE_DIR\": \"\"}\n",
-"\n",
 "tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
 "\n",
 "class StopOnTokens(StoppingCriteria):\n",
