
Commit 3b51b95

Added a fix for llm-chatbot int8 weight compression in case fp16 model already exists (#1479)
Parent: 13aee00

File tree: 1 file changed, 4 additions (+), 1 deletion (-)

notebooks/254-llm-chatbot/254-llm-chatbot.ipynb

Lines changed: 4 additions & 1 deletion
@@ -554,7 +554,10 @@
     "        return\n",
     "    int8_model_dir.mkdir(parents=True, exist_ok=True)\n",
     "    if not model_configuration[\"remote\"]:\n",
-    "        if not fp16_model_dir.exists():\n",
+    "        if fp16_model_dir.exists():\n",
+    "            ov_model = OVModelForCausalLM.from_pretrained(fp16_model_dir, compile=False,\n",
+    "                                                          ov_config={\"CACHE_DIR\": \"\"})\n",
+    "        else:\n",
     "            ov_model = OVModelForCausalLM.from_pretrained(\n",
     "                pt_model_id, export=True, compile=False\n",
     "            ).half()\n",
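The control flow of the fix can be sketched without the heavy model classes. The helper below is a hypothetical stand-in (the function name, return labels, and the exact guard at the top are assumptions, not the notebook's code): it returns a label for the branch taken, where "load-fp16" corresponds to the new `OVModelForCausalLM.from_pretrained(fp16_model_dir, ...)` path added by this commit and "export-pt" to the previously unconditional re-export from the PyTorch checkpoint.

```python
from pathlib import Path

def convert_to_int8(fp16_model_dir: Path, int8_model_dir: Path, remote: bool) -> str:
    """Sketch of the fixed branching in the int8 weight-compression helper.

    Returns a label for the branch taken instead of converting a model.
    In the notebook, "load-fp16" loads the already-converted fp16 OpenVINO
    model from disk, and "export-pt" exports it from the PyTorch checkpoint.
    """
    if int8_model_dir.exists():
        return "skip"  # assumed guard: compressed model already present
    int8_model_dir.mkdir(parents=True, exist_ok=True)
    if not remote:
        if fp16_model_dir.exists():
            # The fix: reuse the fp16 model already on disk rather than
            # re-exporting from PyTorch a second time.
            return "load-fp16"
        # Fresh checkout, no fp16 model yet: export from the PyTorch model id.
        return "export-pt"
    return "remote"  # remote models follow a separate path in the notebook
```

Before the fix, the `if not fp16_model_dir.exists():` branch meant that when the fp16 model *did* exist, `ov_model` was never assigned and the subsequent compression step failed; the commit inverts the condition and loads the existing fp16 model in that case.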
