diff --git a/.docker/Pipfile b/.docker/Pipfile
index 98ba54d007a..1e9954abdff 100644
--- a/.docker/Pipfile
+++ b/.docker/Pipfile
@@ -26,7 +26,7 @@ notebook = "<7.0.0"
 numpy = ">=1.21.0"
 onnx = ">=1.11.0"
 opencv-python = "*"
-openvino = {version = "==2024.5.0"}
+openvino = {version = "==2025.0.0"}
 openvino-telemetry = "==2023.2.1"
 ovmsclient = "*"
 Pillow = ">=8.3.2"
diff --git a/.docker/Pipfile.lock b/.docker/Pipfile.lock
index 612c85db4c2..e6a5de91ada 100644
--- a/.docker/Pipfile.lock
+++ b/.docker/Pipfile.lock
@@ -2380,10 +2380,10 @@
                 "sha256:db30ba9e8af7188436ff76ce79ad7a8a2a5204e3c4757d2728223270b6a73080",
                 "sha256:e941dfe7196b3a75364fcee4e0fe79cee63fa9d4e2a1852fb459048d7daa83f5",
                 "sha256:f31dfd3d793a787a799930568e9e1dc736bb3a7069b266f2d80b2ad623c29d84",
-                "sha256:b1d2551c5d4f9e28d72e2f0d48ee091223082bf6ebb3a6d6e6712cbdbab60511",
-                "sha256:54411e20c90b42614ed02762e4fc038268f08c0c78d56a94b1f99a5d5a41d293"
+                "sha256:36f4094e8ec90984749434bec05cdca2cf07f0f0ef9d5b1e5afb931598fdf8eb",
+                "sha256:ea6344414b631043e6bae94698faae7fbcc9f3a007638bbc81871b6082ccec38"
             ],
-            "version": "==2024.5.0"
+            "version": "==2025.0.0"
         },
         "openvino-telemetry": {
             "hashes": [
diff --git a/supplementary_materials/notebooks/fastdraft-deepseek/fastdraft_deepseek.ipynb b/supplementary_materials/notebooks/fastdraft-deepseek/fastdraft_deepseek.ipynb
index 5fb133d711a..782b0ff4fd8 100644
--- a/supplementary_materials/notebooks/fastdraft-deepseek/fastdraft_deepseek.ipynb
+++ b/supplementary_materials/notebooks/fastdraft-deepseek/fastdraft_deepseek.ipynb
@@ -103,7 +103,7 @@
     "    ! optimum-cli export openvino --model $model_id --task text-generation-with-past --weight-format int4 $model_dir\n",
     "\n",
     "# convert OV tokenizer if needed\n",
-    "if not (model_dir / 'openvino_tokenizer.xml').exists():\n",
+    "if not (model_dir / \"openvino_tokenizer.xml\").exists():\n",
     "    ! convert_tokenizer $model_dir --with-detokenizer -o $model_dir"
    ]
   },
@@ -135,6 +135,7 @@
     "        print(subword, end=\"\", flush=True)\n",
     "    return False\n",
     "\n",
+    "\n",
     "# Define scheduler\n",
     "scheduler_config = ov_genai.SchedulerConfig()\n",
     "scheduler_config.num_kv_blocks = 2048 // 16\n",
@@ -221,15 +222,15 @@
     "import huggingface_hub as hf_hub\n",
     "\n",
     "draft_model_id = \"OpenVINO/Llama-3.1-8B-Instruct-FastDraft-150M-int8-ov\"\n",
-    "draft_model_path = Path('DeepSeek-R1-Llama-FastDraft-int8-ov')\n",
+    "draft_model_path = Path(\"DeepSeek-R1-Llama-FastDraft-int8-ov\")\n",
     "\n",
     "if not draft_model_path.exists():\n",
     "    hf_hub.snapshot_download(draft_model_id, local_dir=draft_model_path)\n",
     "\n",
     "# We need tokenizers to match between the target and draft model so we apply this workaround\n",
     "if not filecmp.cmp(str(model_dir / \"openvino_tokenizer.xml\"), str(draft_model_path / \"openvino_tokenizer.xml\"), shallow=False):\n",
-    "    for fname in ['openvino_tokenizer.xml', 'openvino_tokenizer.bin', 'openvino_detokenizer.xml', 'openvino_detokenizer.bin']:\n",
-    "        shutil.copy(model_dir / fname, draft_model_path / fname)\n"
+    "    for fname in [\"openvino_tokenizer.xml\", \"openvino_tokenizer.bin\", \"openvino_detokenizer.xml\", \"openvino_detokenizer.bin\"]:\n",
+    "        shutil.copy(model_dir / fname, draft_model_path / fname)"
    ]
   },
   {
diff --git a/supplementary_materials/notebooks/fastdraft-deepseek/gradio_helper.py b/supplementary_materials/notebooks/fastdraft-deepseek/gradio_helper.py
index e7b77c18abc..e611eec2199 100644
--- a/supplementary_materials/notebooks/fastdraft-deepseek/gradio_helper.py
+++ b/supplementary_materials/notebooks/fastdraft-deepseek/gradio_helper.py
@@ -13,7 +13,7 @@ english_examples = [
     ["Which is bigger, 9.9 or 9.11?"],
     ["Classify the following numbers as 'prime' or 'composite' - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16.?"],
-    ["What are the classifications of Academic Degrees?"], 
+    ["What are the classifications of Academic Degrees?"],
     ["Which word does not belong to the other: Hammer, Screwdriver, Nail, Wood"],
     ["Identify which instrument is string or percussion: Kpanlogo, Shamisen"],
     ["Which of the following are colors: red, black, yellow, orange, sun, sunflower, chips, book, white, pink, blue, keyboard."],
@@ -185,10 +185,16 @@ def apply_format(partial_text: str):
             list-style-position: outside;
             margin: 0.5em 15px;
             padding: 0px 0px 10px 15px;"""
-        formatted_text = ''
-        splits = partial_text.split('</think>')
+        formatted_text = ""
+        splits = partial_text.split("</think>")
         for i, s in enumerate(splits):
-            formatted_text += s.replace('<think>', f'<details open><summary style="{summary_style}">Thought</summary><div style="{thought_style}">') + '</div></details>'
+            formatted_text += (
+                s.replace(
+                    "<think>",
+                    f'<details open><summary style="{summary_style}">Thought</summary><div style="{thought_style}">',
+                )
+                + "</div></details>"
" + ) return formatted_text def is_partial_stop(output, stop_str): @@ -379,4 +385,4 @@ def clear_history(): ) clear.click(fn=clear_history, outputs=[chatbot], queue=False) - return demo \ No newline at end of file + return demo diff --git a/supplementary_materials/notebooks/fastdraft-deepseek/llm_pipeline_with_hf_tokenizer.py b/supplementary_materials/notebooks/fastdraft-deepseek/llm_pipeline_with_hf_tokenizer.py index c56dec50ac3..9eb84a1f342 100644 --- a/supplementary_materials/notebooks/fastdraft-deepseek/llm_pipeline_with_hf_tokenizer.py +++ b/supplementary_materials/notebooks/fastdraft-deepseek/llm_pipeline_with_hf_tokenizer.py @@ -6,26 +6,25 @@ import openvino_genai as ov_genai - -DecodedResults = namedtuple('DecodedResults', ['perf_metrics', 'scores', 'texts']) +DecodedResults = namedtuple("DecodedResults", ["perf_metrics", "scores", "texts"]) class LLMPipelineWithHFTokenizer(ov_genai.LLMPipeline): - + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - model_dir = kwargs['model_dir'] if 'model_dir' in kwargs else args[0] + model_dir = kwargs["model_dir"] if "model_dir" in kwargs else args[0] self.tokenizer = AutoTokenizer.from_pretrained(model_dir) def generate(self, *args, **kwargs): - texts = kwargs.pop('inputs', None) + texts = kwargs.pop("inputs", None) if texts is None: texts, args = args[0], args[1:] - if kwargs.pop('apply_chat_template', False): - inputs = self.tokenizer.apply_chat_template(texts, add_generation_prompt=True, return_tensors='np') + if kwargs.pop("apply_chat_template", False): + inputs = self.tokenizer.apply_chat_template(texts, add_generation_prompt=True, return_tensors="np") inputs = ov.Tensor(inputs) else: - inputs = ov.Tensor(self.tokenizer(texts, return_tensors='np')['input_ids']) + inputs = ov.Tensor(self.tokenizer(texts, return_tensors="np")["input_ids"]) out = super().generate(inputs, *args, **kwargs) res = DecodedResults(out.perf_metrics, out.scores, self.tokenizer.batch_decode(out.tokens)) - return res \ No newline at end of file + return res