update ov in docker (#2725)
eaidova authored Feb 6, 2025
1 parent 7d2a168 commit e3bb88c
Showing 5 changed files with 28 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .docker/Pipfile
@@ -26,7 +26,7 @@ notebook = "<7.0.0"
 numpy = ">=1.21.0"
 onnx = ">=1.11.0"
 opencv-python = "*"
-openvino = {version = "==2024.5.0"}
+openvino = {version = "==2025.0.0"}
 openvino-telemetry = "==2023.2.1"
 ovmsclient = "*"
 Pillow = ">=8.3.2"
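The pin above moves the image from OpenVINO 2024.5.0 to the 2025.0.0 release; the Pipfile.lock below is regenerated to match. A minimal sanity check one might run inside the rebuilt container (the assertion is an illustration, not part of this commit):

import openvino as ov

# The runtime reports its build string; for this Pipfile it should
# start with the pinned 2025.0 release.
print(ov.get_version())
assert ov.get_version().startswith("2025.0"), "unexpected OpenVINO version"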
6 changes: 3 additions & 3 deletions .docker/Pipfile.lock

Some generated files are not rendered by default.

@@ -103,7 +103,7 @@
 "    ! optimum-cli export openvino --model $model_id --task text-generation-with-past --weight-format int4 $model_dir\n",
 "\n",
 "# convert OV tokenizer if needed\n",
-"if not (model_dir / 'openvino_tokenizer.xml').exists():\n",
+"if not (model_dir / \"openvino_tokenizer.xml\").exists():\n",
 "    ! convert_tokenizer $model_dir --with-detokenizer -o $model_dir"
 ]
 },
@@ -135,6 +135,7 @@
 "    print(subword, end=\"\", flush=True)\n",
 "    return False\n",
 "\n",
+"\n",
 "# Define scheduler\n",
 "scheduler_config = ov_genai.SchedulerConfig()\n",
 "scheduler_config.num_kv_blocks = 2048 // 16\n",
@@ -221,15 +222,15 @@
 "import huggingface_hub as hf_hub\n",
 "\n",
 "draft_model_id = \"OpenVINO/Llama-3.1-8B-Instruct-FastDraft-150M-int8-ov\"\n",
-"draft_model_path = Path('DeepSeek-R1-Llama-FastDraft-int8-ov')\n",
+"draft_model_path = Path(\"DeepSeek-R1-Llama-FastDraft-int8-ov\")\n",
 "\n",
 "if not draft_model_path.exists():\n",
 "    hf_hub.snapshot_download(draft_model_id, local_dir=draft_model_path)\n",
 "\n",
 "# We need tokenizers to match between the target and draft model so we apply this workaround\n",
 "if not filecmp.cmp(str(model_dir / \"openvino_tokenizer.xml\"), str(draft_model_path / \"openvino_tokenizer.xml\"), shallow=False):\n",
-"    for fname in ['openvino_tokenizer.xml', 'openvino_tokenizer.bin', 'openvino_detokenizer.xml', 'openvino_detokenizer.bin']:\n",
-"        shutil.copy(model_dir / fname, draft_model_path / fname)\n"
+"    for fname in [\"openvino_tokenizer.xml\", \"openvino_tokenizer.bin\", \"openvino_detokenizer.xml\", \"openvino_detokenizer.bin\"]:\n",
+"        shutil.copy(model_dir / fname, draft_model_path / fname)"
 ]
 },
 {
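The last hunk belongs to the speculative-decoding setup: the notebook downloads the FastDraft draft model and, because target and draft must tokenize identically, copies the target's (de)tokenizer files over the draft's when they differ. A hedged sketch of how a draft model is typically attached to a pipeline (device and generation settings here are assumptions, not from the notebook):

import openvino_genai as ov_genai

pipe = ov_genai.LLMPipeline(
    "model_dir",  # target model directory (placeholder)
    "CPU",
    draft_model=ov_genai.draft_model("DeepSeek-R1-Llama-FastDraft-int8-ov"),
)

config = ov_genai.GenerationConfig()
config.max_new_tokens = 128
config.num_assistant_tokens = 5  # tokens the draft proposes per verification step
print(pipe.generate("Which is bigger, 9.9 or 9.11?", config))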
@@ -13,7 +13,7 @@
 english_examples = [
     ["Which is bigger, 9.9 or 9.11?"],
     ["Classify the following numbers as 'prime' or 'composite' - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16.?"],
-    ["What are the classifications of Academic Degrees?"],
+    ["What are the classifications of Academic Degrees?"],
     ["Which word does not belong to the other: Hammer, Screwdriver, Nail, Wood"],
     ["Identify which instrument is string or percussion: Kpanlogo, Shamisen"],
     ["Which of the following are colors: red, black, yellow, orange, sun, sunflower, chips, book, white, pink, blue, keyboard."],
@@ -185,10 +185,16 @@ def apply_format(partial_text: str):
         list-style-position: outside;
         margin: 0.5em 15px;
         padding: 0px 0px 10px 15px;"""
-    formatted_text = ''
-    splits = partial_text.split('</think>')
+    formatted_text = ""
+    splits = partial_text.split("</think>")
     for i, s in enumerate(splits):
-        formatted_text += s.replace('<think>', f'<details {"open" if i == (len(splits) - 1) else ""} style="margin:0px;padding:0px;"><summary style="{summary_style}">Thought</summary><blockquote style="{blockquote_style}"><p>') +'</p></blockquote></details>'
+        formatted_text += (
+            s.replace(
+                "<think>",
+                f'<details {"open" if i == (len(splits) - 1) else ""} style="margin:0px;padding:0px;"><summary style="{summary_style}">Thought</summary><blockquote style="{blockquote_style}"><p>',
+            )
+            + "</p></blockquote></details>"
+        )
     return formatted_text

 def is_partial_stop(output, stop_str):
@@ -379,4 +385,4 @@ def clear_history():
     )
     clear.click(fn=clear_history, outputs=[chatbot], queue=False)

-    return demo
+    return demo

@@ -6,26 +6,25 @@
 import openvino_genai as ov_genai


-
-DecodedResults = namedtuple('DecodedResults', ['perf_metrics', 'scores', 'texts'])
+DecodedResults = namedtuple("DecodedResults", ["perf_metrics", "scores", "texts"])


 class LLMPipelineWithHFTokenizer(ov_genai.LLMPipeline):

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        model_dir = kwargs['model_dir'] if 'model_dir' in kwargs else args[0]
+        model_dir = kwargs["model_dir"] if "model_dir" in kwargs else args[0]
         self.tokenizer = AutoTokenizer.from_pretrained(model_dir)

     def generate(self, *args, **kwargs):
-        texts = kwargs.pop('inputs', None)
+        texts = kwargs.pop("inputs", None)
         if texts is None:
             texts, args = args[0], args[1:]
-        if kwargs.pop('apply_chat_template', False):
-            inputs = self.tokenizer.apply_chat_template(texts, add_generation_prompt=True, return_tensors='np')
+        if kwargs.pop("apply_chat_template", False):
+            inputs = self.tokenizer.apply_chat_template(texts, add_generation_prompt=True, return_tensors="np")
             inputs = ov.Tensor(inputs)
         else:
-            inputs = ov.Tensor(self.tokenizer(texts, return_tensors='np')['input_ids'])
+            inputs = ov.Tensor(self.tokenizer(texts, return_tensors="np")["input_ids"])
         out = super().generate(inputs, *args, **kwargs)
         res = DecodedResults(out.perf_metrics, out.scores, self.tokenizer.batch_decode(out.tokens))
-        return res
+        return res
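The wrapper above swaps ov_genai's built-in tokenizer for a Hugging Face one: prompts are tokenized (optionally through the HF chat template) into an ov.Tensor, and generated token ids are decoded back with batch_decode. A hedged usage sketch, assuming the class above is importable; the model path and generation arguments are placeholders:

pipe = LLMPipelineWithHFTokenizer("model_dir", "CPU")
messages = [{"role": "user", "content": "Which is bigger, 9.9 or 9.11?"}]
res = pipe.generate(messages, apply_chat_template=True, max_new_tokens=128)
print(res.texts[0])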
