|
146 | 146 | "\n",
|
147 | 147 | "convert_script = genai_llm_bench / \"convert.py\"\n",
|
148 | 148 | "\n",
|
149 |
| - "if not (stateful_model_path / \"openvino_model.xml\").exists()\n", |
| 149 | + "if not (stateful_model_path / \"openvino_model.xml\").exists():\n", |
150 | 150 | " !python $convert_script --model_id stabilityai/stable-zephyr-3b --precision FP16 --compress_weights 4BIT_DEFAULT --output stable-zephyr-3b-stateful --force_convert"
|
151 | 151 | ]
|
152 | 152 | },
|
|
481 | 481 | },
|
482 | 482 | {
|
483 | 483 | "cell_type": "code",
|
484 |
| - "execution_count": 9, |
| 484 | + "execution_count": null, |
485 | 485 | "id": "b7f014a4-2b3b-41fc-aae1-b4b0729b978d",
|
486 | 486 | "metadata": {},
|
487 |
| - "outputs": [ |
488 |
| - { |
489 |
| - "name": "stderr", |
490 |
| - "output_type": "stream", |
491 |
| - "text": [ |
492 |
| - "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", |
493 |
| - "/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/gradio/deprecation.py:40: UserWarning: `height` is deprecated in `Interface()`, please use it within `launch()` instead.\n", |
494 |
| - " warnings.warn(value)\n", |
495 |
| - "/home/ea/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/gradio/deprecation.py:43: UserWarning: You have unused kwarg parameters in Textbox, please remove them: {'container': False}\n", |
496 |
| - " warnings.warn(\n" |
497 |
| - ] |
498 |
| - }, |
499 |
| - { |
500 |
| - "name": "stdout", |
501 |
| - "output_type": "stream", |
502 |
| - "text": [ |
503 |
| - "Running on local URL: http://127.0.0.1:7860\n" |
504 |
| - ] |
505 |
| - }, |
506 |
| - { |
507 |
| - "ename": "KeyboardInterrupt", |
508 |
| - "evalue": "", |
509 |
| - "output_type": "error", |
510 |
| - "traceback": [ |
511 |
| - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
512 |
| - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", |
513 |
| - "Cell \u001b[0;32mIn[9], line 326\u001b[0m\n\u001b[1;32m 320\u001b[0m demo\u001b[38;5;241m.\u001b[39mqueue(max_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m 321\u001b[0m \u001b[38;5;66;03m# if you are launching remotely, specify server_name and server_port\u001b[39;00m\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# demo.launch(server_name='your server name', server_port='server port in int')\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# if you have any issue to launch on your platform, you can pass share=True to launch method:\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# demo.launch(share=True)\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# it creates a publicly shareable link for the interface. Read more in the docs: https://gradio.app/docs/\u001b[39;00m\n\u001b[0;32m--> 326\u001b[0m \u001b[43mdemo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlaunch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshare\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", |
514 |
| - "File \u001b[0;32m~/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/gradio/blocks.py:1542\u001b[0m, in \u001b[0;36mBlocks.launch\u001b[0;34m(self, inline, inbrowser, share, debug, enable_queue, max_threads, auth, auth_message, prevent_thread_lock, show_error, server_name, server_port, show_tips, height, width, encrypt, favicon_path, ssl_keyfile, ssl_certfile, ssl_keyfile_password, quiet, show_api, file_directories, _frontend)\u001b[0m\n\u001b[1;32m 1540\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1541\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mshare_url \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 1542\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mshare_url \u001b[38;5;241m=\u001b[39m \u001b[43mnetworking\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msetup_tunnel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mserver_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mserver_port\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshare_token\u001b[49m\n\u001b[1;32m 1544\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1545\u001b[0m \u001b[38;5;28mprint\u001b[39m(strings\u001b[38;5;241m.\u001b[39men[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSHARE_LINK_DISPLAY\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mformat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mshare_url))\n\u001b[1;32m 1546\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (quiet):\n", |
515 |
| - "File \u001b[0;32m~/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/gradio/networking.py:168\u001b[0m, in \u001b[0;36msetup_tunnel\u001b[0;34m(local_host, local_port, share_token)\u001b[0m\n\u001b[1;32m 164\u001b[0m remote_host, remote_port \u001b[38;5;241m=\u001b[39m payload[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhost\u001b[39m\u001b[38;5;124m\"\u001b[39m], \u001b[38;5;28mint\u001b[39m(payload[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mport\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 165\u001b[0m tunnel \u001b[38;5;241m=\u001b[39m Tunnel(\n\u001b[1;32m 166\u001b[0m remote_host, remote_port, local_host, local_port, share_token\n\u001b[1;32m 167\u001b[0m )\n\u001b[0;32m--> 168\u001b[0m address \u001b[38;5;241m=\u001b[39m \u001b[43mtunnel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstart_tunnel\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m address\n\u001b[1;32m 170\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", |
516 |
| - "File \u001b[0;32m~/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/gradio/tunneling.py:61\u001b[0m, in \u001b[0;36mTunnel.start_tunnel\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mstart_tunnel\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m 60\u001b[0m binary_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdownload_binary()\n\u001b[0;32m---> 61\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39murl \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_start_tunnel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbinary_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39murl\n", |
517 |
| - "File \u001b[0;32m~/work/my_optimum_intel/optimum_env/lib/python3.8/site-packages/gradio/tunneling.py:97\u001b[0m, in \u001b[0;36mTunnel._start_tunnel\u001b[0;34m(self, binary)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mproc\u001b[38;5;241m.\u001b[39mstdout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[0;32m---> 97\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mproc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstdout\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 98\u001b[0m line \u001b[38;5;241m=\u001b[39m line\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstart proxy success\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m line:\n", |
518 |
| - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " |
519 |
| - ] |
520 |
| - } |
521 |
| - ], |
| 487 | + "outputs": [], |
522 | 488 | "source": [
|
523 | 489 | "import torch\n",
|
524 | 490 | "from threading import Event, Thread\n",
|
|
884 | 850 | },
|
885 | 851 | "widgets": {
|
886 | 852 | "application/vnd.jupyter.widget-state+json": {
|
887 |
| - "state": { |
888 |
| - "75f7bc21fe684e019f27736a7cf80f58": { |
889 |
| - "model_module": "@jupyter-widgets/base", |
890 |
| - "model_module_version": "2.0.0", |
891 |
| - "model_name": "LayoutModel", |
892 |
| - "state": {} |
893 |
| - }, |
894 |
| - "86abb36a40914433bfdde6cdb044cab9": { |
895 |
| - "model_module": "@jupyter-widgets/controls", |
896 |
| - "model_module_version": "2.0.0", |
897 |
| - "model_name": "DescriptionStyleModel", |
898 |
| - "state": { |
899 |
| - "description_width": "" |
900 |
| - } |
901 |
| - }, |
902 |
| - "bd380d7c5b044d02a29085bb584067e5": { |
903 |
| - "model_module": "@jupyter-widgets/controls", |
904 |
| - "model_module_version": "2.0.0", |
905 |
| - "model_name": "DropdownModel", |
906 |
| - "state": { |
907 |
| - "_options_labels": [ |
908 |
| - "CPU", |
909 |
| - "GPU.0", |
910 |
| - "GPU.1" |
911 |
| - ], |
912 |
| - "description": "Device:", |
913 |
| - "index": 0, |
914 |
| - "layout": "IPY_MODEL_75f7bc21fe684e019f27736a7cf80f58", |
915 |
| - "style": "IPY_MODEL_86abb36a40914433bfdde6cdb044cab9" |
916 |
| - } |
917 |
| - } |
918 |
| - }, |
| 853 | + "state": {}, |
919 | 854 | "version_major": 2,
|
920 | 855 | "version_minor": 0
|
921 | 856 | }
|
|
0 commit comments