openvinotoolkit · eaidova · Aug 22, 2024 · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024
diff --git a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt
@@ -527,6 +527,7 @@ notus
 nsamples
 nsfw
 NSFW
+NuExtract
 num
 numpy
 NumPy

diff --git a/notebooks/nuextract-structure-extraction/README.md b/notebooks/nuextract-structure-extraction/README.md
@@ -0,0 +1,23 @@
+# Structure Extraction with NuExtract and OpenVINO
+
+[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/nuextract-structure-extraction/nuextract-structure-extraction.ipynb)
+
+The [NuExtract](https://huggingface.co/numind/NuExtract) model is a text-to-JSON Large Language Model (LLM) that extracts arbitrarily complex information from text and turns it into structured data.
+
+## Notebook Contents
+
+The tutorial consists of the following steps:
+
+- Install prerequisites
+- Download and convert the model from a public source using the [OpenVINO integration with Hugging Face Optimum](https://huggingface.co/blog/openvino)
+- Compress model weights to INT8 and INT4 with [OpenVINO NNCF](https://github.com/openvinotoolkit/nncf)
+- Create a structure extraction inference pipeline with [Generate API](https://github.com/openvinotoolkit/openvino.genai)
+- Launch an interactive Gradio demo with the structure extraction pipeline
+
+## Installation Instructions
+
+This is a self-contained example that relies solely on its own code.<br/>
+We recommend running the notebook in a virtual environment. You only need a Jupyter server to start.
+For details, please refer to the [Installation Guide](../../README.md).
+
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=5b5a4db0-7875-4bfb-bdbd-01698b5b1a77&file=notebooks/nuextract-structure-extraction/README.md" />
diff --git a/notebooks/nuextract-structure-extraction/gradio_helper.py b/notebooks/nuextract-structure-extraction/gradio_helper.py
@@ -0,0 +1,64 @@
+import gradio as gr
+from typing import Callable
+
+# Sample input passage offered as the "Text" half of the single example row
+# registered in make_demo() via gr.Examples.
+example_text = """We introduce Mistral 7B, a 7-billion-parameter language model engineered for
+superior performance and efficiency. Mistral 7B outperforms the best open 13B
+model (Llama 2) across all evaluated benchmarks, and the best released 34B
+model (Llama 1) in reasoning, mathematics, and code generation. Our model
+leverages grouped-query attention (GQA) for faster inference, coupled with sliding
+window attention (SWA) to effectively handle sequences of arbitrary length with a
+reduced inference cost. We also provide a model fine-tuned to follow instructions,
+Mistral 7B - Instruct, that surpasses Llama 2 13B - chat model both on human and
+automated benchmarks. Our models are released under the Apache 2.0 license.
+Code: https://github.com/mistralai/mistral-src
+Webpage: https://mistral.ai/news/announcing-mistral-7b/"""
+
+# Sample JSON template paired with example_text in the same example row.
+# Every leaf is an empty string or an empty list; presumably the model fills
+# these in from the text (confirm against the notebook's prompt format).
+example_schema = """{
+    "Model": {
+        "Name": "",
+        "Number of parameters": "",
+        "Number of max token": "",
+        "Architecture": []
+    },
+    "Usage": {
+        "Use case": [],
+        "Licence": ""
+    }
+}"""
+
+
+def make_demo(fn: Callable):
+    """Build the Gradio UI for the structure extraction demo.
+
+    The layout has a title, a left column with the input text box and the
+    JSON schema editor, a right column with a read-only JSON view for the
+    model response, a row with Clear and Submit buttons, and one example row
+    that fills both inputs from ``example_text`` and ``example_schema``.
+
+    Args:
+        fn: Callback bound to the Submit button. It is called with the
+            current values of the text box and the schema editor (in that
+            order), and its result is routed to the model response view.
+            Presumably it returns a JSON string; confirm against the caller.
+
+    Returns:
+        The object bound by ``with gr.Blocks() as demo``. This function does
+        not launch it; that is left to the caller.
+    """
+    with gr.Blocks() as demo:
+        gr.Markdown("# Structure Extraction with NuExtract and OpenVINO")
+
+        with gr.Row():
+            # Left column: the two user-editable inputs.
+            with gr.Column():
+                text_textbox = gr.Textbox(
+                    label="Text",
+                    placeholder="Text from which to extract information",
+                    lines=5,
+                )
+                schema_textbox = gr.Code(
+                    label="JSON Schema",
+                    language="json",
+                    lines=5,
+                )
+            # Right column: output only, hence interactive=False.
+            with gr.Column():
+                model_output_textbox = gr.Code(
+                    label="Model Response",
+                    language="json",
+                    interactive=False,
+                    lines=10,
+                )
+        with gr.Row():
+            # The clear button is attached to both inputs and the output.
+            gr.ClearButton(components=[text_textbox, schema_textbox, model_output_textbox])
+            submit_button = gr.Button(value="Submit", variant="primary")
+        with gr.Row():
+            # One example row; its two values map onto the two input components.
+            gr.Examples(examples=[[example_text, example_schema]], inputs=[text_textbox, schema_textbox])
+
+        # Wire Submit: inputs are (text, schema), output is the response view.
+        submit_button.click(
+            fn,
+            [text_textbox, schema_textbox],
+            [model_output_textbox],
+        )
+    return demo
-Original file line number
+Diff line change
@@ Expand Up / @@ -527,6 +527,7 @@ notus @@
     nsamples
     nsfw
     NSFW
+    NuExtract
     num
     numpy
     NumPy
@@ Expand Down @@