add flex.2 tutorial (#2959)

eaidova · web-flow · commit 3ca65622a25a · 2025-05-23T14:04:09.000+04:00
CVS-167875
diff --git a/.ci/ignore_treon_docker.txt b/.ci/ignore_treon_docker.txt
@@ -88,4 +88,5 @@ notebooks/olmocr-pdf-vlm/olmocr-pdf-vlm.ipynb
 notebooks/minicpm-o-omnimodal-chatbot/minicpm-o-omnimodal-chatbot.ipynb
 notebooks/kokoro/kokoro.ipynb
 notebooks/qwen2.5-omni-chatbot/qwen2.5-omni-chatbot.ipynb
-notebooks/intern-video2-classiciation/intern-video2-classification.ipynb
+notebooks/intern-video2-classiciation/intern-video2-classification.ipynb
+notebooks/flex.2-image-generation/flex.2-image-generation.ipynb
diff --git a/.ci/skipped_notebooks.yml b/.ci/skipped_notebooks.yml
@@ -542,6 +542,10 @@
         - macos-13
         - ubuntu-22.04
         - windows-2019
+- notebook: notebooks/flex.2-image-generation/flex.2-image-generation.ipynb
+  skips:
+    - python:
+        - "3.9"
 - notebook: notebooks/openvoice2-and-melotts/openvoice2-and-melotts.ipynb
   skips:
     - os:
diff --git a/notebooks/flex.2-image-generation/README.md b/notebooks/flex.2-image-generation/README.md
@@ -0,0 +1,39 @@
+# Image generation with universal control using Flex.2 and OpenVINO
+
+<div class="alert alert-block alert-danger"> <b>Important note:</b> This notebook requires Python >= 3.11. Please make sure that your environment fulfills this requirement before running it. </div>
+
+Flex.2 is a flexible text-to-image diffusion model based on the Flux model architecture, with built-in support for inpainting and universal control: the model accepts pose, line, and depth inputs.
+
+<img src="https://github.com/user-attachments/assets/6a9ab66a-387a-4538-8625-2bb3a16072b5" width="1024"> 
+
+More details about the model can be found in the [model card](https://huggingface.co/ostris/Flex.2-preview).
+
+In this tutorial, we consider how to convert and optimize the Flex.2 model using OpenVINO.
+
+>**Note**: Some demonstrated models can require at least 32GB RAM for conversion and running.
+
+### Notebook Contents
+
+In this demonstration, you will learn how to perform text-to-image generation using Flex.2 and OpenVINO. 
+
+Example of model work:
+
+![](https://github.com/user-attachments/assets/140685b7-2c5d-4cef-86fb-33df0849ec1a)
+
+The tutorial consists of the following steps:
+
+- Install prerequisites
+- Collect PyTorch model pipeline
+- Convert model to OpenVINO intermediate representation (IR) format 
+- Compress weights using NNCF
+- Prepare OpenVINO Inference pipeline
+- Run Image generation
+- Launch interactive demo
+
+## Installation Instructions
+
+This is a self-contained example that relies solely on its own code.<br>
+We recommend running the notebook in a virtual environment. You only need a Jupyter server to start.
+For further details, please refer to the [Installation Guide](../../README.md).
+
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=5b5a4db0-7875-4bfb-bdbd-01698b5b1a77&file=notebooks/flex.2-image-generation/README.md" />
diff --git a/notebooks/flex.2-image-generation/flex.2-image-generation.ipynb b/notebooks/flex.2-image-generation/flex.2-image-generation.ipynb
diff --git a/notebooks/flex.2-image-generation/gradio_helper.py b/notebooks/flex.2-image-generation/gradio_helper.py
@@ -0,0 +1,252 @@
+import gradio as gr
+import numpy as np
+import torch
+import random
+from PIL import Image, ImageFilter
+
+MAX_SEED = np.iinfo(np.int32).max
+MAX_IMAGE_SIZE = 2048
+
+
+def make_demo(pipe):
+    def infer(
+        edit_images,
+        prompt,
+        seed=42,
+        randomize_seed=False,
+        width=1024,
+        height=1024,
+        guidance_scale=3.5,
+        control_strength=0.5,
+        control_stop=0.33,
+        num_inference_steps=50,
+        progress=gr.Progress(track_tqdm=True),
+    ):
+        image = edit_images["background"].convert("RGB")
+        mask = Image.fromarray(np.array(edit_images["layers"][-1])[:, :, -1])
+        if randomize_seed:
+            seed = random.randint(0, MAX_SEED)
+        out_image = pipe(
+            prompt=prompt,
+            inpaint_image=image,
+            inpaint_mask=mask,
+            height=height,
+            width=width,
+            guidance_scale=guidance_scale,
+            control_strength=control_strength,
+            control_stop=control_stop,
+            num_inference_steps=num_inference_steps,
+            generator=torch.Generator("cpu").manual_seed(seed),
+        ).images[0]
+        return (image, out_image), seed
+
+    css = """
+:root {
+    --primary-color: #7E57C2;
+    --secondary-color: #5E35B1;
+    --accent-color: #B39DDB;
+    --background-color: #F5F5F7;
+    --card-background: #FFFFFF;
+    --text-color: #333333;
+    --shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
+    --radius: 12px;
+}
+body {
+    font-family: 'Inter', system-ui, sans-serif;
+    background-color: var(--background-color);
+}
+#col-container {
+    margin: 0 auto;
+    max-width: 1200px;
+    padding: 0;
+}
+.container {
+    background-color: var(--card-background);
+    border-radius: var(--radius);
+    box-shadow: var(--shadow);
+    padding: 24px;
+    margin-bottom: 24px;
+}
+.header-container {
+    background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
+    border-radius: var(--radius);
+    padding: 32px;
+    margin-bottom: 24px;
+    color: white;
+    text-align: center;
+    box-shadow: var(--shadow);
+}
+.header-container h1 {
+    font-weight: 700;
+    font-size: 2.5rem;
+    margin-bottom: 8px;
+    background: linear-gradient(to right, #ffffff, #e0e0e0);
+    -webkit-background-clip: text;
+    background-clip: text;
+    -webkit-text-fill-color: transparent;
+}
+.header-container p {
+    font-size: 1.1rem;
+    opacity: 0.92;
+    margin-bottom: 16px;
+}
+.header-container a {
+    color: var(--accent-color);
+    text-decoration: underline;
+    transition: opacity 0.2s;
+}
+.header-container a:hover {
+    opacity: 0.8;
+}
+.btn-primary {
+    background: linear-gradient(90deg, var(--primary-color), var(--secondary-color));
+    border: none;
+    border-radius: 8px;
+    color: white;
+    font-weight: 600;
+    padding: 12px 24px;
+    font-size: 16px;
+    cursor: pointer;
+    transition: all 0.3s ease;
+    box-shadow: 0 4px 12px rgba(126, 87, 194, 0.3);
+}
+.btn-primary:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 6px 16px rgba(126, 87, 194, 0.4);
+}
+.image-editor-container {
+    border-radius: var(--radius);
+    overflow: hidden;
+    box-shadow: var(--shadow);
+}
+.prompt-container {
+    background-color: var(--card-background);
+    border-radius: var(--radius);
+    padding: 16px;
+    box-shadow: var(--shadow);
+    margin-top: 16px;
+}
+.result-container {
+    border-radius: var(--radius);
+    overflow: hidden;
+    box-shadow: var(--shadow);
+}
+.settings-container {
+    background-color: var(--card-background);
+    border-radius: var(--radius);
+    padding: 20px;
+    box-shadow: var(--shadow);
+    margin-top: 16px;
+}
+.accordion-header {
+    font-weight: 600;
+    color: var(--primary-color);
+}
+/* Custom slider styling */
+input[type="range"] {
+    height: 6px;
+    border-radius: 3px;
+    background: linear-gradient(90deg, var(--primary-color), var(--secondary-color));
+}
+input[type="range"]::-webkit-slider-thumb {
+    background: var(--primary-color);
+    border: 2px solid white;
+    height: 18px;
+    width: 18px;
+}
+.footer {
+    text-align: center;
+    padding: 24px;
+    color: #777;
+    font-size: 14px;
+}
+/* Animate the result transition */
+@keyframes fadeIn {
+    from { opacity: 0; }
+    to { opacity: 1; }
+}
+.result-animation {
+    animation: fadeIn 0.5s ease-in-out;
+}
+    """
+
+    with gr.Blocks(css=css, theme=gr.themes.Monochrome()) as demo:
+        with gr.Column(elem_id="col-container"):
+            # Header with gradient
+            with gr.Column(elem_classes=["header-container"]):
+                gr.HTML(
+                    """
+                    <h1>Flex.2 Preview Inpainting OpenVINO Demo</h1>
+                """
+                )
+            # Main interface container
+            with gr.Column(elem_classes=["container"]):
+                with gr.Row():
+                    # Left column: Input
+                    with gr.Column(scale=1):
+                        edit_image = gr.ImageEditor(
+                            label="Upload and draw mask for inpainting",
+                            type="pil",
+                            sources=["upload", "webcam"],
+                            image_mode="RGB",
+                            layers=False,
+                            brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"),
+                            height=500,
+                        )
+
+                        with gr.Column(elem_classes=["prompt-container"]):
+                            prompt = gr.Text(
+                                label="Your creative prompt",
+                                show_label=True,
+                                max_lines=1,
+                                placeholder="Describe what you want to generate...",
+                                container=True,
+                            )
+
+                            run_button = gr.Button("✨ Generate", elem_classes=["btn-primary"])
+                    # Right column: Output
+                    with gr.Column(scale=1, elem_classes=["result-container"]):
+                        result = gr.ImageSlider(label="Before & After", type="pil", image_mode="RGB", elem_classes=["result-animation"])
+
+            # Advanced settings in a nice container
+            with gr.Column(elem_classes=["settings-container"]):
+                with gr.Accordion("Advanced Settings", open=False, elem_classes=["accordion-header"]):
+                    with gr.Column():
+                        with gr.Row():
+                            seed = gr.Slider(
+                                label="Seed",
+                                minimum=0,
+                                maximum=MAX_SEED,
+                                step=1,
+                                value=0,
+                            )
+                            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+
+                        with gr.Row():
+                            height = gr.Slider(64, 2048, value=512, step=64, label="Height")
+                            width = gr.Slider(64, 2048, value=512, step=64, label="Width")
+
+                        with gr.Row():
+                            guidance_scale = gr.Slider(0.0, 20.0, value=3.5, step=0.1, label="Guidance Scale")
+                            control_strength = gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="Control Strength")
+
+                        with gr.Row():
+                            control_stop = gr.Slider(0.0, 1.0, value=0.33, step=0.05, label="Control Stop")
+                            num_inference_steps = gr.Slider(1, 100, value=20, step=1, label="Inference Steps")
+
+            # Footer
+            gr.HTML(
+                """
+                <div class="footer">
+                    <p>Flex.2 Preview Inpainting OpenVINO Demo</p>
+                </div>
+            """
+            )
+
+        run_button.click(
+            fn=infer,
+            inputs=[edit_image, prompt, seed, randomize_seed, width, height, guidance_scale, control_strength, control_stop, num_inference_steps],
+            outputs=[result, seed],
+        )
+
+    return demo
diff --git a/notebooks/flex.2-image-generation/ov_flex2_helper.py b/notebooks/flex.2-image-generation/ov_flex2_helper.py
@@ -0,0 +1,8 @@
+from optimum.intel.openvino import OVDiffusionPipeline
+from pipeline import Flex2Pipeline
+
+
+class OVFlex2Pipeline(OVDiffusionPipeline, Flex2Pipeline):
+    """Flex.2 pipeline combining ``OVDiffusionPipeline`` with ``Flex2Pipeline``.
+
+    The class defines no methods of its own; it only inherits from both bases
+    (``OVDiffusionPipeline`` is listed first) and sets the three class
+    attributes below.
+    """
+
+    # NOTE(review): the attributes below are presumably consumed by the
+    # OVDiffusionPipeline loading/export machinery -- confirm against
+    # optimum-intel before changing them.
+
+    # Name of the pipeline's main input argument.
+    main_input_name = "prompt"
+    # Feature/task label associated with this pipeline.
+    export_feature = "text-to-image"
+    # Points at the local Flex2Pipeline class imported from the `pipeline` module.
+    auto_model_class = Flex2Pipeline