diff --git a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt index ccd02a9def8..6c4a8ede9e4 100644 --- a/.ci/spellcheck/.pyspelling.wordlist.txt +++ b/.ci/spellcheck/.pyspelling.wordlist.txt @@ -634,6 +634,7 @@ opset optimizable Orca OuteTTS +outpainting OVC overfitting overlayed diff --git a/notebooks/flux-fill/README.md b/notebooks/flux-fill/README.md new file mode 100644 index 00000000000..d92914c9a4f --- /dev/null +++ b/notebooks/flux-fill/README.md @@ -0,0 +1,40 @@ +# Image inpainting and outpainting with FLUX.1 Fill + +Inpainting is the task of replacing or editing a specific area of the input image. This makes it a useful tool for image restoration, like removing defects and artifacts, or even replacing an image area with something entirely new. Inpainting relies on a mask to determine which regions of an image to fill in; the area to inpaint is represented by white pixels and the area to keep is represented by black pixels. The white pixels are filled in by the prompt. +FLUX.1 Fill introduces advanced inpainting capabilities that surpass existing approaches. It allows for seamless edits that integrate naturally with existing images. + +![](https://github.com/user-attachments/assets/3598a8e1-526b-4571-8d73-200dcde92430) + +Additionally, FLUX.1 Fill supports outpainting, enabling the user to extend images beyond their original borders. + +![](https://github.com/user-attachments/assets/0e195ef2-fc5d-4eca-b32f-08cdd646199f) + +You can find more details about the model in the [blog post](https://blackforestlabs.ai/flux-1-tools/) and the [model card](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev). + +In this tutorial, we consider how to convert and optimize FLUX.1 Fill for performing image editing using a text prompt and a binary mask. + +## Notebook Contents + +In this demonstration, you will learn how to perform inpainting and outpainting using FLUX.1 Fill and OpenVINO. + +Example of the model output: + +![](https://github.com/user-attachments/assets/2cc19c7c-2d68-4a33-b143-226319888bd6) + +The tutorial consists of the following steps: + +- Install prerequisites +- Collect the PyTorch model pipeline +- Convert the model to OpenVINO Intermediate Representation (IR) format +- Compress weights using NNCF +- Prepare the OpenVINO inference pipeline +- Run the model +- Launch the interactive demo +
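+For orientation, the core inference call used later in the notebook looks roughly like the sketch below; the `FLUX.1-Fill-dev/INT4` directory name and the sample file names are illustrative placeholders that the notebook creates step by step:
+
+```python
+from diffusers.utils import load_image
+from optimum.intel.openvino import OVDiffusionPipeline
+
+# Load the converted (and optionally INT4-compressed) model with OpenVINO
+pipe = OVDiffusionPipeline.from_pretrained("FLUX.1-Fill-dev/INT4", device="CPU")
+
+image = load_image("input_image.png")
+mask = load_image("input_mask.png")  # white pixels = area to fill, black pixels = area to keep
+
+result = pipe(
+    prompt="A yellow cat, high resolution, sitting on a park bench",
+    image=image,
+    mask_image=mask,
+    num_inference_steps=20,
+    height=512,
+    width=512,
+).images[0]
+result.save("result.png")
+```
+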
+## Installation Instructions + +This is a self-contained example that relies solely on its own code. +We recommend running the notebook in a virtual environment. You only need a Jupyter server to start. +For further details, please refer to [Installation Guide](../../README.md). + + diff --git a/notebooks/flux-fill/flux-fill.ipynb b/notebooks/flux-fill/flux-fill.ipynb new file mode 100644 index 00000000000..ae51f28f72f --- /dev/null +++ b/notebooks/flux-fill/flux-fill.ipynb @@ -0,0 +1,743 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image inpainting and outpainting with FLUX.1 Fill\n", + "\n", + "Inpainting is the task of replacing or editing a specific area of an input image. This makes it a useful tool for image restoration like removing defects and artifacts, or even replacing an image area with something entirely new. Inpainting relies on a mask to determine which regions of an image to fill in; the area to inpaint is represented by white pixels and the area to keep is represented by black pixels. The white pixels are filled in by the prompt.\n", + "FLUX.1 Fill introduces advanced inpainting capabilities that surpass existing approaches. It allows for seamless edits that integrate naturally with existing images. \n", + "\n", + "![](https://github.com/user-attachments/assets/3598a8e1-526b-4571-8d73-200dcde92430)\n", + "\n", + "Additionally, FLUX.1 Fill supports outpainting, enabling the user to extend images beyond their original borders.\n", + "\n", + "![](https://github.com/user-attachments/assets/0e195ef2-fc5d-4eca-b32f-08cdd646199f)\n", + "\n", + "You can find more details about the model in the [blog post](https://blackforestlabs.ai/flux-1-tools/) and the [model card](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev).\n", + "\n", + "In this tutorial, we consider how to convert and optimize FLUX.1 Fill for performing image editing using a text prompt and a binary mask.\n", + "#### Table of contents:\n", + "\n", + "- [Prerequisites](#Prerequisites)\n", + "- [Convert model with OpenVINO](#Convert-model-with-OpenVINO)\n", + " - [Convert model using Optimum Intel](#Convert-model-using-Optimum-Intel)\n", + " - [Compress model weights](#Compress-model-weights)\n", + "- [Run OpenVINO model inference](#Run-OpenVINO-model-inference)\n", + "- [Interactive demo](#Interactive-demo)\n", + "\n", + "\n", + "### Installation Instructions\n", + "\n", + "This is a self-contained example that relies solely on its own code.\n", + "\n", + "We recommend running the notebook in a virtual environment.
You only need a Jupyter server to start.\n", + "For details, please refer to [Installation Guide](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/README.md#-installation-guide).\n", + "\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "[back to top ⬆️](#Table-of-contents:)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import platform\n", + "import requests\n", + "from pathlib import Path\n", + "\n", + "utility_files = [\"cmd_helper.py\", \"notebook_utils.py\"]\n", + "base_utility_url = \"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/\"\n", + "\n", + "for utility_file in utility_files:\n", + " if not Path(utility_file).exists():\n", + " r = requests.get(base_utility_url + utility_file)\n", + " with Path(utility_file).open(\"w\") as f:\n", + " f.write(r.text)\n", + "\n", + "%pip install -q \"gradio>=4.19\" \"torch>=2.1\" \"transformers\" \"diffusers>=0.32.0\" \"opencv-python\" \"pillow\" \"peft>=0.7.0\" --extra-index-url https://download.pytorch.org/whl/cpu\n", + "%pip install -q \"sentencepiece\" \"protobuf\"\n", + "%pip install -q \"git+https://github.com/huggingface/optimum-intel.git\" --extra-index-url https://download.pytorch.org/whl/cpu\n", + "%pip install -qU \"openvino>=2025.0.0\" \"nncf>=2.15.0\"\n", + "\n", + "if platform.system() == \"Darwin\":\n", + " %pip install -q \"numpy<2.0\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">**Note**: to run the Flux.1-Fill-dev model with this notebook, you will need to accept the license agreement. \n", + ">You must be a registered user on the 🤗 Hugging Face Hub. Please visit the [HuggingFace model card](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev), carefully read the terms of usage, and click the accept button. You will need to use an access token for the code below to run. For more information on access tokens, refer to [this section of the documentation](https://huggingface.co/docs/hub/security-tokens).\n", + ">You can log in to the Hugging Face Hub in the notebook environment using the following code:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# uncomment these lines to log in to the Hugging Face Hub to get access to the pretrained model\n", + "\n", + "# from huggingface_hub import notebook_login, whoami\n", + "\n", + "# try:\n", + "# whoami()\n", + "# print('Authorization token already provided')\n", + "# except OSError:\n", + "# notebook_login()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convert model with OpenVINO\n", + "[back to top ⬆️](#Table-of-contents:)\n", + "\n", + "Starting from the 2023.0 release, OpenVINO supports PyTorch models directly via the Model Conversion API. The `ov.convert_model` function accepts an instance of a PyTorch model and example inputs for tracing and returns an object of the `ov.Model` class, ready to use or to save on disk using the `ov.save_model` function.\n",
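+    "\n",
+    "For illustration only (this notebook instead uses the Optimum Intel export described below), direct conversion of a PyTorch module could look like the following sketch, where `pt_model` and `example_input` are placeholders for a model instance and a sample input:\n",
+    "\n",
+    "```python\n",
+    "import openvino as ov\n",
+    "\n",
+    "ov_model = ov.convert_model(pt_model, example_input=example_input)\n",
+    "ov.save_model(ov_model, \"model.xml\")\n",
+    "```\n",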
"\n", + "\n", + "The pipeline consists of four important parts:\n", + "\n", + "* CLIP and T5 text encoders to create conditioning for generating an image from a text prompt.\n", + "* Transformer for step-by-step denoising of the latent image representation.\n", + "* Autoencoder (VAE) for encoding the input image and mask and decoding the latent representation back into an image.\n", + " \n", + "### Convert model using Optimum Intel\n", + "[back to top ⬆️](#Table-of-contents:)\n", + "\n", + "For convenience, we will use OpenVINO integration with HuggingFace Optimum. 🤗 [Optimum Intel](https://huggingface.co/docs/optimum/intel/index) is the interface between the 🤗 Transformers and Diffusers libraries and the different tools and libraries provided by Intel to accelerate end-to-end pipelines on Intel architectures.\n", + "\n", + "Among other use cases, Optimum Intel provides a simple interface to optimize your Transformers and Diffusers models, convert them to the OpenVINO Intermediate Representation (IR) format and run inference using OpenVINO Runtime. `optimum-cli` provides a command-line interface for model conversion and optimization. \n", + "\n", + "General command format:\n", + "\n", + "```bash\n", + "optimum-cli export openvino --model <model_id_or_path> --task <task> <output_dir>\n", + "```\n", + "\n", + "where `--task` is the task to export the model for; if not specified, the task will be auto-inferred based on the model. You can find a mapping between tasks and model classes in the Optimum TaskManager [documentation](https://huggingface.co/docs/optimum/exporters/task_manager). Additionally, you can specify weight compression using the `--weight-format` argument with one of the following options: `fp32`, `fp16`, `int8` and `int4`. For `int8` and `int4`, [NNCF](https://github.com/openvinotoolkit/nncf) will be used for weight compression. More details about model export are provided in the [Optimum Intel documentation](https://huggingface.co/docs/optimum/intel/openvino/export#export-your-model).\n", + "\n", + "### Compress model weights\n", + "[back to top ⬆️](#Table-of-contents:)\n", + "\n", + "To reduce model memory consumption, we will use weight compression. The [Weights Compression](https://docs.openvino.ai/2024/openvino-workflow/model-optimization-guide/weight-compression.html) algorithm is aimed at compressing the weights of the models and can be used to optimize the model footprint and performance of large models where the size of weights is relatively larger than the size of activations, for example, Large Language Models (LLMs). Compared to INT8 compression, INT4 compression improves performance even more, but introduces a minor drop in prediction quality. We will use the [NNCF](https://github.com/openvinotoolkit/nncf) integration with the `optimum-cli` tool for weight compression.\n" + ] + },
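+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a concrete illustration, an export command for this model with INT4 weight compression could look like the example below (the output directory name is an arbitrary choice; the arguments mirror the ones passed to the `optimum_cli` helper in the next cells):\n",
+    "\n",
+    "```bash\n",
+    "optimum-cli export openvino --model black-forest-labs/FLUX.1-Fill-dev --weight-format int4 --group-size 64 --ratio 1.0 FLUX.1-Fill-dev/INT4\n",
+    "```"
+   ]
+  },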
+ { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":247: DeprecationWarning: The `openvino.runtime` module is deprecated and will be removed in the 2026.0 release. Please replace `openvino.runtime` with `openvino`.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c1d26676194b498db0f009ead7a3f833", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Checkbox(value=True, description='Weight compression')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from cmd_helper import optimum_cli\n", + "import ipywidgets as widgets\n", + "\n", + "# Read more about telemetry collection at https://github.com/openvinotoolkit/openvino_notebooks?tab=readme-ov-file#-telemetry\n", + "from notebook_utils import collect_telemetry\n", + "\n", + "collect_telemetry(\"flux-fill.ipynb\")\n", + "\n", + "to_compress = widgets.Checkbox(\n", + " value=True,\n", + " description=\"Weight compression\",\n", + " disabled=False,\n", + ")\n", + "\n", + "to_compress" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "test_replace": { + "64": "-1", + "black-forest-labs/FLUX.1-Fill-dev": "katuni4ka/tiny-random-flux-fill" + } + }, + "outputs": [], + "source": [ + "model_id = \"black-forest-labs/FLUX.1-Fill-dev\"\n", + "\n", + "model_base_dir = Path(model_id.split(\"/\")[-1])\n", + "additional_args = {}\n", + "\n", + "if to_compress.value:\n", + " model_dir = model_base_dir / \"INT4\"\n", + " additional_args.update({\"weight-format\": \"int4\", \"group-size\": \"64\", \"ratio\": \"1.0\"})\n", + "else:\n", + " model_dir = model_base_dir / \"FP16\"\n", + " additional_args.update({\"weight-format\": \"fp16\"})\n", + "\n", + "if not model_dir.exists():\n", + " optimum_cli(model_id, model_dir, additional_args=additional_args)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run OpenVINO model inference\n", + "[back to top ⬆️](#Table-of-contents:)\n", + "\n", + "`OVDiffusionPipeline` from Optimum Intel provides a ready-to-use interface for running Diffusers models using OpenVINO. It supports various models including Stable Diffusion, Stable Diffusion XL, LCM, Stable Diffusion v3 and Flux. Similar to the original Diffusers pipeline, for initialization we should use the `from_pretrained` method, providing a model id from the HuggingFace Hub or a local directory (both the original PyTorch and the OpenVINO model formats are supported; in the first case, the model class will additionally trigger model conversion)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d8c94902691548bfb0a9af7ad4b2f7f2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from notebook_utils import device_widget\n", + "\n", + "device = device_widget(default=\"CPU\", exclude=[\"NPU\"])\n", + "device" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b99d2706e716417886a75544182cc07d", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Checkbox(value=True, description='Use compressed models')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import ipywidgets as widgets\n", + "\n", + "model_available = (model_base_dir / \"INT4\").is_dir()\n", + "use_quantized_models = widgets.Checkbox(\n", + " value=model_available,\n", + " description=\"Use compressed models\",\n", + " disabled=not model_available,\n", + ")\n", + "\n", + "use_quantized_models" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-02-06 22:52:20.058138: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2025-02-06 22:52:20.071147: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", + "E0000 00:00:1738867940.086226 322985 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "E0000 00:00:1738867940.090599 322985 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2025-02-06 22:52:20.105783: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers\n" + ] + } + ], + "source": [ + "from optimum.intel.openvino import OVDiffusionPipeline\n", + "\n", + "model_dir = model_base_dir / \"INT4\" if use_quantized_models.value else model_base_dir / \"FP16\"\n", + "\n", + "ov_pipe = OVDiffusionPipeline.from_pretrained(model_dir, device=device.value)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the pipeline is initialized, we can see the model in action. Let's try to turn the dog into a cat!"
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from diffusers.utils import load_image, make_image_grid\n", + "\n", + "input_image_path = Path(\"input_image.png\")\n", + "input_mask_path = Path(\"input_mask.png\")\n", + "\n", + "if not input_image_path.exists():\n", + " init_image = load_image(\"https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png\")\n", + " init_image.save(input_image_path)\n", + "else:\n", + " init_image = load_image(str(input_image_path))\n", + "\n", + "if not input_mask_path.exists():\n", + " init_mask = load_image(\"https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png\")\n", + " init_mask.save(input_mask_path)\n", + "else:\n", + " init_mask = load_image(str(input_mask_path))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2233cef3212247409d99dbe50c95f11f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/20 [00:00<?, ?it/s]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prompt = \"A yellow cat, high resolution, sitting on a park bench\"\n", + "image = ov_pipe(prompt=prompt, image=init_image, mask_image=init_mask, num_inference_steps=20, height=512, width=512).images[0]\n", + "make_image_grid([init_image, init_mask, image], rows=1, cols=3)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interactive demo\n", + "[back to top ⬆️](#Table-of-contents:)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if not Path(\"gradio_helper.py\").exists():\n", + " r = requests.get(\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/flux-fill/gradio_helper.py\")\n", + " with open(\"gradio_helper.py\", \"w\") as f:\n", + " f.write(r.text)\n", + "\n", + "\n", + "from gradio_helper import make_demo\n", + "\n", + "demo = make_demo(ov_pipe)\n", + "\n", + "try:\n", + " demo.launch(debug=True)\n", + "except Exception:\n", + " demo.launch(share=True, debug=True)\n", + "# if you are launching remotely, specify server_name and server_port\n", + "# demo.launch(server_name='your server name', server_port='server port in int')\n", + "# Read more in the docs: https://gradio.app/docs/" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + }, + "openvino_notebooks": { + "imageUrl": "https://github.com/user-attachments/assets/0e195ef2-fc5d-4eca-b32f-08cdd646199f", + "tags": { + "categories": [ + "Model Demos", + "AI Trends" + ], + "libraries": [], + "other": [ + "Stable Diffusion" + ], + "tasks": [ + "Image-to-Image" + ] + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "0a4372eb5ad243bc99868db9305fa586": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + }
}, + "0c9b1d98f73a4263be1269f61e888670": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "14d0118960be48f6aeb9b443b8eaf308": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_799f51a7a5b94a7b9ccb5cc7f0580edb", + "style": "IPY_MODEL_7de682aa45e84bdab1cea94668c8457b", + "value": " 28/28 [30:00<00:00, 64.76s/steps]" + } + }, + "2233cef3212247409d99dbe50c95f11f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_9b5bf943eaaf4e4e961246c3f4d2b13d", + "IPY_MODEL_639bad2a309c47acb51fc0314626e1c5", + "IPY_MODEL_7ddd04d198ff422cb17efa8e50e9962f" + ], + "layout": "IPY_MODEL_cefa46e48c3e40caaea186afe49e9e2a" + } + }, + "26fdc286eb544405adb907c789609161": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "bar_style": "success", + "layout": "IPY_MODEL_fea0773d8a0549098d2bffa8455185e0", + "max": 28, + "style": "IPY_MODEL_faf6a6ed9e124caeaef9852c719cf4bc", + "value": 28 + } + }, + "31d59080067445bb936f67ed4bd495f4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "442633e9bd6847c38baab87cef1ee027": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "5fdce1ea45704ea9a95cc4908cecc253": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "639bad2a309c47acb51fc0314626e1c5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "bar_style": "success", + "layout": "IPY_MODEL_442633e9bd6847c38baab87cef1ee027", + "max": 20, + "style": "IPY_MODEL_cd36842c164c4cecb96a0bb3e32e9478", + "value": 20 + } + }, + "738d82bd6b354789a2fbd3a81796fb81": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "799f51a7a5b94a7b9ccb5cc7f0580edb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "7ddd04d198ff422cb17efa8e50e9962f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_5fdce1ea45704ea9a95cc4908cecc253", + "style": "IPY_MODEL_a3b42d41a69f4449b0a80043e4578efb", + "value": " 20/20 [04:42<00:00, 13.55s/it]" + } + }, + "7de682aa45e84bdab1cea94668c8457b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "7e466f478b984873a8c25965214fdc6a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_cb9caba4013e49adacdff8c73f427441", + "IPY_MODEL_26fdc286eb544405adb907c789609161", + "IPY_MODEL_14d0118960be48f6aeb9b443b8eaf308" + ], + "layout": "IPY_MODEL_81858751b1f144999dcc239f16f96151" + } + }, + "81858751b1f144999dcc239f16f96151": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "9b5bf943eaaf4e4e961246c3f4d2b13d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_31d59080067445bb936f67ed4bd495f4", + "style": "IPY_MODEL_be54015b16ca442ba93a45badbd14baa", + "value": "100%" + } + }, + "a3b42d41a69f4449b0a80043e4578efb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "ab6f3c0c71994e0fa82de56845b4b208": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "CheckboxStyleModel", + "state": { + "description_width": "" + } + }, + "b99d2706e716417886a75544182cc07d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "CheckboxModel", + "state": { + "description": "Use compressed models", + "disabled": false, + "layout": "IPY_MODEL_f85e59eeb8dd409aa8c29efc99cbd3e5", + "style": "IPY_MODEL_ccb8c3a0ea274e9a9e63b30352cf956a", + "value": true + } + }, + "be54015b16ca442ba93a45badbd14baa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "c1d26676194b498db0f009ead7a3f833": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "CheckboxModel", + "state": { + "description": "Weight compression", + "disabled": false, + "layout": "IPY_MODEL_0c9b1d98f73a4263be1269f61e888670", + "style": "IPY_MODEL_ab6f3c0c71994e0fa82de56845b4b208", + "value": true + } + }, + "c37e3dca7d41420db0839f75af31ef6f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "c673e0c48829441f99f28428a10f9fa1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "cb9caba4013e49adacdff8c73f427441": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_c673e0c48829441f99f28428a10f9fa1", + "style": "IPY_MODEL_738d82bd6b354789a2fbd3a81796fb81", + "value": "100%" + } + }, + "ccb8c3a0ea274e9a9e63b30352cf956a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "CheckboxStyleModel", + "state": { + "description_width": "" + } + }, + "cd36842c164c4cecb96a0bb3e32e9478": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "cefa46e48c3e40caaea186afe49e9e2a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "d8c94902691548bfb0a9af7ad4b2f7f2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "DropdownModel", + "state": { + "_options_labels": [ + "CPU", + "AUTO" + ], + "description": "Device:", + "index": 0, + "layout": "IPY_MODEL_c37e3dca7d41420db0839f75af31ef6f", + "style": "IPY_MODEL_0a4372eb5ad243bc99868db9305fa586" + } + }, + "f85e59eeb8dd409aa8c29efc99cbd3e5": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "faf6a6ed9e124caeaef9852c719cf4bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "fea0773d8a0549098d2bffa8455185e0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/flux-fill/gradio_helper.py b/notebooks/flux-fill/gradio_helper.py new file mode 100644 index 00000000000..efa78d2c8ce --- /dev/null +++ b/notebooks/flux-fill/gradio_helper.py @@ -0,0 +1,396 @@ +import gradio as gr +from PIL import Image, ImageDraw + + +def can_expand(source_width, source_height, target_width, target_height, alignment): + if alignment in ("Left", "Right") and source_width >= target_width: + return False + if alignment in ("Top", "Bottom") and source_height >= target_height: + return False + return True + + +def prepare_image_and_mask( + image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom +): + target_size = (width, height) + + scale_factor = min(target_size[0] / image.width, target_size[1] / image.height) + new_width = int(image.width * scale_factor) + new_height = int(image.height * scale_factor) + + source = image.resize((new_width, new_height), Image.LANCZOS) + + if resize_option == "Full": + resize_percentage = 100 + elif resize_option == "75%": + resize_percentage = 75 + elif resize_option == "50%": + resize_percentage = 50 + elif resize_option == "33%": + resize_percentage = 33 + elif resize_option == "25%": + resize_percentage = 25 + else: # Custom + resize_percentage = custom_resize_percentage + + # Calculate new dimensions based on percentage + resize_factor = resize_percentage / 100 + new_width = int(source.width * resize_factor) + new_height = int(source.height * resize_factor) + + # Ensure minimum size of 64 pixels + new_width = max(new_width, 64) + new_height = max(new_height, 64) + + # Resize the image + source = source.resize((new_width, new_height), Image.LANCZOS) + + # Calculate the overlap in pixels based on the percentage + overlap_x = int(new_width * (overlap_percentage / 100)) + overlap_y = int(new_height * (overlap_percentage / 100)) + + # Ensure minimum overlap of 1 pixel + overlap_x = max(overlap_x, 1) + overlap_y = max(overlap_y, 1) + + # Calculate margins based on alignment + if alignment == "Middle": + margin_x = (target_size[0] - new_width) // 2 + margin_y = (target_size[1] - new_height) // 2 + elif alignment == "Left": + margin_x = 0 + margin_y = (target_size[1] - new_height) // 2 + elif alignment == "Right": + margin_x = target_size[0] - new_width + margin_y = (target_size[1] - new_height) // 2 + elif alignment == "Top": + margin_x = (target_size[0] - new_width) // 2 + margin_y = 0 + elif alignment == "Bottom": + margin_x = (target_size[0] - new_width) // 2 + margin_y = target_size[1] - new_height + + # Adjust margins to eliminate gaps + margin_x = max(0, min(margin_x, target_size[0] - new_width)) + margin_y = max(0, min(margin_y, target_size[1] - new_height)) + + # Create a new background image and paste the resized source image + background = Image.new("RGB", target_size, (255, 255, 255)) + background.paste(source, (margin_x, margin_y)) + + # Create the mask + mask = 
Image.new("L", target_size, 255) + mask_draw = ImageDraw.Draw(mask) + + # Calculate overlap areas + white_gaps_patch = 2 + + left_overlap = margin_x + overlap_x if overlap_left else margin_x + white_gaps_patch + right_overlap = margin_x + new_width - overlap_x if overlap_right else margin_x + new_width - white_gaps_patch + top_overlap = margin_y + overlap_y if overlap_top else margin_y + white_gaps_patch + bottom_overlap = margin_y + new_height - overlap_y if overlap_bottom else margin_y + new_height - white_gaps_patch + + if alignment == "Left": + left_overlap = margin_x + overlap_x if overlap_left else margin_x + elif alignment == "Right": + right_overlap = margin_x + new_width - overlap_x if overlap_right else margin_x + new_width + elif alignment == "Top": + top_overlap = margin_y + overlap_y if overlap_top else margin_y + elif alignment == "Bottom": + bottom_overlap = margin_y + new_height - overlap_y if overlap_bottom else margin_y + new_height + + # Draw the mask + mask_draw.rectangle([(left_overlap, top_overlap), (right_overlap, bottom_overlap)], fill=0) + + return background, mask + + +def make_demo(pipe): + def inpaint( + image, + width, + height, + overlap_percentage, + num_inference_steps, + resize_option, + custom_resize_percentage, + prompt_input, + alignment, + overlap_left, + overlap_right, + overlap_top, + overlap_bottom, + progress=gr.Progress(track_tqdm=True), + ): + + background, mask = prepare_image_and_mask( + image, + width, + height, + overlap_percentage, + resize_option, + custom_resize_percentage, + alignment, + overlap_left, + overlap_right, + overlap_top, + overlap_bottom, + ) + + if not can_expand(background.width, background.height, width, height, alignment): + alignment = "Middle" + + cnet_image = background.copy() + cnet_image.paste(0, (0, 0), mask) + + final_prompt = prompt_input + + # generator = torch.Generator(device="cuda").manual_seed(42) + + result = pipe( + prompt=final_prompt, + height=height, + width=width, + image=cnet_image, + mask_image=mask, + num_inference_steps=num_inference_steps, + guidance_scale=30, + ).images[0] + + result = result.convert("RGBA") + cnet_image.paste(result, (0, 0), mask) + + return cnet_image, background + + def preview_image_and_mask( + image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom + ): + background, mask = prepare_image_and_mask( + image, + width, + height, + overlap_percentage, + resize_option, + custom_resize_percentage, + alignment, + overlap_left, + overlap_right, + overlap_top, + overlap_bottom, + ) + + preview = background.copy().convert("RGBA") + red_overlay = Image.new("RGBA", background.size, (255, 0, 0, 64)) + red_mask = Image.new("RGBA", background.size, (0, 0, 0, 0)) + red_mask.paste(red_overlay, (0, 0), mask) + preview = Image.alpha_composite(preview, red_mask) + + return preview + + def clear_result(): + return gr.update(value=None) + + def preload_presets(target_ratio, ui_width, ui_height): + if target_ratio == "9:16": + return 720, 1280, gr.update() + elif target_ratio == "16:9": + return 1280, 720, gr.update() + elif target_ratio == "1:1": + return 1024, 1024, gr.update() + elif target_ratio == "Custom": + return ui_width, ui_height, gr.update(open=True) + + def select_the_right_preset(user_width, user_height): + if user_width == 720 and user_height == 1280: + return "9:16" + elif user_width == 1280 and user_height == 720: + return "16:9" + elif user_width == 1024 and user_height == 1024: + return 
"1:1" + else: + return "Custom" + + def toggle_custom_resize_slider(resize_option): + return gr.update(visible=(resize_option == "Custom")) + + def update_history(new_image, history): + if history is None: + history = [] + history.insert(0, new_image) + return history + + css = """ + .gradio-container { + max-width: 1250px !important; + } + """ + + title = """

+    <h1 align="center">FLUX Fill Outpaint</h1>
+    <p align="center">Drop an image you would like to extend, pick your expected ratio and hit Generate.</p>
+    <p align="center">Using FLUX.1-Fill-dev</p>
+ """ + + with gr.Blocks(css=css) as demo: + with gr.Column(): + gr.HTML(title) + + with gr.Row(): + with gr.Column(): + input_image = gr.Image(type="pil", label="Input Image") + + with gr.Row(): + with gr.Column(scale=2): + prompt_input = gr.Textbox(label="Prompt (Optional)") + with gr.Column(scale=1): + run_button = gr.Button("Generate") + + with gr.Row(): + target_ratio = gr.Radio(label="Image Ratio", choices=["9:16", "16:9", "1:1", "Custom"], value="9:16", scale=3) + alignment_dropdown = gr.Dropdown( + choices=["Middle", "Left", "Right", "Top", "Bottom"], + value="Middle", + label="Alignment", + ) + resize_option = gr.Radio(label="Resize input image", choices=["Full", "75%", "50%", "33%", "25%", "Custom"], value="75%") + custom_resize_percentage = gr.Slider(label="Custom resize (%)", minimum=1, maximum=100, step=1, value=50, visible=False) + with gr.Accordion(label="Advanced settings", open=False) as settings_panel: + with gr.Column(): + with gr.Row(): + width_slider = gr.Slider( + label="Target Width", + minimum=720, + maximum=1536, + step=8, + value=720, + ) + height_slider = gr.Slider( + label="Target Height", + minimum=720, + maximum=1536, + step=8, + value=1280, + ) + + num_inference_steps = gr.Slider(label="Steps", minimum=2, maximum=50, step=1, value=28) + with gr.Group(): + overlap_percentage = gr.Slider(label="Mask overlap (%)", minimum=1, maximum=50, value=10, step=1) + with gr.Row(): + overlap_top = gr.Checkbox(label="Overlap Top", value=True) + overlap_right = gr.Checkbox(label="Overlap Right", value=True) + with gr.Row(): + overlap_left = gr.Checkbox(label="Overlap Left", value=True) + overlap_bottom = gr.Checkbox(label="Overlap Bottom", value=True) + + with gr.Column(): + preview_button = gr.Button("Preview alignment and mask") + + with gr.Column(): + result = gr.Image( + interactive=False, + label="Generated Image", + ) + use_as_input_button = gr.Button("Use as Input Image", visible=False) + with gr.Accordion("History and Mask", open=False): + history_gallery = gr.Gallery(label="History", columns=6, object_fit="contain", interactive=False) + preview_image = gr.Image(label="Mask preview") + + def use_output_as_input(output_image): + return output_image + + use_as_input_button.click(fn=use_output_as_input, inputs=[result], outputs=[input_image]) + + target_ratio.change( + fn=preload_presets, inputs=[target_ratio, width_slider, height_slider], outputs=[width_slider, height_slider, settings_panel], queue=False + ) + + width_slider.change(fn=select_the_right_preset, inputs=[width_slider, height_slider], outputs=[target_ratio], queue=False) + + height_slider.change(fn=select_the_right_preset, inputs=[width_slider, height_slider], outputs=[target_ratio], queue=False) + + resize_option.change(fn=toggle_custom_resize_slider, inputs=[resize_option], outputs=[custom_resize_percentage], queue=False) + + run_button.click( + fn=clear_result, + inputs=None, + outputs=result, + ).then( + fn=inpaint, + inputs=[ + input_image, + width_slider, + height_slider, + overlap_percentage, + num_inference_steps, + resize_option, + custom_resize_percentage, + prompt_input, + alignment_dropdown, + overlap_left, + overlap_right, + overlap_top, + overlap_bottom, + ], + outputs=[result, preview_image], + ).then( + fn=lambda x, history: update_history(x, history), + inputs=[result, history_gallery], + outputs=history_gallery, + ).then( + fn=lambda: gr.update(visible=True), + inputs=None, + outputs=use_as_input_button, + ) + + prompt_input.submit( + fn=clear_result, + inputs=None, + outputs=result, + 
).then( + fn=inpaint, + inputs=[ + input_image, + width_slider, + height_slider, + overlap_percentage, + num_inference_steps, + resize_option, + custom_resize_percentage, + prompt_input, + alignment_dropdown, + overlap_left, + overlap_right, + overlap_top, + overlap_bottom, + ], + outputs=[result, preview_image], + ).then( + fn=lambda x, history: update_history(x, history), + inputs=[result, history_gallery], + outputs=history_gallery, + ).then( + fn=lambda: gr.update(visible=True), + inputs=None, + outputs=use_as_input_button, + ) + + preview_button.click( + fn=preview_image_and_mask, + inputs=[ + input_image, + width_slider, + height_slider, + overlap_percentage, + resize_option, + custom_resize_percentage, + alignment_dropdown, + overlap_left, + overlap_right, + overlap_top, + overlap_bottom, + ], + outputs=preview_image, + queue=False, + ) + + return demo