From 2bb7c08808702c2d248c0136863da4a6960f9946 Mon Sep 17 00:00:00 2001
From: Ekaterina Aidova
Date: Thu, 11 Jan 2024 09:57:03 +0300
Subject: [PATCH] change dataset for diffusers quantization (#1605)

---
 .../248-segmind-vegart.ipynb                   | 18 ++++++++++--------
 ...t-consistency-models-image-generation.ipynb | 16 +++++++++-------
 .../265-wuerstchen-image-generation.ipynb      |  9 ++++-----
 3 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/notebooks/248-stable-diffusion-xl/248-segmind-vegart.ipynb b/notebooks/248-stable-diffusion-xl/248-segmind-vegart.ipynb
index a1668f42662..eb1a04bf3ab 100644
--- a/notebooks/248-stable-diffusion-xl/248-segmind-vegart.ipynb
+++ b/notebooks/248-stable-diffusion-xl/248-segmind-vegart.ipynb
@@ -410,7 +410,7 @@
    "source": [
     "### Prepare calibration dataset\n",
     "\n",
-    "We use a portion of [`laion/laion2B-en-aesthetic`](https://huggingface.co/datasets/laion/laion2B-en-aesthetic) dataset from Hugging Face as calibration data.\n",
+    "We use a portion of [conceptual_captions](https://huggingface.co/datasets/conceptual_captions) dataset from Hugging Face as calibration data.\n",
     "To collect intermediate model inputs for calibration we should customize `CompiledModel`."
    ]
   },
@@ -460,14 +460,14 @@
     "    original_unet = pipe.unet.request\n",
     "    pipe.unet.request = CompiledModelDecorator(original_unet)\n",
     "\n",
-    "    dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).shuffle(seed=42)\n",
+    "    dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\").shuffle(seed=42)\n",
     "    disable_progress_bar(pipe)\n",
     "\n",
     "    # Run inference for data collection\n",
     "    pbar = tqdm(total=subset_size)\n",
     "    diff = 0\n",
     "    for batch in dataset:\n",
-    "        prompt = batch[\"TEXT\"]\n",
+    "        prompt = batch[\"caption\"]\n",
     "        if len(prompt) > pipe.tokenizer.model_max_length:\n",
     "            continue\n",
     "        _ = pipe(\n",
@@ -686,17 +686,17 @@
     "import time\n",
     "\n",
     "validation_size = 7\n",
-    "calibration_dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).take(validation_size)\n",
+    "calibration_dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\").select(range(validation_size))\n",
     "validation_data = []\n",
     "for batch in calibration_dataset:\n",
-    "    prompt = batch[\"TEXT\"]\n",
+    "    prompt = batch[\"caption\"]\n",
     "    validation_data.append(prompt)\n",
     "\n",
     "def calculate_inference_time(pipe, dataset):\n",
     "    inference_time = []\n",
     "    disable_progress_bar(pipe)\n",
     "\n",
-    "    for prompt in dataset:\n",
+    "    for idx, prompt in enumerate(dataset):\n",
     "        start = time.perf_counter()\n",
     "        image = pipe(\n",
     "            prompt,\n",
@@ -707,6 +707,8 @@
     "        end = time.perf_counter()\n",
     "        delta = end - start\n",
     "        inference_time.append(delta)\n",
+    "        if idx >= validation_size:\n",
+    "            break\n",
     "    disable_progress_bar(pipe, disable=False)\n",
     "    return np.median(inference_time)"
    ]
@@ -863,7 +865,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
diff --git a/notebooks/263-latent-consistency-models-image-generation/263-latent-consistency-models-image-generation.ipynb b/notebooks/263-latent-consistency-models-image-generation/263-latent-consistency-models-image-generation.ipynb
index 982c629be73..5648f767d2b 100644
--- a/notebooks/263-latent-consistency-models-image-generation/263-latent-consistency-models-image-generation.ipynb
+++ b/notebooks/263-latent-consistency-models-image-generation/263-latent-consistency-models-image-generation.ipynb
@@ -886,7 +886,7 @@
     "### Prepare calibration dataset\n",
     "[back to top ⬆️](#Table-of-contents:)\n",
     "\n",
-    "We use a portion of [`laion/laion2B-en-aesthetic`](https://huggingface.co/datasets/laion/laion2B-en-aesthetic) dataset from Hugging Face as calibration data.\n",
+    "We use a portion of [conceptual_captions](https://huggingface.co/datasets/conceptual_captions) dataset from Hugging Face as calibration data.\n",
     "To collect intermediate model inputs for calibration we should customize `CompiledModel`."
    ]
   },
@@ -921,7 +921,7 @@
     "    original_unet = lcm_pipeline.unet\n",
     "    lcm_pipeline.unet = CompiledModelDecorator(original_unet, prob=0.3)\n",
     "\n",
-    "    dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).shuffle(seed=42)\n",
+    "    dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\").shuffle(seed=42)\n",
     "    lcm_pipeline.set_progress_bar_config(disable=True)\n",
     "    safety_checker = lcm_pipeline.safety_checker\n",
     "    lcm_pipeline.safety_checker = None\n",
@@ -930,7 +930,7 @@
     "    pbar = tqdm(total=subset_size)\n",
     "    diff = 0\n",
     "    for batch in dataset:\n",
-    "        prompt = batch[\"TEXT\"]\n",
+    "        prompt = batch[\"caption\"]\n",
     "        if len(prompt) > tokenizer.model_max_length:\n",
     "            continue\n",
     "        _ = lcm_pipeline(\n",
@@ -1290,7 +1290,7 @@
     "import time\n",
     "\n",
     "validation_size = 10\n",
-    "calibration_dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).take(validation_size)\n",
+    "calibration_dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\").select(range(validation_size))\n",
     "validation_data = []\n",
     "for batch in calibration_dataset:\n",
-    "    prompt = batch[\"TEXT\"]\n",
+    "    prompt = batch[\"caption\"]\n",
@@ -1299,7 +1299,7 @@
     "def calculate_inference_time(pipeline, calibration_dataset):\n",
     "    inference_time = []\n",
     "    pipeline.set_progress_bar_config(disable=True)\n",
-    "    for prompt in calibration_dataset:\n",
+    "    for idx, prompt in enumerate(calibration_dataset):\n",
     "        start = time.perf_counter()\n",
     "        _ = pipeline(\n",
     "            prompt,\n",
@@ -1313,6 +1313,8 @@
     "        end = time.perf_counter()\n",
     "        delta = end - start\n",
     "        inference_time.append(delta)\n",
+    "        if idx >= validation_size:\n",
+    "            break\n",
     "    return np.median(inference_time)"
    ]
   },
@@ -1561,7 +1563,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
diff --git a/notebooks/265-wuerstchen-image-generation/265-wuerstchen-image-generation.ipynb b/notebooks/265-wuerstchen-image-generation/265-wuerstchen-image-generation.ipynb
index d901307f14c..6f8fad02ba2 100644
--- a/notebooks/265-wuerstchen-image-generation/265-wuerstchen-image-generation.ipynb
+++ b/notebooks/265-wuerstchen-image-generation/265-wuerstchen-image-generation.ipynb
@@ -197,7 +197,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "6aa7571f-916c-4488-a9e3-dd21407a3f8d",
    "metadata": {},
@@ -838,7 +837,7 @@
     "### Prepare calibration datasets\n",
     "[back to top ⬆️](#Table-of-contents:)\n",
     "\n",
-    "We use a portion of [`laion/laion2B-en-aesthetic`](https://huggingface.co/datasets/laion/laion2B-en-aesthetic) dataset from Hugging Face as calibration data.\n",
+    "We use a portion of [conceptual_captions](https://huggingface.co/datasets/conceptual_captions) dataset from Hugging Face as calibration data.\n",
     "To collect intermediate model inputs for calibration we should customize `CompiledModel`."
    ]
   },
@@ -884,11 +883,11 @@
     "    pipeline.prior_pipe.prior.prior = CompiledModelDecorator(original_prior)\n",
     "    pipeline.decoder_pipe.decoder.decoder = CompiledModelDecorator(original_decoder)\n",
     "\n",
-    "    dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).shuffle(seed=42)\n",
+    "    dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\").shuffle(seed=42)\n",
     "    pbar = tqdm(total=subset_size)\n",
     "    diff = 0\n",
     "    for batch in dataset:\n",
-    "        prompt = batch[\"TEXT\"]\n",
+    "        prompt = batch[\"caption\"]\n",
     "        if len(prompt) > pipeline.tokenizer.model_max_length:\n",
     "            continue\n",
     "        _ = pipeline(\n",
@@ -1339,7 +1338,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
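
After this change, all three notebooks collect calibration prompts with the same pattern. Below is a minimal standalone sketch of that pattern; `collect_calibration_prompts` is a hypothetical helper name, not a function from the notebooks, and the sketch assumes only a tokenizer exposing `model_max_length` and a target `subset_size`. The notebooks additionally run the pipeline on each prompt so the `CompiledModelDecorator` wrapper can record intermediate model inputs. Since `conceptual_captions` is a map-style dataset whose records carry a "caption" field, the streaming-specific `streaming=True` and `.take()` calls used with `laion/laion2B-en-aesthetic` (field "TEXT") are no longer needed:

    import datasets

    def collect_calibration_prompts(tokenizer, subset_size):
        # conceptual_captions records carry "caption" and "image_url" fields;
        # "TEXT" was specific to laion/laion2B-en-aesthetic.
        dataset = datasets.load_dataset("conceptual_captions", split="train").shuffle(seed=42)
        prompts = []
        for batch in dataset:
            prompt = batch["caption"]
            # Same filter as the notebooks: skip prompts longer than the
            # tokenizer's context window.
            if len(prompt) > tokenizer.model_max_length:
                continue
            prompts.append(prompt)
            # Stop once enough calibration prompts have been collected.
            if len(prompts) >= subset_size:
                break
        return prompts

For the small validation sets, `.select(range(validation_size))` on the loaded split plays the role that `.take(validation_size)` played for the streaming dataset: both yield only the first `validation_size` records instead of iterating the full train split.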