Skip to content

Commit

Permalink
change dataset for diffusers quantization (#1605)
Browse files Browse the repository at this point in the history
  • Loading branch information
eaidova authored Jan 11, 2024
1 parent 76121ac commit 2bb7c08
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 19 deletions.
18 changes: 10 additions & 8 deletions notebooks/248-stable-diffusion-xl/248-segmind-vegart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@
"source": [
"### Prepare calibration dataset\n",
"\n",
"We use a portion of [`laion/laion2B-en-aesthetic`](https://huggingface.co/datasets/laion/laion2B-en-aesthetic) dataset from Hugging Face as calibration data.\n",
"We use a portion of [conceptual_captions](https://huggingface.co/datasets/conceptual_captions) dataset from Hugging Face as calibration data.\n",
"To collect intermediate model inputs for calibration we should customize `CompiledModel`."
]
},
Expand Down Expand Up @@ -460,14 +460,14 @@
" original_unet = pipe.unet.request\n",
" pipe.unet.request = CompiledModelDecorator(original_unet)\n",
"\n",
" dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).shuffle(seed=42)\n",
" dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\").shuffle(seed=42)\n",
" disable_progress_bar(pipe)\n",
"\n",
" # Run inference for data collection\n",
" pbar = tqdm(total=subset_size)\n",
" diff = 0\n",
" for batch in dataset:\n",
" prompt = batch[\"TEXT\"]\n",
" for batch in dataset:\n",
" prompt = batch[\"caption\"]\n",
" if len(prompt) > pipe.tokenizer.model_max_length:\n",
" continue\n",
" _ = pipe(\n",
Expand Down Expand Up @@ -686,17 +686,17 @@
"import time\n",
"\n",
"validation_size = 7\n",
"calibration_dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).take(validation_size)\n",
"calibration_dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\")\n",
"validation_data = []\n",
"for batch in calibration_dataset:\n",
" prompt = batch[\"TEXT\"]\n",
" prompt = batch[\"caption\"]\n",
" validation_data.append(prompt)\n",
"\n",
"def calculate_inference_time(pipe, dataset):\n",
" inference_time = []\n",
" disable_progress_bar(pipe)\n",
"\n",
" for prompt in dataset:\n",
" for idx, prompt in enumerate(dataset):\n",
" start = time.perf_counter()\n",
" image = pipe(\n",
" prompt,\n",
Expand All @@ -707,6 +707,8 @@
" end = time.perf_counter()\n",
" delta = end - start\n",
" inference_time.append(delta)\n",
" if idx >= validation_size:\n",
" break\n",
" disable_progress_bar(pipe, disable=False)\n",
" return np.median(inference_time)"
]
Expand Down Expand Up @@ -863,7 +865,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,7 @@
"### Prepare calibration dataset\n",
"[back to top ⬆️](#Table-of-contents:)\n",
"\n",
"We use a portion of [`laion/laion2B-en-aesthetic`](https://huggingface.co/datasets/laion/laion2B-en-aesthetic) dataset from Hugging Face as calibration data.\n",
"We use a portion of [conceptual_captions](https://huggingface.co/datasets/conceptual_captions) dataset from Hugging Face as calibration data.\n",
"To collect intermediate model inputs for calibration we should customize `CompiledModel`."
]
},
Expand Down Expand Up @@ -921,7 +921,7 @@
" original_unet = lcm_pipeline.unet\n",
" lcm_pipeline.unet = CompiledModelDecorator(original_unet, prob=0.3)\n",
"\n",
" dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).shuffle(seed=42)\n",
" dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\").shuffle(seed=42)\n",
" lcm_pipeline.set_progress_bar_config(disable=True)\n",
" safety_checker = lcm_pipeline.safety_checker\n",
" lcm_pipeline.safety_checker = None\n",
Expand All @@ -930,7 +930,7 @@
" pbar = tqdm(total=subset_size)\n",
" diff = 0\n",
" for batch in dataset:\n",
" prompt = batch[\"TEXT\"]\n",
" prompt = batch[\"caption\"]\n",
" if len(prompt) > tokenizer.model_max_length:\n",
" continue\n",
" _ = lcm_pipeline(\n",
Expand Down Expand Up @@ -1290,7 +1290,7 @@
"import time\n",
"\n",
"validation_size = 10\n",
"calibration_dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).take(validation_size)\n",
"calibration_dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\")\n",
"validation_data = []\n",
"for batch in calibration_dataset:\n",
" prompt = batch[\"caption\"]\n",
Expand All @@ -1299,7 +1299,7 @@
"def calculate_inference_time(pipeline, calibration_dataset):\n",
" inference_time = []\n",
" pipeline.set_progress_bar_config(disable=True)\n",
" for prompt in calibration_dataset:\n",
" for idx, prompt in enumerate(calibration_dataset):\n",
" start = time.perf_counter()\n",
" _ = pipeline(\n",
" prompt,\n",
Expand All @@ -1313,6 +1313,8 @@
" end = time.perf_counter()\n",
" delta = end - start\n",
" inference_time.append(delta)\n",
" if idx >= validation_size:\n",
" break\n",
" return np.median(inference_time)"
]
},
Expand Down Expand Up @@ -1561,7 +1563,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "6aa7571f-916c-4488-a9e3-dd21407a3f8d",
"metadata": {},
Expand Down Expand Up @@ -838,7 +837,7 @@
"### Prepare calibration datasets\n",
"[back to top ⬆️](#Table-of-contents:)\n",
"\n",
"We use a portion of [`laion/laion2B-en-aesthetic`](https://huggingface.co/datasets/laion/laion2B-en-aesthetic) dataset from Hugging Face as calibration data.\n",
"We use a portion of [conceptual_captions](https://huggingface.co/datasets/conceptual_captions) dataset from Hugging Face as calibration data.\n",
"To collect intermediate model inputs for calibration we should customize `CompiledModel`."
]
},
Expand Down Expand Up @@ -884,11 +883,11 @@
" pipeline.prior_pipe.prior.prior = CompiledModelDecorator(original_prior)\n",
" pipeline.decoder_pipe.decoder.decoder = CompiledModelDecorator(original_decoder)\n",
"\n",
" dataset = datasets.load_dataset(\"laion/laion2B-en-aesthetic\", split=\"train\", streaming=True).shuffle(seed=42)\n",
" dataset = datasets.load_dataset(\"conceptual_captions\", split=\"train\").shuffle(seed=42)\n",
" pbar = tqdm(total=subset_size)\n",
" diff = 0\n",
" for batch in dataset:\n",
" prompt = batch[\"TEXT\"]\n",
" prompt = batch[\"caption\"]\n",
" if len(prompt) > pipeline.tokenizer.model_max_length:\n",
" continue\n",
" _ = pipeline(\n",
Expand Down Expand Up @@ -1339,7 +1338,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand Down

0 comments on commit 2bb7c08

Please sign in to comment.