Skip to content

Commit b2add10

Browse files
authored
Update is_safetensors_compatible check (#8991)
* update * update * update * update * update
1 parent 815d882 commit b2add10

File tree

4 files changed

+151
-82
lines changed

4 files changed

+151
-82
lines changed

src/diffusers/pipelines/pipeline_loading_utils.py

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -89,49 +89,44 @@
8989
ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES[library])
9090

9191

92-
def is_safetensors_compatible(filenames, variant=None, passed_components=None) -> bool:
92+
def is_safetensors_compatible(filenames, passed_components=None) -> bool:
9393
"""
9494
Checking for safetensors compatibility:
95-
- By default, all models are saved with the default pytorch serialization, so we use the list of default pytorch
96-
files to know which safetensors files are needed.
97-
- The model is safetensors compatible only if there is a matching safetensors file for every default pytorch file.
95+
- The model is safetensors compatible only if there is a safetensors file for each model component present in
96+
filenames.
9897
9998
Converting default pytorch serialized filenames to safetensors serialized filenames:
10099
- For models from the diffusers library, just replace the ".bin" extension with ".safetensors"
101100
- For models from the transformers library, the filename changes from "pytorch_model" to "model", and the ".bin"
102101
extension is replaced with ".safetensors"
103102
"""
104-
pt_filenames = []
105-
106-
sf_filenames = set()
107-
108103
passed_components = passed_components or []
109104

105+
# extract all components of the pipeline and their associated files
106+
components = {}
110107
for filename in filenames:
111-
_, extension = os.path.splitext(filename)
108+
if not len(filename.split("/")) == 2:
109+
continue
112110

113-
if len(filename.split("/")) == 2 and filename.split("/")[0] in passed_components:
111+
component, component_filename = filename.split("/")
112+
if component in passed_components:
114113
continue
115114

116-
if extension == ".bin":
117-
pt_filenames.append(os.path.normpath(filename))
118-
elif extension == ".safetensors":
119-
sf_filenames.add(os.path.normpath(filename))
115+
components.setdefault(component, [])
116+
components[component].append(component_filename)
120117

121-
for filename in pt_filenames:
122-
# filename = 'foo/bar/baz.bam' -> path = 'foo/bar', filename = 'baz', extension = '.bam'
123-
path, filename = os.path.split(filename)
124-
filename, extension = os.path.splitext(filename)
118+
# iterate over all files of a component
119+
# check if safetensors files exist for that component
120+
# any safetensors file counts, whether it is a variant file or not
121+
for component, component_filenames in components.items():
122+
matches = []
123+
for component_filename in component_filenames:
124+
filename, extension = os.path.splitext(component_filename)
125125

126-
if filename.startswith("pytorch_model"):
127-
filename = filename.replace("pytorch_model", "model")
128-
else:
129-
filename = filename
126+
match_exists = extension == ".safetensors"
127+
matches.append(match_exists)
130128

131-
expected_sf_filename = os.path.normpath(os.path.join(path, filename))
132-
expected_sf_filename = f"{expected_sf_filename}.safetensors"
133-
if expected_sf_filename not in sf_filenames:
134-
logger.warning(f"{expected_sf_filename} not found")
129+
if not any(matches):
135130
return False
136131

137132
return True

src/diffusers/pipelines/pipeline_utils.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,18 +1416,14 @@ def download(cls, pretrained_model_name, **kwargs) -> Union[str, os.PathLike]:
14161416
if (
14171417
use_safetensors
14181418
and not allow_pickle
1419-
and not is_safetensors_compatible(
1420-
model_filenames, variant=variant, passed_components=passed_components
1421-
)
1419+
and not is_safetensors_compatible(model_filenames, passed_components=passed_components)
14221420
):
14231421
raise EnvironmentError(
14241422
f"Could not find the necessary `safetensors` weights in {model_filenames} (variant={variant})"
14251423
)
14261424
if from_flax:
14271425
ignore_patterns = ["*.bin", "*.safetensors", "*.onnx", "*.pb"]
1428-
elif use_safetensors and is_safetensors_compatible(
1429-
model_filenames, variant=variant, passed_components=passed_components
1430-
):
1426+
elif use_safetensors and is_safetensors_compatible(model_filenames, passed_components=passed_components):
14311427
ignore_patterns = ["*.bin", "*.msgpack"]
14321428

14331429
use_onnx = use_onnx if use_onnx is not None else pipeline_class._is_onnx

tests/pipelines/test_pipeline_utils.py

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -68,25 +68,21 @@ def test_all_is_compatible_variant(self):
6868
"unet/diffusion_pytorch_model.fp16.bin",
6969
"unet/diffusion_pytorch_model.fp16.safetensors",
7070
]
71-
variant = "fp16"
72-
self.assertTrue(is_safetensors_compatible(filenames, variant=variant))
71+
self.assertTrue(is_safetensors_compatible(filenames))
7372

7473
def test_diffusers_model_is_compatible_variant(self):
7574
filenames = [
7675
"unet/diffusion_pytorch_model.fp16.bin",
7776
"unet/diffusion_pytorch_model.fp16.safetensors",
7877
]
79-
variant = "fp16"
80-
self.assertTrue(is_safetensors_compatible(filenames, variant=variant))
78+
self.assertTrue(is_safetensors_compatible(filenames))
8179

82-
def test_diffusers_model_is_compatible_variant_partial(self):
83-
# pass variant but use the non-variant filenames
80+
def test_diffusers_model_is_compatible_variant_mixed(self):
8481
filenames = [
8582
"unet/diffusion_pytorch_model.bin",
86-
"unet/diffusion_pytorch_model.safetensors",
83+
"unet/diffusion_pytorch_model.fp16.safetensors",
8784
]
88-
variant = "fp16"
89-
self.assertTrue(is_safetensors_compatible(filenames, variant=variant))
85+
self.assertTrue(is_safetensors_compatible(filenames))
9086

9187
def test_diffusers_model_is_not_compatible_variant(self):
9288
filenames = [
@@ -99,25 +95,14 @@ def test_diffusers_model_is_not_compatible_variant(self):
9995
"unet/diffusion_pytorch_model.fp16.bin",
10096
# Removed: 'unet/diffusion_pytorch_model.fp16.safetensors',
10197
]
102-
variant = "fp16"
103-
self.assertFalse(is_safetensors_compatible(filenames, variant=variant))
98+
self.assertFalse(is_safetensors_compatible(filenames))
10499

105100
def test_transformer_model_is_compatible_variant(self):
106101
filenames = [
107102
"text_encoder/pytorch_model.fp16.bin",
108103
"text_encoder/model.fp16.safetensors",
109104
]
110-
variant = "fp16"
111-
self.assertTrue(is_safetensors_compatible(filenames, variant=variant))
112-
113-
def test_transformer_model_is_compatible_variant_partial(self):
114-
# pass variant but use the non-variant filenames
115-
filenames = [
116-
"text_encoder/pytorch_model.bin",
117-
"text_encoder/model.safetensors",
118-
]
119-
variant = "fp16"
120-
self.assertTrue(is_safetensors_compatible(filenames, variant=variant))
105+
self.assertTrue(is_safetensors_compatible(filenames))
121106

122107
def test_transformer_model_is_not_compatible_variant(self):
123108
filenames = [
@@ -126,9 +111,45 @@ def test_transformer_model_is_not_compatible_variant(self):
126111
"vae/diffusion_pytorch_model.fp16.bin",
127112
"vae/diffusion_pytorch_model.fp16.safetensors",
128113
"text_encoder/pytorch_model.fp16.bin",
129-
# 'text_encoder/model.fp16.safetensors',
130114
"unet/diffusion_pytorch_model.fp16.bin",
131115
"unet/diffusion_pytorch_model.fp16.safetensors",
132116
]
133-
variant = "fp16"
134-
self.assertFalse(is_safetensors_compatible(filenames, variant=variant))
117+
self.assertFalse(is_safetensors_compatible(filenames))
118+
119+
def test_transformers_is_compatible_sharded(self):
120+
filenames = [
121+
"text_encoder/pytorch_model.bin",
122+
"text_encoder/model-00001-of-00002.safetensors",
123+
"text_encoder/model-00002-of-00002.safetensors",
124+
]
125+
self.assertTrue(is_safetensors_compatible(filenames))
126+
127+
def test_transformers_is_compatible_variant_sharded(self):
128+
filenames = [
129+
"text_encoder/pytorch_model.bin",
130+
"text_encoder/model.fp16-00001-of-00002.safetensors",
131+
"text_encoder/model.fp16-00001-of-00002.safetensors",
132+
]
133+
self.assertTrue(is_safetensors_compatible(filenames))
134+
135+
def test_diffusers_is_compatible_sharded(self):
136+
filenames = [
137+
"unet/diffusion_pytorch_model.bin",
138+
"unet/diffusion_pytorch_model-00001-of-00002.safetensors",
139+
"unet/diffusion_pytorch_model-00002-of-00002.safetensors",
140+
]
141+
self.assertTrue(is_safetensors_compatible(filenames))
142+
143+
def test_diffusers_is_compatible_variant_sharded(self):
144+
filenames = [
145+
"unet/diffusion_pytorch_model.bin",
146+
"unet/diffusion_pytorch_model.fp16-00001-of-00002.safetensors",
147+
"unet/diffusion_pytorch_model.fp16-00001-of-00002.safetensors",
148+
]
149+
self.assertTrue(is_safetensors_compatible(filenames))
150+
151+
def test_diffusers_is_compatible_only_variants(self):
152+
filenames = [
153+
"unet/diffusion_pytorch_model.fp16.safetensors",
154+
]
155+
self.assertTrue(is_safetensors_compatible(filenames))

tests/pipelines/test_pipelines.py

Lines changed: 82 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -551,37 +551,94 @@ def test_download_variant_partly(self):
551551
assert sum(f.endswith(this_format) and not f.endswith(f"{variant}{this_format}") for f in files) == 3
552552
assert not any(f.endswith(other_format) for f in files)
553553

554-
def test_download_broken_variant(self):
555-
for use_safetensors in [False, True]:
556-
# text encoder is missing no variant and "no_ema" variant weights, so the following can't work
557-
for variant in [None, "no_ema"]:
558-
with self.assertRaises(OSError) as error_context:
559-
with tempfile.TemporaryDirectory() as tmpdirname:
560-
tmpdirname = StableDiffusionPipeline.from_pretrained(
561-
"hf-internal-testing/stable-diffusion-broken-variants",
562-
cache_dir=tmpdirname,
563-
variant=variant,
564-
use_safetensors=use_safetensors,
565-
)
566-
567-
assert "Error no file name" in str(error_context.exception)
568-
569-
# text encoder has fp16 variants so we can load it
570-
with tempfile.TemporaryDirectory() as tmpdirname:
571-
tmpdirname = StableDiffusionPipeline.download(
554+
def test_download_safetensors_only_variant_exists_for_model(self):
555+
variant = None
556+
use_safetensors = True
557+
558+
# text encoder is missing non-variant weights, so the following can't work
559+
with tempfile.TemporaryDirectory() as tmpdirname:
560+
with self.assertRaises(OSError) as error_context:
561+
tmpdirname = StableDiffusionPipeline.from_pretrained(
572562
"hf-internal-testing/stable-diffusion-broken-variants",
563+
cache_dir=tmpdirname,
564+
variant=variant,
573565
use_safetensors=use_safetensors,
566+
)
567+
assert "Error no file name" in str(error_context.exception)
568+
569+
# text encoder has fp16 variants so we can load it
570+
with tempfile.TemporaryDirectory() as tmpdirname:
571+
tmpdirname = StableDiffusionPipeline.download(
572+
"hf-internal-testing/stable-diffusion-broken-variants",
573+
use_safetensors=use_safetensors,
574+
cache_dir=tmpdirname,
575+
variant="fp16",
576+
)
577+
all_root_files = [t[-1] for t in os.walk(tmpdirname)]
578+
files = [item for sublist in all_root_files for item in sublist]
579+
# None of the downloaded files should be a non-variant file even if we have some here:
580+
# https://huggingface.co/hf-internal-testing/stable-diffusion-broken-variants/tree/main/unet
581+
assert len(files) == 15, f"We should only download 15 files, not {len(files)}"
582+
583+
def test_download_bin_only_variant_exists_for_model(self):
584+
variant = None
585+
use_safetensors = False
586+
587+
# text encoder is missing non-variant weights, so the following can't work
588+
with tempfile.TemporaryDirectory() as tmpdirname:
589+
with self.assertRaises(OSError) as error_context:
590+
tmpdirname = StableDiffusionPipeline.from_pretrained(
591+
"hf-internal-testing/stable-diffusion-broken-variants",
574592
cache_dir=tmpdirname,
575-
variant="fp16",
593+
variant=variant,
594+
use_safetensors=use_safetensors,
576595
)
596+
assert "Error no file name" in str(error_context.exception)
577597

578-
all_root_files = [t[-1] for t in os.walk(tmpdirname)]
579-
files = [item for sublist in all_root_files for item in sublist]
598+
# text encoder has fp16 variants so we can load it
599+
with tempfile.TemporaryDirectory() as tmpdirname:
600+
tmpdirname = StableDiffusionPipeline.download(
601+
"hf-internal-testing/stable-diffusion-broken-variants",
602+
use_safetensors=use_safetensors,
603+
cache_dir=tmpdirname,
604+
variant="fp16",
605+
)
606+
all_root_files = [t[-1] for t in os.walk(tmpdirname)]
607+
files = [item for sublist in all_root_files for item in sublist]
608+
# None of the downloaded files should be a non-variant file even if we have some here:
609+
# https://huggingface.co/hf-internal-testing/stable-diffusion-broken-variants/tree/main/unet
610+
assert len(files) == 15, f"We should only download 15 files, not {len(files)}"
580611

581-
# None of the downloaded files should be a non-variant file even if we have some here:
582-
# https://huggingface.co/hf-internal-testing/stable-diffusion-broken-variants/tree/main/unet
583-
assert len(files) == 15, f"We should only download 15 files, not {len(files)}"
584-
# only unet has "no_ema" variant
612+
def test_download_safetensors_variant_does_not_exist_for_model(self):
613+
variant = "no_ema"
614+
use_safetensors = True
615+
616+
# text encoder is missing no_ema variant weights, so the following can't work
617+
with tempfile.TemporaryDirectory() as tmpdirname:
618+
with self.assertRaises(OSError) as error_context:
619+
tmpdirname = StableDiffusionPipeline.from_pretrained(
620+
"hf-internal-testing/stable-diffusion-broken-variants",
621+
cache_dir=tmpdirname,
622+
variant=variant,
623+
use_safetensors=use_safetensors,
624+
)
625+
626+
assert "Error no file name" in str(error_context.exception)
627+
628+
def test_download_bin_variant_does_not_exist_for_model(self):
629+
variant = "no_ema"
630+
use_safetensors = False
631+
632+
# text encoder is missing no_ema variant weights, so the following can't work
633+
with tempfile.TemporaryDirectory() as tmpdirname:
634+
with self.assertRaises(OSError) as error_context:
635+
tmpdirname = StableDiffusionPipeline.from_pretrained(
636+
"hf-internal-testing/stable-diffusion-broken-variants",
637+
cache_dir=tmpdirname,
638+
variant=variant,
639+
use_safetensors=use_safetensors,
640+
)
641+
assert "Error no file name" in str(error_context.exception)
585642

586643
def test_local_save_load_index(self):
587644
prompt = "hello"

0 commit comments

Comments
 (0)