Skip to content

Commit c51b6bd

Browse files
yao-matrixhlkygithub-actions[bot]
authored
introduce compute arch specific expectations and fix test_sd3_img2img_inference failure (#11227)
* add arch specific expectations support, to support different arch's numerical characteristics Signed-off-by: YAO Matrix <[email protected]> * fix typo Signed-off-by: YAO Matrix <[email protected]> * Apply suggestions from code review * Apply style fixes * Update src/diffusers/utils/testing_utils.py --------- Signed-off-by: YAO Matrix <[email protected]> Co-authored-by: hlky <[email protected]> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent fb54499 commit c51b6bd

File tree

3 files changed

+244
-35
lines changed

3 files changed

+244
-35
lines changed

src/diffusers/utils/testing_utils.py

+103-1
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414
import time
1515
import unittest
1616
import urllib.parse
17+
from collections import UserDict
1718
from contextlib import contextmanager
1819
from io import BytesIO, StringIO
1920
from pathlib import Path
20-
from typing import Callable, Dict, List, Optional, Union
21+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
2122

2223
import numpy as np
2324
import PIL.Image
@@ -48,6 +49,17 @@
4849
from .logging import get_logger
4950

5051

52+
# Backend build flags. They start out False so the names always exist, and are
# only refined when torch can be imported.
IS_ROCM_SYSTEM = False
IS_CUDA_SYSTEM = False
IS_XPU_SYSTEM = False

if is_torch_available():
    import torch

    # A flag is True when the matching `torch.version` attribute is not None.
    IS_ROCM_SYSTEM = torch.version.hip is not None
    IS_CUDA_SYSTEM = torch.version.cuda is not None
    # `torch.version.xpu` is read with a None fallback in case the attribute is absent.
    IS_XPU_SYSTEM = getattr(torch.version, "xpu", None) is not None
62+
5163
global_rng = random.Random()
5264

5365
logger = get_logger(__name__)
@@ -1275,3 +1287,93 @@ def update_mapping_from_spec(device_fn_dict: Dict[str, Callable], attribute_name
12751287
update_mapping_from_spec(BACKEND_RESET_PEAK_MEMORY_STATS, "RESET_PEAK_MEMORY_STATS_FN")
12761288
update_mapping_from_spec(BACKEND_RESET_MAX_MEMORY_ALLOCATED, "RESET_MAX_MEMORY_ALLOCATED_FN")
12771289
update_mapping_from_spec(BACKEND_MAX_MEMORY_ALLOCATED, "MAX_MEMORY_ALLOCATED_FN")
1290+
1291+
1292+
# Modified from https://github.com/huggingface/transformers/blob/cdfb018d0300fef3b07d9220f3efe9c2a9974662/src/transformers/testing_utils.py#L3090
1293+
1294+
# Type definition of key used in `Expectations` class.
1295+
# Layout is (device type, major version); either element may be None.
DeviceProperties = Tuple[Optional[str], Optional[int]]
1296+
1297+
1298+
@functools.lru_cache
def get_device_properties() -> DeviceProperties:
    """
    Get environment device properties.

    Returns:
        A `(device_type, major)` tuple:

        * `("cuda", major)` or `("rocm", major)` when torch is a CUDA/ROCm build and a device is usable, where
          `major` is the first element of `torch.cuda.get_device_capability()`.
        * `("xpu", gen)` when torch is an XPU build and a device is usable, where `gen` is extracted from the
          device architecture value.
        * `(torch_device, None)` otherwise.

    The result is cached, so the device is only queried once per process.
    """
    if IS_CUDA_SYSTEM or IS_ROCM_SYSTEM:
        import torch

        # The IS_*_SYSTEM flags only describe how torch was built. Querying the capability without a usable
        # device raises, so fall through to the generic result in that case.
        if torch.cuda.is_available():
            major, _ = torch.cuda.get_device_capability()
            return ("rocm" if IS_ROCM_SYSTEM else "cuda", major)
    elif IS_XPU_SYSTEM:
        import torch

        if torch.xpu.is_available():
            # To get more info of the architecture meaning and bit allocation, refer to https://github.com/intel/llvm/blob/sycl/sycl/include/sycl/ext/oneapi/experimental/device_architecture.def
            arch = torch.xpu.get_device_capability()["architecture"]
            # Keep only the byte selected by the mask (bits 32-39) and shift it down to a small integer.
            gen_mask = 0x000000FF00000000
            gen = (arch & gen_mask) >> 32
            return ("xpu", gen)

    return (torch_device, None)
1321+
1322+
1323+
# `UserDict` is only subscripted for static type checkers; at runtime the plain class is used as the base.
if TYPE_CHECKING:
    DevicePropertiesUserDict = UserDict[DeviceProperties, Any]
else:
    DevicePropertiesUserDict = UserDict


class Expectations(DevicePropertiesUserDict):
    """
    Mapping from `DeviceProperties` keys, i.e. `(device_type, major)` tuples, to expected values.

    A lookup does not require an exact key match: every stored key is scored against the requested device
    properties (see `score`) and the value of the best scoring key is returned.
    """

    def get_expectation(self) -> Any:
        """
        Find best matching expectation based on environment device properties.
        """
        return self.find_expectation(get_device_properties())

    @staticmethod
    def is_default(key: DeviceProperties) -> bool:
        """
        Return `True` when every element of `key` is `None`, i.e. the key marks the default expectation.
        """
        return all(p is None for p in key)

    @staticmethod
    def score(key: DeviceProperties, other: DeviceProperties) -> int:
        """
        Returns score indicating how similar two instances of the `DeviceProperties` tuple are. Points are
        calculated using bits, but documented as int. Rules are as follows:
            * Matching `type` gives 8 points.
            * Semi-matching `type`, for example cuda and rocm, gives 4 points.
            * Matching `major` (compute capability major version) gives 2 points.
            * Default expectation (if present) gives 1 point.

        Args:
            key: The device properties being looked up.
            other: A candidate key stored in the mapping.

        Returns:
            The combined score; 0 means the two keys have nothing in common.
        """
        (device_type, major) = key
        (other_device_type, other_major) = other

        score = 0b0
        if device_type == other_device_type:
            score |= 0b1000
        elif device_type in ["cuda", "rocm"] and other_device_type in ["cuda", "rocm"]:
            score |= 0b100

        # Two missing majors (None) do not count as a match.
        if major == other_major and other_major is not None:
            score |= 0b10

        if Expectations.is_default(other):
            score |= 0b1

        return int(score)

    def find_expectation(self, key: DeviceProperties = (None, None)) -> Any:
        """
        Find best matching expectation based on provided device properties.

        Args:
            key: Device properties to match against the stored keys.

        Returns:
            The value stored under the best scoring key. On a tie the entry inserted first wins.

        Raises:
            ValueError: If the mapping is empty or no stored key scores above 0 for `key`.
        """
        # Score each candidate exactly once. Only the score takes part in the comparison, so the stored values
        # (which may not be orderable) are never compared. The `default` covers an empty mapping and makes
        # it take the same error path as "nothing matched".
        scored = ((Expectations.score(key, candidate), value) for candidate, value in self.data.items())
        best_score, result = max(scored, key=lambda pair: pair[0], default=(0, None))

        if best_score == 0:
            raise ValueError(f"No matching expectation found for {key}")

        return result

    def __repr__(self):
        """Return the repr of the underlying dict."""
        return f"{self.data}"

tests/others/test_utils.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from diffusers import __version__
2222
from diffusers.utils import deprecate
23-
from diffusers.utils.testing_utils import str_to_bool
23+
from diffusers.utils.testing_utils import Expectations, str_to_bool
2424

2525

2626
# Used to test the hub
@@ -182,6 +182,38 @@ def test_deprecate_stacklevel(self):
182182
assert "diffusers/tests/others/test_utils.py" in warning.filename
183183

184184

185+
# Copied from https://github.com/huggingface/transformers/blob/main/tests/utils/test_expectations.py
186+
class ExpectationsTester(unittest.TestCase):
    """Checks how `Expectations` resolves device keys to stored values."""

    def test_expectations(self):
        table = Expectations(
            {
                (None, None): 1,
                ("cuda", 8): 2,
                ("cuda", 7): 3,
                ("rocm", 8): 4,
                ("rocm", None): 5,
                ("cpu", None): 6,
                ("xpu", 3): 7,
            }
        )

        # Each case is (expected value, device key passed to `find_expectation`).
        cases = [
            # npu has no matches so should find default expectation
            (1, ("npu", None)),
            (7, ("xpu", 3)),
            (2, ("cuda", 8)),
            (3, ("cuda", 7)),
            (4, ("rocm", 9)),
            (4, ("rocm", None)),
            (2, ("cuda", 2)),
        ]
        for expected, device_key in cases:
            assert table.find_expectation(device_key) == expected

        # With no default entry and no related key, the lookup must fail.
        cuda_only = Expectations({("cuda", 8): 1})
        with self.assertRaises(ValueError):
            cuda_only.find_expectation(("xpu", None))
215+
216+
185217
def parse_flag_from_env(key, default=False):
186218
try:
187219
value = os.environ[key]

tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py

+108-33
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
)
1616
from diffusers.utils import load_image
1717
from diffusers.utils.testing_utils import (
18+
Expectations,
1819
backend_empty_cache,
1920
floats_tensor,
2021
numpy_cosine_similarity_distance,
@@ -208,41 +209,115 @@ def test_sd3_img2img_inference(self):
208209
inputs = self.get_inputs(torch_device)
209210
image = pipe(**inputs).images[0]
210211
image_slice = image[0, :10, :10]
211-
expected_slice = np.array(
212-
[
213-
0.5435,
214-
0.4673,
215-
0.5732,
216-
0.4438,
217-
0.3557,
218-
0.4912,
219-
0.4331,
220-
0.3491,
221-
0.4915,
222-
0.4287,
223-
0.3477,
224-
0.4849,
225-
0.4355,
226-
0.3469,
227-
0.4871,
228-
0.4431,
229-
0.3538,
230-
0.4912,
231-
0.4521,
232-
0.3643,
233-
0.5059,
234-
0.4587,
235-
0.3730,
236-
0.5166,
237-
0.4685,
238-
0.3845,
239-
0.5264,
240-
0.4746,
241-
0.3914,
242-
0.5342,
243-
]
212+
expected_slices = Expectations(
213+
{
214+
("xpu", 3): np.array(
215+
[
216+
0.5117,
217+
0.4421,
218+
0.3852,
219+
0.5044,
220+
0.4219,
221+
0.3262,
222+
0.5024,
223+
0.4329,
224+
0.3276,
225+
0.4978,
226+
0.4412,
227+
0.3355,
228+
0.4983,
229+
0.4338,
230+
0.3279,
231+
0.4893,
232+
0.4241,
233+
0.3129,
234+
0.4875,
235+
0.4253,
236+
0.3030,
237+
0.4961,
238+
0.4267,
239+
0.2988,
240+
0.5029,
241+
0.4255,
242+
0.3054,
243+
0.5132,
244+
0.4248,
245+
0.3222,
246+
]
247+
),
248+
("cuda", 7): np.array(
249+
[
250+
0.5435,
251+
0.4673,
252+
0.5732,
253+
0.4438,
254+
0.3557,
255+
0.4912,
256+
0.4331,
257+
0.3491,
258+
0.4915,
259+
0.4287,
260+
0.347,
261+
0.4849,
262+
0.4355,
263+
0.3469,
264+
0.4871,
265+
0.4431,
266+
0.3538,
267+
0.4912,
268+
0.4521,
269+
0.3643,
270+
0.5059,
271+
0.4587,
272+
0.373,
273+
0.5166,
274+
0.4685,
275+
0.3845,
276+
0.5264,
277+
0.4746,
278+
0.3914,
279+
0.5342,
280+
]
281+
),
282+
("cuda", 8): np.array(
283+
[
284+
0.5146,
285+
0.4385,
286+
0.3826,
287+
0.5098,
288+
0.4150,
289+
0.3218,
290+
0.5142,
291+
0.4312,
292+
0.3298,
293+
0.5127,
294+
0.4431,
295+
0.3411,
296+
0.5171,
297+
0.4424,
298+
0.3374,
299+
0.5088,
300+
0.4348,
301+
0.3242,
302+
0.5073,
303+
0.4380,
304+
0.3174,
305+
0.5132,
306+
0.4397,
307+
0.3115,
308+
0.5132,
309+
0.4343,
310+
0.3118,
311+
0.5219,
312+
0.4328,
313+
0.3256,
314+
]
315+
),
316+
}
244317
)
245318

319+
expected_slice = expected_slices.get_expectation()
320+
246321
max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
247322

248323
assert max_diff < 1e-4, f"Outputs are not close enough, got {max_diff}"

0 commit comments

Comments
 (0)