Skip to content

Commit 34684e6

Browse files
lisa0314 and chromium-wpt-export-bot
authored and committed
webnn: Support block-wise quantization for DirectML backend
Block-wise quantization divides input tensors into smaller blocks that are independently quantized, resulting in faster optimization and high-precision quantization [1]. It is used by popular language models, such as the Phi-3 mini int4 quantized model [2]. A related WG issue [3] has been opened for discussion.

First, this CL validates the scale and zero-point tensors for block-wise quantization. In addition, this CL implements block-wise quantization in the DirectML backend by using DML_OPERATOR_QUANTIZE and DML_OPERATOR_DEQUANTIZE, which are available in FL >= 6.3. More validation and conformance tests are added to verify the implementation.

[1]: https://arxiv.org/abs/2110.02861
[2]: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct
[3]: webmachinelearning/webnn#779

Bug: 40206287
Change-Id: I977b0be57deebd7afcae216edc3ddc3818b8c09f
Cq-Include-Trybots: luci.chromium.try:mac14.arm64-blink-rel, mac14-blink-rel, mac15.arm64-blink-rel, mac15-blink-rel, linux-blink-rel
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5964816
Reviewed-by: Rafael Cintron <[email protected]>
Reviewed-by: ningxin hu <[email protected]>
Commit-Queue: ningxin hu <[email protected]>
Cr-Commit-Position: refs/heads/main@{#1380767}
1 parent 7b54230 commit 34684e6

File tree

4 files changed

+373
-40
lines changed

4 files changed

+373
-40
lines changed

webnn/conformance_tests/dequantizeLinear.https.any.js

+173-18
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,8 @@ const dequantizeLinearTests = [
119119
'constant': true
120120
},
121121
'dequantizeLinearZeroPoint': {
122-
'data': [128],
123-
'descriptor': {shape: [], dataType: 'uint8'},
122+
'data': [128, 128, 128, 128],
123+
'descriptor': {shape: [4], dataType: 'uint8'},
124124
'constant': true
125125
}
126126
},
@@ -144,6 +144,50 @@ const dequantizeLinearTests = [
144144
}
145145
}
146146
},
147+
{
148+
'name':
149+
'dequantizeLinear uint8 1D constant tensor with implicit block_size = 2.',
150+
'graph': {
151+
'inputs': {
152+
'dequantizeLinearInput': {
153+
'data': [12, 24, 35, 123],
154+
'descriptor': {shape: [4], dataType: 'uint8'},
155+
'constant': true
156+
},
157+
'dequantizeLinearScale': {
158+
'data': [
159+
9.343092918395996,
160+
-4.617084980010986,
161+
],
162+
'descriptor': {shape: [2], dataType: 'float32'},
163+
'constant': true
164+
},
165+
'dequantizeLinearZeroPoint': {
166+
'data': [128, 110],
167+
'descriptor': {shape: [2], dataType: 'uint8'},
168+
'constant': true
169+
}
170+
},
171+
'operators': [{
172+
'name': 'dequantizeLinear',
173+
'arguments': [
174+
{'input': 'dequantizeLinearInput'},
175+
{'scale': 'dequantizeLinearScale'},
176+
{'zeroPoint': 'dequantizeLinearZeroPoint'}
177+
],
178+
'outputs': 'dequantizeLinearOutput'
179+
}],
180+
'expectedOutputs': {
181+
'dequantizeLinearOutput': {
182+
'data': [
183+
-1083.798828125, -971.681640625, 346.2813720703125,
184+
-60.0221061706543
185+
],
186+
'descriptor': {shape: [4], dataType: 'float32'}
187+
}
188+
}
189+
}
190+
},
147191
{
148192
'name':
149193
'dequantizeLinear int8 4D constant tensor broadcasting scale and zeroPoint',
@@ -160,8 +204,8 @@ const dequantizeLinearTests = [
160204
'constant': true
161205
},
162206
'dequantizeLinearZeroPoint': {
163-
'data': [12],
164-
'descriptor': {shape: [], dataType: 'int8'},
207+
'data': [12, 12],
208+
'descriptor': {shape: [2, 1], dataType: 'int8'},
165209
'constant': true
166210
}
167211
},
@@ -185,6 +229,74 @@ const dequantizeLinearTests = [
185229
}
186230
}
187231
},
232+
{
233+
'name': 'dequantizeLinear int8 4D constant tensor with block_size = [3, 2]',
234+
'graph': {
235+
'inputs': {
236+
'dequantizeLinearInput': {
237+
'data': [
238+
-124, 0, 23, 122, 12, 23, 45, 36, 67, 78, -22, 0,
239+
-34, -45, -56, -67, 89, 30, 12, 23, 56, 67, 56, -12
240+
],
241+
'descriptor': {shape: [6, 4], dataType: 'int8'},
242+
'constant': true
243+
},
244+
'dequantizeLinearScale': {
245+
'data': [
246+
0.2800687253475189, -4.617084980010986, 1.2800687253475189,
247+
-3.617084980010986
248+
],
249+
'descriptor': {shape: [2, 2], dataType: 'float32'},
250+
'constant': true
251+
},
252+
'dequantizeLinearZeroPoint': {
253+
'data': [1, 3, 5, 12],
254+
'descriptor': {shape: [2, 2], dataType: 'int8'},
255+
'constant': true
256+
}
257+
},
258+
'operators': [{
259+
'name': 'dequantizeLinear',
260+
'arguments': [
261+
{'input': 'dequantizeLinearInput'},
262+
{'scale': 'dequantizeLinearScale'},
263+
{'zeroPoint': 'dequantizeLinearZeroPoint'}
264+
],
265+
'outputs': 'dequantizeLinearOutput'
266+
}],
267+
'expectedOutputs': {
268+
'dequantizeLinearOutput': {
269+
'data': [
270+
-35.00859069824219,
271+
-0.2800687253475189,
272+
-92.3416976928711,
273+
-549.43310546875,
274+
3.0807559490203857,
275+
6.1615118980407715,
276+
-193.91757202148438,
277+
-152.36380004882812,
278+
18.484535217285156,
279+
21.565292358398438,
280+
115.4271240234375,
281+
13.851255416870117,
282+
-49.92267990112305,
283+
-64.0034408569336,
284+
245.96177673339844,
285+
285.7497253417969,
286+
107.52577209472656,
287+
32.0017204284668,
288+
0,
289+
-39.787933349609375,
290+
65.28350830078125,
291+
79.36426544189453,
292+
-159.1517333984375,
293+
86.81004333496094
294+
],
295+
'descriptor': {shape: [6, 4], dataType: 'float32'}
296+
}
297+
}
298+
}
299+
},
188300
{
189301
'name': 'dequantizeLinear uint4 1D tensor with even input size',
190302
'graph': {
@@ -200,8 +312,8 @@ const dequantizeLinearTests = [
200312
'constant': true
201313
},
202314
'dequantizeLinearZeroPoint': {
203-
'data': [0],
204-
'descriptor': {shape: [], dataType: 'uint4'},
315+
'data': [0, 1],
316+
'descriptor': {shape: [2], dataType: 'uint4'},
205317
'constant': true
206318
}
207319
},
@@ -216,7 +328,7 @@ const dequantizeLinearTests = [
216328
}],
217329
'expectedOutputs': {
218330
'dequantizeLinearOutput': {
219-
'data': [16.804121017456055, 0],
331+
'data': [16.804121017456055, -1.1202747821807861],
220332
'descriptor': {shape: [2], dataType: 'float32'}
221333
}
222334
}
@@ -237,8 +349,8 @@ const dequantizeLinearTests = [
237349
'constant': true
238350
},
239351
'dequantizeLinearZeroPoint': {
240-
'data': [2, 1, 4],
241-
'descriptor': {shape: [3], dataType: 'uint4'},
352+
'data': [2],
353+
'descriptor': {shape: [1], dataType: 'uint4'},
242354
'constant': true
243355
}
244356
},
@@ -253,7 +365,7 @@ const dequantizeLinearTests = [
253365
}],
254366
'expectedOutputs': {
255367
'dequantizeLinearOutput': {
256-
'data': [8.962198257446289, 12.323022842407227, 11.202747344970703],
368+
'data': [8.962198257446289, 11.202747344970703, 13.443297386169434],
257369
'descriptor': {shape: [3], dataType: 'float32'}
258370
}
259371
}
@@ -278,7 +390,7 @@ const dequantizeLinearTests = [
278390
},
279391
'dequantizeLinearZeroPoint': {
280392
'data': [2, 3],
281-
'descriptor': {shape: [2], dataType: 'uint4'},
393+
'descriptor': {shape: [2, 1], dataType: 'uint4'},
282394
'constant': true
283395
}
284396
},
@@ -294,14 +406,57 @@ const dequantizeLinearTests = [
294406
'expectedOutputs': {
295407
'dequantizeLinearOutput': {
296408
'data': [
297-
-18.686185836791992, -18.686185836791992, -36.93667984008789,
409+
-18.686185836791992, -9.343092918395996, -32.31959533691406,
298410
-55.40502166748047
299411
],
300412
'descriptor': {shape: [1, 1, 2, 2], dataType: 'float32'}
301413
}
302414
}
303415
}
304416
},
417+
{
418+
'name': 'dequantizeLinear uint4 3D input with block_size = [1, 1, 2]',
419+
'graph': {
420+
'inputs': {
421+
'dequantizeLinearInput': {
422+
'data': [0, 1, 10, 15],
423+
'descriptor': {shape: [1, 1, 4], dataType: 'uint4'},
424+
'constant': true
425+
},
426+
'dequantizeLinearScale': {
427+
'data': [
428+
9.343092918395996,
429+
-4.617084980010986,
430+
],
431+
'descriptor': {shape: [1, 2], dataType: 'float32'},
432+
'constant': true
433+
},
434+
'dequantizeLinearZeroPoint': {
435+
'data': [2, 3],
436+
'descriptor': {shape: [1, 2], dataType: 'uint4'},
437+
'constant': true
438+
}
439+
},
440+
'operators': [{
441+
'name': 'dequantizeLinear',
442+
'arguments': [
443+
{'input': 'dequantizeLinearInput'},
444+
{'scale': 'dequantizeLinearScale'},
445+
{'zeroPoint': 'dequantizeLinearZeroPoint'}
446+
],
447+
'outputs': 'dequantizeLinearOutput'
448+
}],
449+
'expectedOutputs': {
450+
'dequantizeLinearOutput': {
451+
'data': [
452+
-18.686185836791992, -9.343092918395996, -32.31959533691406,
453+
-55.40502166748047
454+
],
455+
'descriptor': {shape: [1, 1, 4], dataType: 'float32'}
456+
}
457+
}
458+
}
459+
},
305460
{
306461
'name': 'dequantizeLinear int4 1D tensor with even size',
307462
'graph': {
@@ -312,8 +467,8 @@ const dequantizeLinearTests = [
312467
'constant': true
313468
},
314469
'dequantizeLinearScale': {
315-
'data': [1.1202747821807861],
316-
'descriptor': {shape: [], dataType: 'float32'},
470+
'data': [1.1202747821807861, 1.1202747821807861],
471+
'descriptor': {shape: [2], dataType: 'float32'},
317472
'constant': true
318473
},
319474
'dequantizeLinearZeroPoint': {
@@ -350,12 +505,12 @@ const dequantizeLinearTests = [
350505
},
351506
'dequantizeLinearScale': {
352507
'data': [1.1202747821807861],
353-
'descriptor': {shape: [], dataType: 'float32'},
508+
'descriptor': {shape: [1], dataType: 'float32'},
354509
'constant': true
355510
},
356511
'dequantizeLinearZeroPoint': {
357-
'data': [-3, 0, 0],
358-
'descriptor': {shape: [3], dataType: 'int4'},
512+
'data': [-3],
513+
'descriptor': {shape: [1], dataType: 'int4'},
359514
'constant': true
360515
}
361516
},
@@ -370,7 +525,7 @@ const dequantizeLinearTests = [
370525
}],
371526
'expectedOutputs': {
372527
'dequantizeLinearOutput': {
373-
'data': [2.2405495643615723, 7.841923713684082, 0],
528+
'data': [2.2405495643615723, 11.202747344970703, 3.3608243465423584],
374529
'descriptor': {shape: [3], dataType: 'float32'}
375530
}
376531
}

0 commit comments

Comments
 (0)