186
186
"stable-diffusion-xl" : {"SingleStream" : 1024 , "Server" : 270336 , "Offline" : 1 }
187
187
},
188
188
},
189
+ "v4.1" : {
190
+ "models" : [
191
+ "resnet" ,
192
+ "retinanet" ,
193
+ "bert-99" ,
194
+ "bert-99.9" ,
195
+ "dlrm-v2-99" ,
196
+ "dlrm-v2-99.9" ,
197
+ "3d-unet-99" ,
198
+ "3d-unet-99.9" ,
199
+ "gptj-99" ,
200
+ "gptj-99.9" ,
201
+ "llama2-70b-99" ,
202
+ "llama2-70b-99.9" ,
203
+ "stable-diffusion-xl" ,
204
+ "mixtral-8x7b"
205
+ ],
206
+ "required-scenarios-datacenter" : {
207
+ "resnet" : ["Server" , "Offline" ],
208
+ "retinanet" : ["Server" , "Offline" ],
209
+ "bert-99" : ["Server" , "Offline" ],
210
+ "bert-99.9" : ["Server" , "Offline" ],
211
+ "dlrm-v2-99" : ["Server" , "Offline" ],
212
+ "dlrm-v2-99.9" : ["Server" , "Offline" ],
213
+ "3d-unet-99" : ["Offline" ],
214
+ "3d-unet-99.9" : ["Offline" ],
215
+ "gptj-99" : ["Server" , "Offline" ],
216
+ "gptj-99.9" : ["Server" , "Offline" ],
217
+ "llama2-70b-99" : ["Server" , "Offline" ],
218
+ "llama2-70b-99.9" : ["Server" , "Offline" ],
219
+ "stable-diffusion-xl" : ["Server" , "Offline" ],
220
+ "mixtral-8x7b" : ["Server" , "Offline" ]
221
+ },
222
+ "optional-scenarios-datacenter" : {},
223
+ "required-scenarios-edge" : {
224
+ "resnet" : ["SingleStream" , "MultiStream" , "Offline" ],
225
+ "retinanet" : ["SingleStream" , "MultiStream" , "Offline" ],
226
+ "bert-99" : ["SingleStream" , "Offline" ],
227
+ "3d-unet-99" : ["SingleStream" , "Offline" ],
228
+ "3d-unet-99.9" : ["SingleStream" , "Offline" ],
229
+ "gptj-99" : ["SingleStream" , "Offline" ],
230
+ "gptj-99.9" : ["SingleStream" , "Offline" ],
231
+ "stable-diffusion-xl" : ["SingleStream" , "Offline" ],
232
+ },
233
+ "optional-scenarios-edge" : {},
234
+ "required-scenarios-datacenter-edge" : {
235
+ "resnet" : ["SingleStream" , "Offline" , "MultiStream" , "Server" ],
236
+ "retinanet" : ["SingleStream" , "Offline" , "MultiStream" , "Server" ],
237
+ "bert-99" : ["SingleStream" , "Offline" , "Server" ],
238
+ "bert-99.9" : ["Offline" , "Server" ],
239
+ "dlrm-v2-99" : ["Offline" , "Server" ],
240
+ "dlrm-v2-99.9" : ["Offline" , "Server" ],
241
+ "3d-unet-99" : ["SingleStream" , "Offline" ],
242
+ "3d-unet-99.9" : ["SingleStream" , "Offline" ],
243
+ "gptj-99" : ["SingleStream" , "Offline" , "Server" ],
244
+ "gptj-99.9" : ["SingleStream" , "Offline" , "Server" ],
245
+ "llama2-70b-99" : ["Server" , "Offline" ],
246
+ "llama2-70b-99.9" : ["Server" , "Offline" ],
247
+ "stable-diffusion-xl" : ["SingleStream" , "Offline" , "Server" ],
248
+ "mixtral-8x7b" : ["SingleStream" "Server" , "Offline" ]
249
+ },
250
+ "optional-scenarios-datacenter-edge" : {},
251
+ "accuracy-target" : {
252
+ "resnet" : ("acc" , 76.46 * 0.99 ),
253
+ "retinanet" : ("mAP" , 37.55 * 0.99 ),
254
+ "bert-99" : ("F1" , 90.874 * 0.99 ),
255
+ "bert-99.9" : ("F1" , 90.874 * 0.999 ),
256
+ "dlrm-v2-99" : ("AUC" , 80.31 * 0.99 ),
257
+ "dlrm-v2-99.9" : ("AUC" , 80.31 * 0.999 ),
258
+ "3d-unet-99" : ("DICE" , 0.86170 * 0.99 ),
259
+ "3d-unet-99.9" : ("DICE" , 0.86170 * 0.999 ),
260
+ "gptj-99" : ("ROUGE1" , 42.9865 * 0.99 , "ROUGE2" , 20.1235 * 0.99 , "ROUGEL" , 29.9881 * 0.99 , "GEN_LEN" , 4016878 * 0.9 ),
261
+ "gptj-99.9" : ("ROUGE1" , 42.9865 * 0.999 , "ROUGE2" , 20.1235 * 0.999 , "ROUGEL" , 29.9881 * 0.999 , "GEN_LEN" , 4016878 * 0.9 ),
262
+ "llama2-70b-99" : ("ROUGE1" , 44.4312 * 0.99 , "ROUGE2" , 22.0352 * 0.99 , "ROUGEL" , 28.6162 * 0.99 , "TOKENS_PER_SAMPLE" , 294.45 * 0.9 ),
263
+ "llama2-70b-99.9" : ("ROUGE1" , 44.4312 * 0.999 , "ROUGE2" , 22.0352 * 0.999 , "ROUGEL" , 28.6162 * 0.999 , "TOKENS_PER_SAMPLE" , 294.45 * 0.9 ),
264
+ "stable-diffusion-xl" : ("CLIP_SCORE" , 31.68631873 , "FID_SCORE" , 23.01085758 ),
265
+ # TODO: Mixtral metrics
266
+ # "mixtral-8x7b" : ("ROUGE1", X * 0.99, "ROUGE2", X * 0.99, "ROUGEL", X * 0.99, "TOKENS_PER_SAMPLE", X * 0.9, "gsm8k_accuracy", 73.78 * 0.99, "mbxp_accuracy", 60.12 * 0.99),
267
+ },
268
+ "accuracy-upper-limit" : {
269
+ "stable-diffusion-xl" : ("CLIP_SCORE" , 31.81331801 , "FID_SCORE" , 23.95007626 ),
270
+ "llama2-70b-99" : ("TOKENS_PER_SAMPLE" , 294.45 * 1.1 ),
271
+ "llama2-70b-99.9" : ("TOKENS_PER_SAMPLE" , 294.45 * 1.1 )
272
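+ # TODO: "mixtral-8x7b" : ("TOKENS_PER_SAMPLE", X * 1.1) -- upper limit, so scale by 1.1 like the llama2 entries; add a comma after the previous entry when enabling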
273
+ },
274
+ "performance-sample-count" : {
275
+ "resnet" : 1024 ,
276
+ "retinanet" : 64 ,
277
+ "bert-99" : 10833 ,
278
+ "bert-99.9" : 10833 ,
279
+ "dlrm-v2-99" : 204800 ,
280
+ "dlrm-v2-99.9" : 204800 ,
281
+ "3d-unet-99" : 43 ,
282
+ "3d-unet-99.9" : 43 ,
283
+ "gptj-99" : 13368 ,
284
+ "gptj-99.9" : 13368 ,
285
+ "llama2-70b-99" : 24576 ,
286
+ "llama2-70b-99.9" : 24576 ,
287
+ "stable-diffusion-xl" : 5000 ,
288
+ "mixtral-8x7b" : 15000 ,
289
+ },
290
+ # TODO: Update this list.
291
+ "model_mapping" : {
292
+ # map model names to the official mlperf model class
293
+ "ssd-resnet34" : "retinanet" ,
294
+ "mobilenet" : "resnet" ,
295
+ "resnet50" : "resnet"
296
+ },
297
+ "seeds" : {
298
+ # TODO: Update random seeds
299
+ "qsl_rng_seed" : 3066443479025735752 ,
300
+ "sample_index_rng_seed" : 10688027786191513374 ,
301
+ "schedule_rng_seed" : 14962580496156340209 ,
302
+ },
303
+ "test05_seeds" : {
304
+ # TODO: Update random seeds
305
+ "qsl_rng_seed" : 16799458546791641818 ,
306
+ "sample_index_rng_seed" : 5453809927556429288 ,
307
+ "schedule_rng_seed" : 5435552105434836064 ,
308
+ },
309
+ "ignore_errors" : [],
310
+ "latency-constraint" : {
311
+ "resnet" : {"Server" : 15000000 },
312
+ "retinanet" : {"Server" : 100000000 },
313
+ "bert-99" : {"Server" : 130000000 },
314
+ "bert-99.9" : {"Server" : 130000000 },
315
+ "dlrm-v2-99" : {"Server" : 60000000 },
316
+ "dlrm-v2-99.9" : {"Server" : 60000000 },
317
+ "gptj-99" : {"Server" : 20000000000 },
318
+ "gptj-99.9" : {"Server" : 20000000000 },
319
+ "llama2-70b-99" : {"Server" : 20000000000 },
320
+ "llama2-70b-99.9" : {"Server" : 20000000000 },
321
+ "stable-diffusion-xl" : {"Server" : 20000000000 }
322
+ # TODO: Mixtral latency constraint (add a trailing comma to the previous entry when enabling the line below)
323
+ # "mixtral-8x7b" : {"Server": 20000000000}
324
+ },
325
+ "min-queries" : {
326
+ "resnet" : {
327
+ "SingleStream" : 1024 ,
328
+ "MultiStream" : 270336 ,
329
+ "Server" : 270336 ,
330
+ "Offline" : 1 ,
331
+ },
332
+ "retinanet" : {
333
+ "SingleStream" : 1024 ,
334
+ "MultiStream" : 270336 ,
335
+ "Server" : 270336 ,
336
+ "Offline" : 1 ,
337
+ },
338
+ "bert-99" : {"SingleStream" : 1024 , "Server" : 270336 , "Offline" : 1 },
339
+ "bert-99.9" : {"SingleStream" : 1024 , "Server" : 270336 , "Offline" : 1 },
340
+ "dlrm-v2-99" : {"Server" : 270336 , "Offline" : 1 },
341
+ "dlrm-v2-99.9" : {"Server" : 270336 , "Offline" : 1 },
342
+ "3d-unet-99" : {"SingleStream" : 1024 , "Offline" : 1 },
343
+ "3d-unet-99.9" : {"SingleStream" : 1024 , "Offline" : 1 },
344
+ "gptj-99" : {"SingleStream" : 1024 , "Server" : 270336 , "Offline" : 1 },
345
+ "gptj-99.9" : {"SingleStream" : 1024 , "Server" : 270336 , "Offline" : 1 },
346
+ "llama2-70b-99" : {"SingleStream" : 1024 , "Server" : 270336 , "Offline" : 1 },
347
+ "llama2-70b-99.9" : {"SingleStream" : 1024 , "Server" : 270336 , "Offline" : 1 },
348
+ "stable-diffusion-xl" : {"SingleStream" : 1024 , "Server" : 270336 , "Offline" : 1 },
349
+ "mixtral-8x7b" : {"SingleStream" : 1024 , "Server" : 270336 , "Offline" : 1 },
350
+ },
351
+ },
189
352
}
190
353
191
354
VALID_DIVISIONS = ["open" , "closed" , "network" ]
221
384
"3319" ,
222
385
"95"
223
386
]
387
+ },
388
+ "v4.1" : {
389
+ "images" : [
390
+ "4655" ,
391
+ "2569" ,
392
+ "1303" ,
393
+ "109" ,
394
+ "4509" ,
395
+ "3009" ,
396
+ "2179" ,
397
+ "1826" ,
398
+ "2094" ,
399
+ "3340"
400
+ ]
224
401
}
225
402
}
226
403
}
255
432
"gptj-99.9" : 13368 ,
256
433
"llama2-70b-99" : 24576 ,
257
434
"llama2-70b-99.9" : 24576 ,
258
- "stable-diffusion-xl" : 5000
435
+ "stable-diffusion-xl" : 5000 ,
436
+ "mixtral-8x7b" : 15000
259
437
}
260
438
261
439
SCENARIO_MAPPING = {
302
480
},
303
481
"v4.1" : {
304
482
"llama2-70b-99" : {
305
- "Offline" : "result_tokens_per_second" ,
306
- "Server" : "result_completed_tokens_per_second" ,
483
+ "Offline" : "result_tokens_per_second" ,
484
+ "Server" : "result_completed_tokens_per_second" ,
307
485
},
308
486
"llama2-70b-99.9" : {
309
487
"Offline" : "result_tokens_per_second" ,
316
494
"gptj-99.9" : {
317
495
"Offline" : "result_inferred_tokens_per_second" ,
318
496
"Server" : "result_inferred_completed_tokens_per_second" ,
497
+ },
498
+ "mixtral-8x7b" : {
499
+ "Offline" : "result_tokens_per_second" ,
500
+ "Server" : "result_completed_tokens_per_second" ,
319
501
}
320
502
}
321
503
}
322
504
323
- LLAMA2_LATENCY_LIMITS = {
324
- # We might add interactive in the next round. Latency in ns
325
- "conversational" : {
326
- "ttft" : 2000 * 1000000 ,
327
- "tpot" : 200 * 1000000
328
- }
505
+ LLM_LATENCY_LIMITS = {
506
+ "llama2-70b-99" :{
507
+ "conversational" : {
508
+ "ttft" : 2000 * 1000000 ,
509
+ "tpot" : 200 * 1000000
510
+ }
511
+ },
512
+ "llama2-70b-99.9" :{
513
+ "conversational" : {
514
+ "ttft" : 2000 * 1000000 ,
515
+ "tpot" : 200 * 1000000
516
+ }
517
+ },
518
+ # "mixtral-8x7b":{
519
+ # "conversational": {
520
+ # "ttft": 2000 * 1000000,
521
+ # "tpot": 200 * 1000000
522
+ # }
523
+ # }
329
524
}
330
525
331
526
ACC_PATTERN = {
@@ -799,13 +994,13 @@ def check_accuracy_dir(config, model, path, verbose):
799
994
return is_valid , result_acc
800
995
801
996
802
- def extra_check_llama2 (mlperf_log , scenario ):
997
+ def extra_check_llm (mlperf_log , scenario , model ):
803
998
if (mlperf_log ["requested_use_token_latencies" ]):
804
999
if scenario == "Offline" :
805
1000
# For offline no further checks are necessary
806
1001
return None , True
807
1002
else :
808
- for constraint , limits in LLAMA2_LATENCY_LIMITS .items ():
1003
+ for constraint , limits in LLM_LATENCY_LIMITS [ model ] .items ():
809
1004
if mlperf_log ["result_first_token_99.00_percentile_latency_ns" ] < limits ["ttft" ] and mlperf_log ["result_time_per_output_token_99.00_percentile_ns" ] < limits ["tpot" ]:
810
1005
return constraint , True
811
1006
else :
@@ -867,8 +1062,8 @@ def check_performance_dir(
867
1062
res = float (mlperf_log [RESULT_FIELD_BENCHMARK_OVERWRITE [version ][model ][scenario ]])
868
1063
869
1064
870
- if model in ["llama2-70b-99" , "llama2-70b-99.9" ]:
871
- llama_constraint , is_valid = extra_check_llama2 (mlperf_log , scenario_fixed )
1065
+ if model in ["llama2-70b-99" , "llama2-70b-99.9" , "mixtral-8x7b" ]:
1066
+ llama_constraint , is_valid = extra_check_llm (mlperf_log , scenario_fixed , model )
872
1067
873
1068
latency_99_percentile = mlperf_log ["result_99.00_percentile_latency_ns" ]
874
1069
latency_mean = mlperf_log ["result_mean_latency_ns" ]
@@ -2344,8 +2539,7 @@ def check_compliance_dir(
2344
2539
"gptj-99.9" ,
2345
2540
"llama2-70b-99" ,
2346
2541
"llama2-70b-99.9" ,
2347
- "stable-diffusion-xl"
2348
-
2542
+ "stable-diffusion-xl" ,
+ "mixtral-8x7b"
2349
2543
]:
2350
2544
test_list .remove ("TEST04" )
2351
2545
@@ -2355,13 +2549,23 @@ def check_compliance_dir(
2355
2549
"llama2-70b-99" ,
2356
2550
"llama2-70b-99.9" ,
2357
2551
"stable-diffusion-xl" ,
2552
+ "mixtral-8x7b"
2358
2553
]:
2359
2554
test_list .remove ("TEST05" )
2555
+
2556
+ if model in [
2557
+ "gptj-99" ,
2558
+ "gptj-99.9" ,
2559
+ "llama2-70b-99" ,
2560
+ "llama2-70b-99.9" ,
2561
+ "mixtral-8x7b"
2562
+ ]:
2360
2563
test_list .remove ("TEST01" )
2361
2564
2362
2565
if model in [
2363
2566
"llama2-70b-99" ,
2364
2567
"llama2-70b-99.9" ,
2568
+ "mixtral-8x7b"
2365
2569
]:
2366
2570
test_list .append ("TEST06" )
2367
2571
0 commit comments