Commit a536cd2

Update for v4.1: Add new seeds + update checker + update compliance test table (mlcommons#1736)
1 parent 44ae828 commit a536cd2

File tree: 5 files changed, +239 -34 lines

compliance/nvidia/README.md

+3 -2

@@ -37,5 +37,6 @@ The `run_verification.py` found in each test directory will copy the test files
 | 3d-unet | [TEST01](./TEST01/), [TEST05](./TEST05/) |
 | rnnt | [TEST01](./TEST01/), [TEST05](./TEST05/) |
 | gpt-j | - |
-| stable-diffusion-xl | - |
-| Llama2-70b | [TEST06]() |
+| stable-diffusion-xl | [TEST01](./TEST01/), [TEST04](./TEST04/), [TEST05](./TEST05/) |
+| Llama2-70b | [TEST06](./TEST06/) |
+| mixtral-8x7b | [TEST06](./TEST06/) |

mlperf.conf

+6 -6

@@ -19,13 +19,13 @@ stable-diffusion-xl.*.performance_sample_count_override = 5000
 3d-unet.*.performance_sample_count_override = 0

 # Set seeds. The seeds will be distributed two weeks before the submission.
-*.*.qsl_rng_seed = 13281865557512327830
-*.*.sample_index_rng_seed = 198141574272810017
-*.*.schedule_rng_seed = 7575108116881280410
+*.*.qsl_rng_seed = 3066443479025735752
+*.*.sample_index_rng_seed = 10688027786191513374
+*.*.schedule_rng_seed = 14962580496156340209
 # Set seeds for TEST_05. The seeds will be distributed two weeks before the submission.
-*.*.test05_qsl_rng_seed = 2376919268182438552
-*.*.test05_sample_index_rng_seed = 11176391829184272374
-*.*.test05_schedule_rng_seed = 3911940905271271337
+*.*.test05_qsl_rng_seed = 16799458546791641818
+*.*.test05_sample_index_rng_seed = 5453809927556429288
+*.*.test05_schedule_rng_seed = 5435552105434836064


 *.SingleStream.target_latency_percentile = 90

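The seed lines above are plain `benchmark.scenario.key = value` overrides that LoadGen picks up at test time. As a quick aid, here is a minimal sketch (not part of this commit) for confirming that a local copy of mlperf.conf already carries the v4.1 seeds; the parsing follows the layout visible in the hunk and is otherwise an assumption:

# Minimal sketch (not part of this commit): read the global "*.*" overrides out
# of mlperf.conf and compare them against the v4.1 seeds added above.
EXPECTED_V41_SEEDS = {
    "qsl_rng_seed": 3066443479025735752,
    "sample_index_rng_seed": 10688027786191513374,
    "schedule_rng_seed": 14962580496156340209,
    "test05_qsl_rng_seed": 16799458546791641818,
    "test05_sample_index_rng_seed": 5453809927556429288,
    "test05_schedule_rng_seed": 5435552105434836064,
}

def read_global_settings(path="mlperf.conf"):
    settings = {}
    with open(path) as conf:
        for line in conf:
            line = line.split("#", 1)[0].strip()  # drop comments and blank lines
            if not line or "=" not in line:
                continue
            key, value = (part.strip() for part in line.split("=", 1))
            parts = key.split(".", 2)             # benchmark.scenario.setting
            if len(parts) == 3 and parts[0] == "*" and parts[1] == "*":
                settings[parts[2]] = int(value) if value.isdigit() else value
    return settings

if __name__ == "__main__":
    found = read_global_settings()
    for name, expected in EXPECTED_V41_SEEDS.items():
        assert found.get(name) == expected, f"{name} does not match the v4.1 value"
    print("mlperf.conf carries the v4.1 seeds")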
text_to_image/tools/sample_ids.py

+1 -1

@@ -16,7 +16,7 @@ def get_args():
         "--n", type=int, default=10, help="Dataset download location"
     )
     parser.add_argument(
-        "--seed", "-s", type=int, default=926019364, help="Dataset download location"
+        "--seed", "-s", type=int, default=633994880, help="Dataset download location"
     )
     args = parser.parse_args()
     return args

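Only the default seed changes here; it is what makes the regenerated id list in the next file reproducible. A hedged sketch of the general idea, assuming a numpy-style draw over a 5,000-image pool; the real sample_ids.py may select ids differently:

# Illustrative only: a fixed default seed makes a draw of ten sample ids
# reproducible. Pool size 5000 (the SDXL validation subset) is an assumption.
import numpy as np

def draw_sample_ids(n=10, seed=633994880, pool_size=5000):
    rng = np.random.default_rng(seed)  # seeded generator -> identical ids on every run
    return sorted(rng.choice(pool_size, size=n, replace=False).tolist())

print(draw_sample_ids())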
text_to_image/tools/sample_ids.txt

+10 -10

@@ -1,10 +1,10 @@
-4459
-4015
-2705
-1682
-4048
-4683
-3757
-1578
-3319
-95
+4655
+2569
+1303
+109
+4509
+3009
+2179
+1826
+2094
+3340

tools/submission/submission_checker.py

+219 -15

@@ -186,6 +186,169 @@
             "stable-diffusion-xl": {"SingleStream": 1024, "Server": 270336, "Offline": 1}
         },
     },
+    "v4.1": {
+        "models": [
+            "resnet",
+            "retinanet",
+            "bert-99",
+            "bert-99.9",
+            "dlrm-v2-99",
+            "dlrm-v2-99.9",
+            "3d-unet-99",
+            "3d-unet-99.9",
+            "gptj-99",
+            "gptj-99.9",
+            "llama2-70b-99",
+            "llama2-70b-99.9",
+            "stable-diffusion-xl",
+            "mixtral-8x7b"
+        ],
+        "required-scenarios-datacenter": {
+            "resnet": ["Server", "Offline"],
+            "retinanet": ["Server", "Offline"],
+            "bert-99": ["Server", "Offline"],
+            "bert-99.9": ["Server", "Offline"],
+            "dlrm-v2-99": ["Server", "Offline"],
+            "dlrm-v2-99.9": ["Server", "Offline"],
+            "3d-unet-99": ["Offline"],
+            "3d-unet-99.9": ["Offline"],
+            "gptj-99": ["Server", "Offline"],
+            "gptj-99.9": ["Server", "Offline"],
+            "llama2-70b-99": ["Server", "Offline"],
+            "llama2-70b-99.9": ["Server", "Offline"],
+            "stable-diffusion-xl": ["Server", "Offline"],
+            "mixtral-8x7b": ["Server", "Offline"]
+        },
+        "optional-scenarios-datacenter": {},
+        "required-scenarios-edge": {
+            "resnet": ["SingleStream", "MultiStream", "Offline"],
+            "retinanet": ["SingleStream", "MultiStream", "Offline"],
+            "bert-99": ["SingleStream", "Offline"],
+            "3d-unet-99": ["SingleStream", "Offline"],
+            "3d-unet-99.9": ["SingleStream", "Offline"],
+            "gptj-99": ["SingleStream", "Offline"],
+            "gptj-99.9": ["SingleStream", "Offline"],
+            "stable-diffusion-xl": ["SingleStream", "Offline"],
+        },
+        "optional-scenarios-edge": {},
+        "required-scenarios-datacenter-edge": {
+            "resnet": ["SingleStream", "Offline", "MultiStream", "Server"],
+            "retinanet": ["SingleStream", "Offline", "MultiStream", "Server"],
+            "bert-99": ["SingleStream", "Offline", "Server"],
+            "bert-99.9": ["Offline", "Server"],
+            "dlrm-v2-99": ["Offline", "Server"],
+            "dlrm-v2-99.9": ["Offline", "Server"],
+            "3d-unet-99": ["SingleStream", "Offline"],
+            "3d-unet-99.9": ["SingleStream", "Offline"],
+            "gptj-99": ["SingleStream", "Offline", "Server"],
+            "gptj-99.9": ["SingleStream", "Offline", "Server"],
+            "llama2-70b-99": ["Server", "Offline"],
+            "llama2-70b-99.9": ["Server", "Offline"],
+            "stable-diffusion-xl": ["SingleStream", "Offline", "Server"],
+            "mixtral-8x7b": ["SingleStream", "Server", "Offline"]
+        },
+        "optional-scenarios-datacenter-edge": {},
+        "accuracy-target": {
+            "resnet": ("acc", 76.46 * 0.99),
+            "retinanet": ("mAP", 37.55 * 0.99),
+            "bert-99": ("F1", 90.874 * 0.99),
+            "bert-99.9": ("F1", 90.874 * 0.999),
+            "dlrm-v2-99": ("AUC", 80.31 * 0.99),
+            "dlrm-v2-99.9": ("AUC", 80.31 * 0.999),
+            "3d-unet-99": ("DICE", 0.86170 * 0.99),
+            "3d-unet-99.9": ("DICE", 0.86170 * 0.999),
+            "gptj-99": ("ROUGE1", 42.9865 * 0.99, "ROUGE2", 20.1235 * 0.99, "ROUGEL", 29.9881 * 0.99, "GEN_LEN", 4016878 * 0.9),
+            "gptj-99.9": ("ROUGE1", 42.9865 * 0.999, "ROUGE2", 20.1235 * 0.999, "ROUGEL", 29.9881 * 0.999, "GEN_LEN", 4016878 * 0.9),
+            "llama2-70b-99": ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99, "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45 * 0.9),
+            "llama2-70b-99.9": ("ROUGE1", 44.4312 * 0.999, "ROUGE2", 22.0352 * 0.999, "ROUGEL", 28.6162 * 0.999, "TOKENS_PER_SAMPLE", 294.45 * 0.9),
+            "stable-diffusion-xl": ("CLIP_SCORE", 31.68631873, "FID_SCORE", 23.01085758),
+            # TODO: Mixtral metrics
+            # "mixtral-8x7b": ("ROUGE1", X * 0.99, "ROUGE2", X * 0.99, "ROUGEL", X * 0.99, "TOKENS_PER_SAMPLE", X * 0.9, "gsm8k_accuracy": 73.78 * 0.99, "mbxp_accuracy": 60.12 * 0.99),
+        },
+        "accuracy-upper-limit": {
+            "stable-diffusion-xl": ("CLIP_SCORE", 31.81331801, "FID_SCORE", 23.95007626),
+            "llama2-70b-99": ("TOKENS_PER_SAMPLE", 294.45 * 1.1),
+            "llama2-70b-99.9": ("TOKENS_PER_SAMPLE", 294.45 * 1.1)
+            # "mixtral-8x7b": ("TOKENS_PER_SAMPLE", X * 0.9)
+        },
+        "performance-sample-count": {
+            "resnet": 1024,
+            "retinanet": 64,
+            "bert-99": 10833,
+            "bert-99.9": 10833,
+            "dlrm-v2-99": 204800,
+            "dlrm-v2-99.9": 204800,
+            "3d-unet-99": 43,
+            "3d-unet-99.9": 43,
+            "gptj-99": 13368,
+            "gptj-99.9": 13368,
+            "llama2-70b-99": 24576,
+            "llama2-70b-99.9": 24576,
+            "stable-diffusion-xl": 5000,
+            "mixtral-8x7b": 15000,
+        },
+        # TODO: Update this list.
+        "model_mapping": {
+            # map model names to the official mlperf model class
+            "ssd-resnet34": "retinanet",
+            "mobilenet": "resnet",
+            "resnet50": "resnet"
+        },
+        "seeds": {
+            # TODO: Update random seeds
+            "qsl_rng_seed": 3066443479025735752,
+            "sample_index_rng_seed": 10688027786191513374,
+            "schedule_rng_seed": 14962580496156340209,
+        },
+        "test05_seeds": {
+            # TODO: Update random seeds
+            "qsl_rng_seed": 16799458546791641818,
+            "sample_index_rng_seed": 5453809927556429288,
+            "schedule_rng_seed": 5435552105434836064,
+        },
+        "ignore_errors": [],
+        "latency-constraint": {
+            "resnet": {"Server": 15000000},
+            "retinanet": {"Server": 100000000},
+            "bert-99": {"Server": 130000000},
+            "bert-99.9": {"Server": 130000000},
+            "dlrm-v2-99": {"Server": 60000000},
+            "dlrm-v2-99.9": {"Server": 60000000},
+            "gptj-99": {"Server": 20000000000},
+            "gptj-99.9": {"Server": 20000000000},
+            "llama2-70b-99": {"Server": 20000000000},
+            "llama2-70b-99.9": {"Server": 20000000000},
+            "stable-diffusion-xl": {"Server": 20000000000}
+            # TODO: Mixtral metrics
+            # "mixtral-8x7b": {"Server": 20000000000}
+        },
+        "min-queries": {
+            "resnet": {
+                "SingleStream": 1024,
+                "MultiStream": 270336,
+                "Server": 270336,
+                "Offline": 1,
+            },
+            "retinanet": {
+                "SingleStream": 1024,
+                "MultiStream": 270336,
+                "Server": 270336,
+                "Offline": 1,
+            },
+            "bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
+            "bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
+            "dlrm-v2-99": {"Server": 270336, "Offline": 1},
+            "dlrm-v2-99.9": {"Server": 270336, "Offline": 1},
+            "3d-unet-99": {"SingleStream": 1024, "Offline": 1},
+            "3d-unet-99.9": {"SingleStream": 1024, "Offline": 1},
+            "gptj-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
+            "gptj-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
+            "llama2-70b-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
+            "llama2-70b-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
+            "stable-diffusion-xl": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
+            "mixtral-8x7b": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
+        },
+    },
 }

VALID_DIVISIONS = ["open", "closed", "network"]
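Two details of the new block worth spelling out: `accuracy-target` packs alternating metric names and minimum values into a flat tuple, and `accuracy-upper-limit` adds caps for metrics that must not drift high (TOKENS_PER_SAMPLE, CLIP/FID scores). A hedged sketch of that interpretation, not the checker's literal code:

# Hedged sketch, not the checker's literal code: evaluate one model's accuracy
# against the flat (metric, threshold, ...) tuples from "accuracy-target" and
# the optional "accuracy-upper-limit" caps.
def accuracy_ok(measured, target, upper_limit=()):
    lower = dict(zip(target[0::2], target[1::2]))            # metric -> minimum
    upper = dict(zip(upper_limit[0::2], upper_limit[1::2]))  # metric -> maximum
    meets_floor = all(measured.get(m, float("-inf")) >= t for m, t in lower.items())
    meets_cap = all(measured.get(m, float("inf")) <= t for m, t in upper.items())
    return meets_floor and meets_cap

# Example with the llama2-70b-99 thresholds from the v4.1 block above:
target = ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99,
          "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45 * 0.9)
cap = ("TOKENS_PER_SAMPLE", 294.45 * 1.1)
print(accuracy_ok({"ROUGE1": 44.5, "ROUGE2": 22.1, "ROUGEL": 28.7,
                   "TOKENS_PER_SAMPLE": 295.0}, target, cap))  # True
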
@@ -221,6 +384,20 @@
                 "3319",
                 "95"
             ]
+        },
+        "v4.1": {
+            "images": [
+                "4655",
+                "2569",
+                "1303",
+                "109",
+                "4509",
+                "3009",
+                "2179",
+                "1826",
+                "2094",
+                "3340"
+            ]
         }
     }
 }
@@ -255,7 +432,8 @@
     "gptj-99.9": 13368,
     "llama2-70b-99": 24576,
     "llama2-70b-99.9": 24576,
-    "stable-diffusion-xl": 5000
+    "stable-diffusion-xl": 5000,
+    "mixtral-8x7b": 15000
 }

 SCENARIO_MAPPING = {
@@ -302,8 +480,8 @@
     },
     "v4.1": {
         "llama2-70b-99": {
-        "Offline": "result_tokens_per_second",
-        "Server": "result_completed_tokens_per_second",
+            "Offline": "result_tokens_per_second",
+            "Server": "result_completed_tokens_per_second",
         },
         "llama2-70b-99.9": {
             "Offline": "result_tokens_per_second",
@@ -316,16 +494,33 @@
         "gptj-99.9": {
             "Offline": "result_inferred_tokens_per_second",
             "Server": "result_inferred_completed_tokens_per_second",
+        },
+        "mixtral-8x7b": {
+            "Offline": "result_tokens_per_second",
+            "Server": "result_completed_tokens_per_second",
         }
     }
 }

-LLAMA2_LATENCY_LIMITS = {
-    # We might add interactive in the next round. Latency in ns
-    "conversational": {
-        "ttft": 2000 * 1000000,
-        "tpot": 200 * 1000000
-    }
+LLM_LATENCY_LIMITS = {
+    "llama2-70b-99": {
+        "conversational": {
+            "ttft": 2000 * 1000000,
+            "tpot": 200 * 1000000
+        }
+    },
+    "llama2-70b-99.9": {
+        "conversational": {
+            "ttft": 2000 * 1000000,
+            "tpot": 200 * 1000000
+        }
+    },
+    # "mixtral-8x7b": {
+    #     "conversational": {
+    #         "ttft": 2000 * 1000000,
+    #         "tpot": 200 * 1000000
+    #     }
+    # }
 }

 ACC_PATTERN = {
@@ -799,13 +994,13 @@ def check_accuracy_dir(config, model, path, verbose):
     return is_valid, result_acc


-def extra_check_llama2(mlperf_log, scenario):
+def extra_check_llm(mlperf_log, scenario, model):
     if (mlperf_log["requested_use_token_latencies"]):
         if scenario == "Offline":
             # For offline no further checks are necessary
             return None, True
         else:
-            for constraint, limits in LLAMA2_LATENCY_LIMITS.items():
+            for constraint, limits in LLM_LATENCY_LIMITS[model].items():
                 if mlperf_log["result_first_token_99.00_percentile_latency_ns"] < limits["ttft"] and mlperf_log["result_time_per_output_token_99.00_percentile_ns"] < limits["tpot"]:
                     return constraint, True
                 else:
@@ -867,8 +1062,8 @@ def check_performance_dir(
         res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]])


-    if model in ["llama2-70b-99", "llama2-70b-99.9"]:
-        llama_constraint, is_valid = extra_check_llama2(mlperf_log, scenario_fixed)
+    if model in ["llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b"]:
+        llama_constraint, is_valid = extra_check_llm(mlperf_log, scenario_fixed, model)

     latency_99_percentile = mlperf_log["result_99.00_percentile_latency_ns"]
latency_mean = mlperf_log["result_mean_latency_ns"]
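With the per-model limits table and the generalized `extra_check_llm` above, Server runs for the Llama2 models (and Mixtral, whose limits entry is still commented out) must keep 99th-percentile time-to-first-token under 2 s and time-per-output-token under 200 ms. A usage sketch with an illustrative stand-in for the parsed detail log, not real log output:

# Usage sketch: the dict below stands in for the parsed mlperf_log_detail and
# its values are made up. Assumes submission_checker.py is importable.
from submission_checker import extra_check_llm

fake_log = {
    "requested_use_token_latencies": True,
    "result_first_token_99.00_percentile_latency_ns": 1800 * 1000000,   # 1.8 s TTFT p99
    "result_time_per_output_token_99.00_percentile_ns": 150 * 1000000,  # 150 ms TPOT p99
}

constraint, is_valid = extra_check_llm(fake_log, "Server", "llama2-70b-99")
print(constraint, is_valid)  # "conversational" True: both limits are met
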
@@ -2344,8 +2539,7 @@ def check_compliance_dir(
         "gptj-99.9",
         "llama2-70b-99",
         "llama2-70b-99.9",
-        "stable-diffusion-xl"
-
+        "mixtral-8x7b"
     ]:
         test_list.remove("TEST04")

@@ -2355,13 +2549,23 @@
         "llama2-70b-99",
         "llama2-70b-99.9",
         "stable-diffusion-xl"
+        "mixtral-8x7b"
     ]:
         test_list.remove("TEST05")
+
+    if model in [
+        "gptj-99",
+        "gptj-99.9",
+        "llama2-70b-99",
+        "llama2-70b-99.9",
+        "mixtral-8x7b"
+    ]:
         test_list.remove("TEST01")

     if model in [
         "llama2-70b-99",
         "llama2-70b-99.9",
+        "mixtral-8x7b"
     ]:
         test_list.append("TEST06")

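Net effect on the compliance suite, matching the updated README table: the LLM benchmarks drop TEST01, TEST04 and TEST05, the Llama2/Mixtral family gains TEST06, and stable-diffusion-xl now runs TEST01, TEST04 and TEST05. A compact sketch of the intended outcome; the three-test starting list and the merged filtering are simplifications, since the real checker assembles `test_list` elsewhere and applies the separate filters shown above:

# Hedged sketch of the intended per-model compliance lists in v4.1.
def expected_compliance_tests(model):
    test_list = ["TEST01", "TEST04", "TEST05"]
    llm_models = ["gptj-99", "gptj-99.9", "llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b"]
    if model in llm_models:
        test_list = []                        # TEST01/TEST04/TEST05 all waived
    if model in ["llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b"]:
        test_list.append("TEST06")
    return test_list

print(expected_compliance_tests("stable-diffusion-xl"))  # ['TEST01', 'TEST04', 'TEST05']
print(expected_compliance_tests("mixtral-8x7b"))         # ['TEST06']
print(expected_compliance_tests("gptj-99"))              # []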