29
29
'docker_runtime' : '' ,
30
30
'backend_profiling' : False ,
31
31
'config_properties' : 'config.properties' ,
32
- 'inference_model_url' : 'predictions/benchmark'
32
+ 'inference_model_url' : 'predictions/benchmark' ,
33
+ 'report_location' : tempfile .gettempdir ()
33
34
}
34
# Base directory for benchmark artifacts. It is read from the *default*
# report location, so its value is fixed when this module is loaded.
# NOTE(review): generate_csv_output() reads its inputs from
# execution_params['report_location'], while result_file / metric_log below
# are derived from TMP_DIR. If 'report_location' is overridden after this
# point the two locations differ -- confirm that every writer and reader
# agrees on a single directory.
TMP_DIR = default_ab_params['report_location']

# Working copy of the defaults; the test-plan functions overwrite entries in it.
execution_params = default_ab_params.copy()

# Artifact paths, all relative to TMP_DIR.
result_file = os.path.join(TMP_DIR, "benchmark/result.txt")
metric_log = os.path.join(TMP_DIR, "benchmark/logs/model_metrics.log")
@@ -41,7 +43,6 @@ def json_provider(file_path, cmd_name):
41
43
with open (file_path ) as config_data :
42
44
return json .load (config_data )
43
45
44
-
45
46
@click .command ()
46
47
@click .argument ('test_plan' , default = 'custom' )
47
48
@click .option ('--url' , '-u' , default = 'https://torchserve.pytorch.org/mar_files/resnet-18.mar' ,
@@ -70,7 +71,7 @@ def json_provider(file_path, cmd_name):
70
71
@click_config_file .configuration_option (provider = json_provider , implicit = False ,
71
72
help = "Read configuration from a JSON file" )
72
73
def benchmark (test_plan , url , gpus , exec_env , concurrency , requests , batch_size , batch_delay , input , workers ,
73
- content_type , image , docker_runtime , backend_profiling , config_properties , inference_model_url ):
74
+ content_type , image , docker_runtime , backend_profiling , config_properties , inference_model_url , report_location ):
74
75
input_params = {'url' : url ,
75
76
'gpus' : gpus ,
76
77
'exec_env' : exec_env ,
@@ -85,12 +86,14 @@ def benchmark(test_plan, url, gpus, exec_env, concurrency, requests, batch_size,
85
86
'docker_runtime' : docker_runtime ,
86
87
'backend_profiling' : backend_profiling ,
87
88
'config_properties' : config_properties ,
88
- 'inference_model_url' : inference_model_url
89
+ 'inference_model_url' : inference_model_url ,
90
+ 'report_location' : report_location
89
91
}
90
92
91
93
# set ab params
92
94
update_plan_params [test_plan ]()
93
95
update_exec_params (input_params )
96
+
94
97
click .secho ("Starting AB benchmark suite..." , fg = 'green' )
95
98
click .secho (f"\n \n Configured execution parameters are:" , fg = 'green' )
96
99
click .secho (f"{ execution_params } " , fg = "blue" )
@@ -122,7 +125,7 @@ def check_torchserve_health():
122
125
except Exception as e :
123
126
retry += 1
124
127
time .sleep (3 )
125
- failure_exit ("Could not connect to Tochserve instance at " + execution_params ['inference_url' ])
128
+ failure_exit ("Could not connect to Torchserve instance at " + execution_params ['inference_url' ])
126
129
127
130
def warm_up ():
128
131
register_model ()
@@ -135,7 +138,7 @@ def warm_up():
135
138
136
139
137
140
def run_benchmark ():
138
- click .secho ("\n \n Executing inference perfromance tests ..." , fg = 'green' )
141
+ click .secho ("\n \n Executing inference performance tests ..." , fg = 'green' )
139
142
ab_cmd = f"ab -c { execution_params ['concurrency' ]} -n { execution_params ['requests' ]} -k -p { TMP_DIR } /benchmark/input -T " \
140
143
f"{ execution_params ['content_type' ]} { execution_params ['inference_url' ]} /{ execution_params ['inference_model_url' ]} > { result_file } "
141
144
@@ -208,8 +211,8 @@ def docker_torchserve_start():
208
211
if execution_params ['backend_profiling' ]:
209
212
backend_profiling = '-e TS_BENCHMARK=True'
210
213
211
- # delete existing ts conatiner instance
212
- click .secho ("*Removing existing ts conatiner instance..." , fg = 'green' )
214
+ # delete existing ts container instance
215
+ click .secho ("*Removing existing ts container instance..." , fg = 'green' )
213
216
execute ('docker rm -f ts' , wait = True )
214
217
215
218
click .secho (f"*Starting docker container of image { docker_image } ..." , fg = 'green' )
@@ -308,10 +311,17 @@ def generate_csv_output():
308
311
line50 = int (batched_requests / 2 )
309
312
line90 = int (batched_requests * 9 / 10 )
310
313
line99 = int (batched_requests * 99 / 100 )
314
+
315
+ click .secho (f"Saving benchmark results to { execution_params ['report_location' ]} " )
316
+
311
317
artifacts = {}
312
- with open (f' { TMP_DIR } /benchmark/result.txt' ) as f :
318
+ with open (f" { execution_params [ 'report_location' ] } /benchmark/result.txt" ) as f :
313
319
data = f .readlines ()
320
+
314
321
artifacts ['Benchmark' ] = "AB"
322
+ artifacts ['Batch size' ] = execution_params ['batch_size' ]
323
+ artifacts ['Batch delay' ] = execution_params ['batch_delay' ]
324
+ artifacts ['Workers' ] = execution_params ['workers' ]
315
325
artifacts ['Model' ] = execution_params ['url' ]
316
326
artifacts ['Concurrency' ] = execution_params ['concurrency' ]
317
327
artifacts ['Requests' ] = execution_params ['requests' ]
@@ -323,18 +333,18 @@ def generate_csv_output():
323
333
artifacts ['TS latency mean' ] = extract_entity (data , 'Time per request:.*mean\)' , - 3 )
324
334
artifacts ['TS error rate' ] = int (artifacts ['TS failed requests' ]) / execution_params ['requests' ] * 100
325
335
326
- with open (os .path .join (TMP_DIR , 'benchmark/predict.txt' )) as f :
336
+ with open (os .path .join (execution_params [ 'report_location' ] , 'benchmark/predict.txt' )) as f :
327
337
lines = f .readlines ()
328
338
lines .sort (key = float )
329
339
artifacts ['Model_p50' ] = lines [line50 ].strip ()
330
340
artifacts ['Model_p90' ] = lines [line90 ].strip ()
331
341
artifacts ['Model_p99' ] = lines [line99 ].strip ()
332
342
333
343
for m in metrics :
334
- df = pd .read_csv (f"{ TMP_DIR } /benchmark/{ m } " , header = None , names = ['data' ])
344
+ df = pd .read_csv (f"{ execution_params [ 'report_location' ] } /benchmark/{ m } " , header = None , names = ['data' ])
335
345
artifacts [m .split ('.txt' )[0 ] + "_mean" ] = df ['data' ].values .mean ().round (2 )
336
346
337
- with open (os .path .join (TMP_DIR , 'benchmark/ab_report.csv' ), 'w' ) as csv_file :
347
+ with open (os .path .join (execution_params [ 'report_location' ] , 'benchmark/ab_report.csv' ), 'w' ) as csv_file :
338
348
csvwriter = csv .writer (csv_file )
339
349
csvwriter .writerow (artifacts .keys ())
340
350
csvwriter .writerow (artifacts .values ())
@@ -351,7 +361,7 @@ def extract_entity(data, pattern, index, delim=" "):
351
361
352
362
def generate_latency_graph ():
353
363
click .secho ("*Preparing graphs..." , fg = 'green' )
354
- df = pd .read_csv (os .path .join (TMP_DIR , 'benchmark/predict.txt' ), header = None , names = ['latency' ])
364
+ df = pd .read_csv (os .path .join (execution_params [ 'report_location' ] , 'benchmark/predict.txt' ), header = None , names = ['latency' ])
355
365
iteration = df .index
356
366
latency = df .latency
357
367
a4_dims = (11.7 , 8.27 )
@@ -360,15 +370,15 @@ def generate_latency_graph():
360
370
plt .ylabel ('Prediction time' )
361
371
plt .title ('Prediction latency' )
362
372
plt .bar (iteration , latency )
363
- plt .savefig (f"{ TMP_DIR } /benchmark/predict_latency.png" )
373
+ plt .savefig (f"{ execution_params [ 'report_location' ] } /benchmark/predict_latency.png" )
364
374
365
375
366
376
def generate_profile_graph ():
367
377
click .secho ("*Preparing Profile graphs..." , fg = 'green' )
368
378
369
379
plot_data = {}
370
380
for m in metrics :
371
- df = pd .read_csv (f' { TMP_DIR } /benchmark/{ m } ' , header = None )
381
+ df = pd .read_csv (f" { execution_params [ 'report_location' ] } /benchmark/{ m } " , header = None )
372
382
m = m .split ('.txt' )[0 ]
373
383
plot_data [f"{ m } _index" ] = df .index
374
384
plot_data [f"{ m } _values" ] = df .values
@@ -434,7 +444,6 @@ def stop_torchserve():
434
444
# Test plans (soak, vgg11_1000r_10c, vgg11_10000r_100c,...)
435
445
def soak():
    """Apply the 'soak' test plan: 100000 requests at a concurrency of 10.

    Overwrites the 'requests' and 'concurrency' entries of the module-level
    ``execution_params`` dict in place; other entries are left untouched.
    """
    execution_params['requests'] = 100000
    execution_params['concurrency'] = 10
439
448
440
449
@@ -464,6 +473,15 @@ def resnet152_batch_docker():
464
473
execution_params ['batch_size' ] = 4
465
474
execution_params ['exec_env' ] = 'docker'
466
475
476
def bert_batch():
    """Apply the 'bert_batch' test plan.

    Overwrites these entries of the module-level ``execution_params`` dict in
    place: model archive URL, request count (1000), concurrency (10), batch
    size (4) and the input file path.
    """
    execution_params['url'] = 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'
    execution_params['requests'] = 1000
    execution_params['concurrency'] = 10
    execution_params['batch_size'] = 4
    # Relative path: it resolves against the process working directory.
    execution_params['input'] = '../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt'
482
+
483
def workflow_nmt():
    """Placeholder test plan: currently applies no parameter overrides."""
    pass
467
485
468
486
def custom():
    """'custom' test plan: applies no overrides to the execution parameters."""
    pass
@@ -475,6 +493,8 @@ def custom():
475
493
"vgg11_10000r_100c" : vgg11_10000r_100c ,
476
494
"resnet152_batch" : resnet152_batch ,
477
495
"resnet152_batch_docker" : resnet152_batch_docker ,
496
+ "bert_batch" : bert_batch ,
497
+ "workflow_nmt" : workflow_nmt ,
478
498
"custom" : custom
479
499
}
480
500
0 commit comments