pytorch
diff --git a/‎.github/workflows/doc-automation.yml
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/doc-automation.yml
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmarks/benchmark-ab.py
Lines changed: 37 additions & 17 deletions b/‎benchmarks/benchmark-ab.py
Lines changed: 37 additions & 17 deletions
diff --git a/‎ci/launcher/launch_test.py
Lines changed: 1 addition & 1 deletion b/‎ci/launcher/launch_test.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎docker/Dockerfile
Lines changed: 1 addition & 1 deletion b/‎docker/Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/api/dev_api.rst
Lines changed: 13 additions & 0 deletions b/‎docs/api/dev_api.rst
Lines changed: 13 additions & 0 deletions
diff --git a/‎docs/api/modules.rst
Lines changed: 12 additions & 0 deletions b/‎docs/api/modules.rst
Lines changed: 12 additions & 0 deletions
diff --git a/‎docs/api/run_circleci_tests.rst
Lines changed: 7 additions & 0 deletions b/‎docs/api/run_circleci_tests.rst
Lines changed: 7 additions & 0 deletions
diff --git a/‎docs/api/setup.rst
Lines changed: 7 additions & 0 deletions b/‎docs/api/setup.rst
Lines changed: 7 additions & 0 deletions
diff --git a/‎docs/api/test.rst
Lines changed: 22 additions & 0 deletions b/‎docs/api/test.rst
Lines changed: 22 additions & 0 deletions
diff --git a/‎docs/api/torchserve_sanity.rst
Lines changed: 7 additions & 0 deletions b/‎docs/api/torchserve_sanity.rst
Lines changed: 7 additions & 0 deletions
diff --git a/‎docs/api/ts.metrics.rst
Lines changed: 78 additions & 0 deletions b/‎docs/api/ts.metrics.rst
Lines changed: 78 additions & 0 deletions
diff --git a/‎docs/api/ts.model_service.rst
Lines changed: 22 additions & 0 deletions b/‎docs/api/ts.model_service.rst
Lines changed: 22 additions & 0 deletions
@@ -24,6 +24,7 @@ jobs:
         python3 -m pip install katex
         python3 -m pip install sphinxcontrib.katex
         python3 -m pip install sphinx_markdown_tables
+        python3 -m pip install torchserve torch-model-archiver torch-workflow-archiver
     - name: Build the docset
       run: cd ./docs/sphinx ; export PATH=$PATH:$(python3 -m site --user-base)/bin/ ; make html
       id: build
 
@@ -29,9 +29,11 @@
                      'docker_runtime': '',
                      'backend_profiling': False,
                      'config_properties': 'config.properties',
-                     'inference_model_url': 'predictions/benchmark'
+                     'inference_model_url': 'predictions/benchmark',
+                     'report_location': tempfile.gettempdir()
                      }
-TMP_DIR = tempfile.gettempdir()
+                     
+TMP_DIR = default_ab_params['report_location']
 execution_params = default_ab_params.copy()
 result_file = os.path.join(TMP_DIR, "benchmark/result.txt")
 metric_log = os.path.join(TMP_DIR, "benchmark/logs/model_metrics.log")
@@ -41,7 +43,6 @@ def json_provider(file_path, cmd_name):
     with open(file_path) as config_data:
         return json.load(config_data)
 
-
 @click.command()
 @click.argument('test_plan', default='custom')
 @click.option('--url', '-u', default='https://torchserve.pytorch.org/mar_files/resnet-18.mar',
@@ -70,7 +71,7 @@ def json_provider(file_path, cmd_name):
 @click_config_file.configuration_option(provider=json_provider, implicit=False,
                                         help="Read configuration from a JSON file")
 def benchmark(test_plan, url, gpus, exec_env, concurrency, requests, batch_size, batch_delay, input, workers,
-              content_type, image, docker_runtime, backend_profiling, config_properties, inference_model_url):
+              content_type, image, docker_runtime, backend_profiling, config_properties, inference_model_url, report_location):
     input_params = {'url': url,
                     'gpus': gpus,
                     'exec_env': exec_env,
@@ -85,12 +86,14 @@ def benchmark(test_plan, url, gpus, exec_env, concurrency, requests, batch_size,
                     'docker_runtime': docker_runtime,
                     'backend_profiling': backend_profiling,
                     'config_properties': config_properties,
-                    'inference_model_url': inference_model_url
+                    'inference_model_url': inference_model_url,
+                    'report_location': report_location
                     }
 
     # set ab params
     update_plan_params[test_plan]()
     update_exec_params(input_params)
+
     click.secho("Starting AB benchmark suite...", fg='green')
     click.secho(f"\n\nConfigured execution parameters are:", fg='green')
     click.secho(f"{execution_params}", fg="blue")
@@ -122,7 +125,7 @@ def check_torchserve_health():
         except Exception as e:
             retry += 1
             time.sleep(3)
-    failure_exit("Could not connect to Tochserve instance at " + execution_params['inference_url'])
+    failure_exit("Could not connect to Torchserve instance at " + execution_params['inference_url'])
 
 def warm_up():
     register_model()
@@ -135,7 +138,7 @@ def warm_up():
 
 
 def run_benchmark():
-    click.secho("\n\nExecuting inference perfromance tests ...", fg='green')
+    click.secho("\n\nExecuting inference performance tests ...", fg='green')
     ab_cmd = f"ab -c {execution_params['concurrency']}  -n {execution_params['requests']} -k -p {TMP_DIR}/benchmark/input -T " \
              f"{execution_params['content_type']} {execution_params['inference_url']}/{execution_params['inference_model_url']} > {result_file}"
 
@@ -208,8 +211,8 @@ def docker_torchserve_start():
     if execution_params['backend_profiling']:
         backend_profiling = '-e TS_BENCHMARK=True'
 
-    # delete existing ts conatiner instance
-    click.secho("*Removing existing ts conatiner instance...", fg='green')
+    # delete existing ts container instance
+    click.secho("*Removing existing ts container instance...", fg='green')
     execute('docker rm -f ts', wait=True)
 
     click.secho(f"*Starting docker container of image {docker_image} ...", fg='green')
@@ -308,10 +311,17 @@ def generate_csv_output():
     line50 = int(batched_requests / 2)
     line90 = int(batched_requests * 9 / 10)
     line99 = int(batched_requests * 99 / 100)
+
+    click.secho(f"Saving benchmark results to {execution_params['report_location']}")
+
     artifacts = {}
-    with open(f'{TMP_DIR}/benchmark/result.txt') as f:
+    with open(f"{execution_params['report_location']}/benchmark/result.txt") as f:
         data = f.readlines()
+
     artifacts['Benchmark'] = "AB"
+    artifacts['Batch size'] = execution_params['batch_size']
+    artifacts['Batch delay'] = execution_params['batch_delay']
+    artifacts['Workers'] = execution_params['workers']
     artifacts['Model'] = execution_params['url']
     artifacts['Concurrency'] = execution_params['concurrency']
     artifacts['Requests'] = execution_params['requests']
@@ -323,18 +333,18 @@ def generate_csv_output():
     artifacts['TS latency mean'] = extract_entity(data, 'Time per request:.*mean\)', -3)
     artifacts['TS error rate'] = int(artifacts['TS failed requests']) / execution_params['requests'] * 100
 
-    with open(os.path.join(TMP_DIR, 'benchmark/predict.txt')) as f:
+    with open(os.path.join(execution_params['report_location'], 'benchmark/predict.txt')) as f:
         lines = f.readlines()
         lines.sort(key=float)
         artifacts['Model_p50'] = lines[line50].strip()
         artifacts['Model_p90'] = lines[line90].strip()
         artifacts['Model_p99'] = lines[line99].strip()
 
     for m in metrics:
-        df = pd.read_csv(f"{TMP_DIR}/benchmark/{m}", header=None, names=['data'])
+        df = pd.read_csv(f"{execution_params['report_location']}/benchmark/{m}", header=None, names=['data'])
         artifacts[m.split('.txt')[0] + "_mean"] = df['data'].values.mean().round(2)
 
-    with open(os.path.join(TMP_DIR, 'benchmark/ab_report.csv'), 'w') as csv_file:
+    with open(os.path.join(execution_params['report_location'], 'benchmark/ab_report.csv'), 'w') as csv_file:
         csvwriter = csv.writer(csv_file)
         csvwriter.writerow(artifacts.keys())
         csvwriter.writerow(artifacts.values())
@@ -351,7 +361,7 @@ def extract_entity(data, pattern, index, delim=" "):
 
 def generate_latency_graph():
     click.secho("*Preparing graphs...", fg='green')
-    df = pd.read_csv(os.path.join(TMP_DIR, 'benchmark/predict.txt'), header=None, names=['latency'])
+    df = pd.read_csv(os.path.join(execution_params['report_location'], 'benchmark/predict.txt'), header=None, names=['latency'])
     iteration = df.index
     latency = df.latency
     a4_dims = (11.7, 8.27)
@@ -360,15 +370,15 @@ def generate_latency_graph():
     plt.ylabel('Prediction time')
     plt.title('Prediction latency')
     plt.bar(iteration, latency)
-    plt.savefig(f"{TMP_DIR}/benchmark/predict_latency.png")
+    plt.savefig(f"{execution_params['report_location']}/benchmark/predict_latency.png")
 
 
 def generate_profile_graph():
     click.secho("*Preparing Profile graphs...", fg='green')
 
     plot_data = {}
     for m in metrics:
-        df = pd.read_csv(f'{TMP_DIR}/benchmark/{m}', header=None)
+        df = pd.read_csv(f"{execution_params['report_location']}/benchmark/{m}", header=None)
         m = m.split('.txt')[0]
         plot_data[f"{m}_index"] = df.index
         plot_data[f"{m}_values"] = df.values
@@ -434,7 +444,6 @@ def stop_torchserve():
 # Test plans (soak, vgg11_1000r_10c,  vgg11_10000r_100c,...)
 def soak():
     execution_params['requests'] = 100000
-
     execution_params['concurrency'] = 10
 
 
@@ -464,6 +473,15 @@ def resnet152_batch_docker():
     execution_params['batch_size'] = 4
     execution_params['exec_env'] = 'docker'
 
+def bert_batch():
+    execution_params['url'] = 'https://bert-mar-file.s3.us-west-2.amazonaws.com/BERTSeqClassification.mar'
+    execution_params['requests'] = 1000
+    execution_params['concurrency'] = 10
+    execution_params['batch_size'] = 4
+    execution_params['input'] = '../examples/Huggingface_Transformers/Seq_classification_artifacts/sample_text.txt'
+
+def workflow_nmt():
+    pass
 
 def custom():
     pass
@@ -475,6 +493,8 @@ def custom():
     "vgg11_10000r_100c": vgg11_10000r_100c,
     "resnet152_batch": resnet152_batch,
     "resnet152_batch_docker": resnet152_batch_docker,
+    "bert_batch": bert_batch,
+    "workflow_nmt": workflow_nmt,
     "custom": custom
 }
 
 
@@ -181,7 +181,7 @@ def main():
 
     parser.add_argument(
         "--ami-id",
-        default="ami-0e6d6921c639b58c3",
+        default="ami-0bc4efd197db7d3ad",
         help="Specify an Ubuntu Base DLAMI only. This AMI type ships with nvidia drivers already setup. Using other AMIs might"
         "need non-trivial installations on the AMI. AMI-ids differ per aws region.",
     )
 
@@ -63,7 +63,7 @@ RUN TORCH_VER=$(curl --silent --location https://pypi.org/pypi/torch/json | pyth
     else \
         pip install --no-cache-dir torch==$TORCH_VER+cpu torchvision==$TORCH_VISION_VER+cpu -f https://download.pytorch.org/whl/torch_stable.html; \
     fi
-RUN pip install --no-cache-dir captum torchtext torchserve torch-model-archiver
+RUN pip install -U setuptools && pip install --no-cache-dir captum torchtext torchserve torch-model-archiver
 
 # Final image for production
 FROM ${BASE_IMAGE} AS runtime-image
 
@@ -0,0 +1,13 @@
+.. toctree::
+  :caption: Developer APIs:
+  
+  ts.rst
+  ts.torch_handler.request_envelope.rst
+  ts.torch_handler.rst
+  ts.metrics.rst      
+  ts.model_service.rst  
+  ts.protocol.rst
+  modules.rst  
+
+
+
@@ -0,0 +1,12 @@
+serve
+=====
+
+.. toctree::
+   :maxdepth: 4
+
+   run_circleci_tests
+   setup
+   test
+   torchserve_sanity
+   ts
+   ts_scripts
@@ -0,0 +1,7 @@
+run\_circleci\_tests module
+===========================
+
+.. automodule:: run_circleci_tests
+   :members:
+   :undoc-members:
+   :show-inheritance:
@@ -0,0 +1,7 @@
+setup module
+============
+
+.. automodule:: setup
+   :members:
+   :undoc-members:
+   :show-inheritance:
@@ -0,0 +1,22 @@
+test package
+============
+
+Submodules
+----------
+
+test.regression\_tests module
+-----------------------------
+
+.. automodule:: test.regression_tests
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: test
+   :members:
+   :undoc-members:
+   :show-inheritance:
@@ -0,0 +1,7 @@
+torchserve\_sanity module
+=========================
+
+.. automodule:: torchserve_sanity
+   :members:
+   :undoc-members:
+   :show-inheritance:
@@ -0,0 +1,78 @@
+ts.metrics package
+==================
+
+Submodules
+----------
+
+ts.metrics.dimension module
+---------------------------
+
+.. automodule:: ts.metrics.dimension
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+ts.metrics.metric module
+------------------------
+
+.. automodule:: ts.metrics.metric
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+ts.metrics.metric\_collector module
+-----------------------------------
+
+.. automodule:: ts.metrics.metric_collector
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+ts.metrics.metric\_encoder module
+---------------------------------
+
+.. automodule:: ts.metrics.metric_encoder
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+ts.metrics.metrics\_store module
+--------------------------------
+
+.. automodule:: ts.metrics.metrics_store
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+ts.metrics.process\_memory\_metric module
+-----------------------------------------
+
+.. automodule:: ts.metrics.process_memory_metric
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+ts.metrics.system\_metrics module
+---------------------------------
+
+.. automodule:: ts.metrics.system_metrics
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+ts.metrics.unit module
+----------------------
+
+.. automodule:: ts.metrics.unit
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: ts.metrics
+   :members:
+   :undoc-members:
+   :show-inheritance:
@@ -0,0 +1,22 @@
+ts.model\_service package
+=========================
+
+Submodules
+----------
+
+ts.model\_service.model\_service module
+---------------------------------------
+
+.. automodule:: ts.model_service.model_service
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: ts.model_service
+   :members:
+   :undoc-members:
+   :show-inheritance:
Original file line number	Diff line number	Diff line change
`@@ -181,7 +181,7 @@ def main():`
`181`	`181`
`182`	`182`	`parser.add_argument(`
`183`	`183`	`"--ami-id",`
`184`		`- default="ami-0e6d6921c639b58c3",`
	`184`	`+ default="ami-0bc4efd197db7d3ad",`
`185`	`185`	`help="Specify an Ubuntu Base DLAMI only. This AMI type ships with nvidia drivers already setup. Using other AMIs might"`
`186`	`186`	`"need non-trivial installations on the AMI. AMI-ids differ per aws region.",`
`187`	`187`	`)`