From 7bf59976b5f4eb7c5b8f30a88af832e028028446 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 14 Feb 2025 19:28:29 +0000 Subject: [PATCH] Docs update, fix download links for llama models (#2055) * add docs for llama3 + inference * Update llama2-70b README.md * Update main.py --- .github/workflows/build_wheels.yml | 2 + .github/workflows/publish.yaml | 1 - .github/workflows/test-bert.yml | 6 +- .../workflows/test-calibration-downloads.yml | 94 ++++++++++++++++ .github/workflows/test-loadgen.yml | 6 +- .github/workflows/test-resnet50.yml | 6 +- .github/workflows/test-retinanet.yml | 6 +- .github/workflows/test-rgat.yml | 6 +- .github/workflows/test-rnnt.yml | 4 +- .github/workflows/test-submission-checker.yml | 2 +- .../workflows/test-submission-generation.yml | 8 +- .github/workflows/test-tvm.yml | 6 +- README.md | 1 + docs/README.md | 2 +- .../get-pointpainting-data.md | 9 ++ .../3d_object_detection/pointpainting.md | 13 +++ docs/benchmarks/graph/get-rgat-data.md | 6 +- .../image_classification/get-resnet50-data.md | 10 +- .../image_classification/mobilenets.md | 22 ++-- docs/benchmarks/language/get-bert-data.md | 8 +- docs/benchmarks/language/get-gptj-data.md | 4 +- .../language/get-llama2-70b-data.md | 4 +- .../language/get-llama3_1-405b-data.md | 6 +- .../language/get-mixtral-8x7b-data.md | 4 +- .../reproducibility/indyscc24-bert.md | 6 +- .../medical_imaging/get-3d-unet-data.md | 10 +- .../object_detection/get-retinanet-data.md | 8 +- .../recommendation/get-dlrm-v2-data.md | 4 +- .../benchmarks/text_to_image/get-sdxl-data.md | 4 +- .../text_to_image/reproducibility/scc24.md | 8 +- docs/install/index.md | 24 ++--- docs/submission/index.md | 28 ++--- docs/usage/index.md | 2 +- graph/R-GAT/README.md | 22 ++-- language/bert/README.md | 12 +-- language/bert/bert_QDL.py | 2 +- language/bert/onnxruntime_SUT.py | 2 +- language/bert/ray_SUT.py | 2 +- language/gpt-j/GPTJ_QDL.py | 2 +- language/gpt-j/README.md | 14 +-- language/gpt-j/main.py | 2 +- language/llama2-70b/README.md | 15 ++- language/llama3.1-405b/README.md | 33 +++--- main.py | 100 +++++++++++++----- mkdocs.yml | 7 +- recommendation/dlrm_v2/pytorch/README.md | 8 +- text_to_image/README.md | 26 ++--- tools/submission/submission_checker.py | 2 +- 48 files changed, 374 insertions(+), 205 deletions(-) create mode 100644 .github/workflows/test-calibration-downloads.yml create mode 100644 docs/benchmarks/automotive/3d_object_detection/get-pointpainting-data.md create mode 100644 docs/benchmarks/automotive/3d_object_detection/pointpainting.md diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 24c7e8190..5f069c33e 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -11,6 +11,7 @@ on: paths: - loadgen/** + jobs: update_version: name: Update version only on ubuntu but used by windows and macos @@ -133,6 +134,7 @@ jobs: with: name: wheels-windows-latest path: wheels + # Publish - name: Publish uses: pypa/gh-action-pypi-publish@release/v1 with: diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 214ac3611..1fbb9cb88 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -2,7 +2,6 @@ name: Publish site - on: release: types: [published] diff --git a/.github/workflows/test-bert.yml b/.github/workflows/test-bert.yml index b7012a6e8..27fcee0d3 100755 --- a/.github/workflows/test-bert.yml +++ b/.github/workflows/test-bert.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and 
lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Test for MLPerf inference bert submission generation using CM script automation +name: Test for MLPerf inference bert submission generation using MLC script automation on: pull_request: @@ -33,7 +33,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cm4mlops + python3 -m pip install mlc-scripts - name: Test BERT and end to end submission generation run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=bert-99 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom + mlcr run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=bert-99 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom diff --git a/.github/workflows/test-calibration-downloads.yml b/.github/workflows/test-calibration-downloads.yml new file mode 100644 index 000000000..48078804f --- /dev/null +++ b/.github/workflows/test-calibration-downloads.yml @@ -0,0 +1,94 @@ +name: Test for the downloads of calibration datasets +on: + pull_request: + branches: [ "master", "dev" ] + paths: + - vision/classification_and_detection/** + - language/** + - recommendation/** + - .github/workflows/test-calibration-downloads.yml + - '!**.md' + +env: + PR_HEAD_REF: ${{ github.event.pull_request.head.ref }} + +jobs: + download-imagenet: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.9" ] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install mlc-scripts + - name: Download Imagenet Calibration + run: | + mlcr get,dataset,imagenet,_calibration --outdirname=. --quiet + + download-openimages: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.9" ] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install mlc-scripts + - name: Download Openimages Calibration + run: | + mlcr get,dataset,openimages,_calibration --outdirname=. 
--quiet + + download-igbh: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.9" ] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install mlc-scripts + # - name: Download IGBH Calibration + # run: | + # mlcr get,dataset,igbh,_calibration --outdirname=. --quiet + + download-squad: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.9" ] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install mlc-scripts + - name: Download SQUAD Calibration + run: | + mlcr get,dataset,squad,_calib1 --outdirname=. --quiet diff --git a/.github/workflows/test-loadgen.yml b/.github/workflows/test-loadgen.yml index d73d5913b..c90167f56 100755 --- a/.github/workflows/test-loadgen.yml +++ b/.github/workflows/test-loadgen.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Test for MLPerf inference loadgen using CM script automation +name: Test for MLPerf inference loadgen using MLC script automation on: pull_request: @@ -31,7 +31,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cm4mlops + python3 -m pip install mlc-scripts - name: Test Loadgen run: | - cm run script --tags=get,mlperf,inference,loadgen --quiet --version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.loadgen.tags=_no-compilation-warnings + mlcr get,mlperf,inference,loadgen --quiet --version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.loadgen.tags=_no-compilation-warnings diff --git a/.github/workflows/test-resnet50.yml b/.github/workflows/test-resnet50.yml index 2c6fddf8f..957d3b0b5 100755 --- a/.github/workflows/test-resnet50.yml +++ b/.github/workflows/test-resnet50.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Test for MLPerf inference resnet50 submission generation using CM script automation +name: Test for MLPerf inference resnet50 submission generation using MLC script automation on: pull_request: @@ -35,7 +35,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cm4mlops + python3 -m pip install mlc-scripts - name: Test Resnet50 and end to end submission generation run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom 
--adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }} + mlcr run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }} diff --git a/.github/workflows/test-retinanet.yml b/.github/workflows/test-retinanet.yml index 5b18619de..2a99e9d0c 100755 --- a/.github/workflows/test-retinanet.yml +++ b/.github/workflows/test-retinanet.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Test for MLPerf inference retinanet submission generation using CM script automation +name: Test for MLPerf inference retinanet submission generation using MLC script automation on: pull_request: @@ -33,7 +33,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cm4mlops + python3 -m pip install mlc-scripts - name: Test Retinanet and end to end submission generation run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom + mlcr run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom diff --git a/.github/workflows/test-rgat.yml b/.github/workflows/test-rgat.yml index 990c58a66..f334dda45 100644 --- a/.github/workflows/test-rgat.yml +++ b/.github/workflows/test-rgat.yml @@ -1,4 +1,4 @@ -name: Test for MLPerf inference rgat submission generation using CM script automation +name: Test for MLPerf inference rgat submission generation using MLC script automation on: pull_request: @@ -32,7 +32,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cm4mlops + python3 -m pip install mlc-scripts - name: Test R-GAT and end to end submission 
generation run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --category=datacenter --hw_name=default --model=rgat --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }} + mlcr run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --category=datacenter --hw_name=default --model=rgat --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }} diff --git a/.github/workflows/test-rnnt.yml b/.github/workflows/test-rnnt.yml index a08803741..415a012d6 100644 --- a/.github/workflows/test-rnnt.yml +++ b/.github/workflows/test-rnnt.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Test for MLPerf inference rnnt submission generation using CM script automation +name: Test for MLPerf inference rnnt submission generation using MLC script automation on: pull_request: @@ -39,4 +39,4 @@ jobs: cm run script --quiet --tags=get,sys-utils-cm - name: Test RNNT and end to end submission generation run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_performance-only --quiet --submitter="MLCommons" --hw_name=default --model=rnnt --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --precision=${{ matrix.precision }} --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.env.CM_GIT_CHECKOUT=$PR_HEAD_REF --adr.inference-src.env.CM_GIT_URL=${{ github.event.pull_request.head.repo.html_url }} --adr.ml-engine-pytorch.version=1.13.0 --adr.ml-engine-torchvision.version=0.14.1 --adr.librosa.version_max=0.9.1 + mlcr run,mlperf,inference,generate-run-cmds,_performance-only --quiet --submitter="MLCommons" --hw_name=default --model=rnnt --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --precision=${{ matrix.precision }} --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.env.CM_GIT_CHECKOUT=$PR_HEAD_REF --adr.inference-src.env.CM_GIT_URL=${{ github.event.pull_request.head.repo.html_url }} --adr.ml-engine-pytorch.version=1.13.0 --adr.ml-engine-torchvision.version=0.14.1 --adr.librosa.version_max=0.9.1 diff --git a/.github/workflows/test-submission-checker.yml b/.github/workflows/test-submission-checker.yml index 4ad4be4c8..7dd653ec8 100644 --- a/.github/workflows/test-submission-checker.yml +++ b/.github/workflows/test-submission-checker.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: 
https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Test for MLPerf inference submission checker using CM script automation +name: Test for MLPerf inference submission checker using MLC script automation on: pull_request: diff --git a/.github/workflows/test-submission-generation.yml b/.github/workflows/test-submission-generation.yml index 97afc58cd..0bb2ad353 100644 --- a/.github/workflows/test-submission-generation.yml +++ b/.github/workflows/test-submission-generation.yml @@ -1,6 +1,6 @@ # This workflow will test the submission generation using MLPerf Automation -name: CM based Submission Generation +name: MLC based Submission Generation on: pull_request: @@ -34,7 +34,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - pip install cm4mlops + pip install mlc-scripts - name: Pull repo where test cases are uploaded run: | git clone -b submission-generation-examples https://github.com/mlcommons/inference.git submission_generation_examples @@ -48,5 +48,5 @@ jobs: fi # Dynamically set the log group to simulate a dynamic step name echo "::group::$description" - cm ${{ matrix.action }} script --tags=generate,inference,submission --adr.compiler.tags=gcc --version=v5.0 --clean --preprocess_submission=yes --submission_base_dir=mysubmissions --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet - cm ${{ matrix.action }} script --tags=run,submission,checker --submitter_id_off=mysubmitter_id --tar=yes --submission_dir=mysubmissions/submissions --submission_tar_file=mysubmission.tar.gz + mlc ${{ matrix.action }} script --tags=generate,inference,submission --adr.compiler.tags=gcc --version=v5.0 --clean --preprocess_submission=yes --submission_base_dir=mysubmissions --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --division=${{ matrix.division }} --env.MLC_DETERMINE_MEMORY_CONFIGURATION=yes --quiet + mlc ${{ matrix.action }} script --tags=run,submission,checker --submitter_id_off=mysubmitter_id --tar=yes --submission_dir=mysubmissions/submissions --submission_tar_file=mysubmission.tar.gz diff --git a/.github/workflows/test-tvm.yml b/.github/workflows/test-tvm.yml index 6741c3610..1856ff4bd 100755 --- a/.github/workflows/test-tvm.yml +++ b/.github/workflows/test-tvm.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Test for MLPerf inference resnet50 TVM backend using CM script automation +name: Test for MLPerf inference resnet50 TVM backend using MLC script automation on: pull_request: @@ -33,7 +33,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python3 -m pip install cm4mlops + python3 -m pip install mlc-scripts - name: Test Resnet50 TVM backend run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF + mlcr 
run,mlperf,inference,generate-run-cmds --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF diff --git a/README.md b/README.md index 161da09a9..6cab3e2f6 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ MLPerf Inference is a benchmark suite for measuring how fast systems can run mod Please see the [MLPerf Inference benchmark paper](https://arxiv.org/abs/1911.02549) for a detailed description of the benchmarks along with the motivation and guiding principles behind the benchmark suite. If you use any part of this benchmark (e.g., reference implementations, submissions, etc.), please cite the following: + ``` @misc{reddi2019mlperf, title={MLPerf Inference Benchmark}, diff --git a/docs/README.md b/docs/README.md index 92d66225c..496a93718 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,4 +1,4 @@ -# Documentation Website for MLPerf Inference using the unified CM interface +# Documentation Website for MLPerf Inference using the MLC interface ## Commands to get the website running locally ``` diff --git a/docs/benchmarks/automotive/3d_object_detection/get-pointpainting-data.md b/docs/benchmarks/automotive/3d_object_detection/get-pointpainting-data.md new file mode 100644 index 000000000..2e5ef3337 --- /dev/null +++ b/docs/benchmarks/automotive/3d_object_detection/get-pointpainting-data.md @@ -0,0 +1,9 @@ +--- +hide: + - toc +--- + +# 3D Object Detection using PointPainting + +TBD + diff --git a/docs/benchmarks/automotive/3d_object_detection/pointpainting.md b/docs/benchmarks/automotive/3d_object_detection/pointpainting.md new file mode 100644 index 000000000..efeb6f215 --- /dev/null +++ b/docs/benchmarks/automotive/3d_object_detection/pointpainting.md @@ -0,0 +1,13 @@ +--- +hide: + - toc +--- + + +# 3D Object Detection using PointPainting + + +=== "MLCommons-Python" + ## MLPerf Reference Implementation in Python + +{{ mlperf_inference_implementation_readme (4, "pointpainting", "reference", devices = ["CPU", "CUDA"]) }} \ No newline at end of file diff --git a/docs/benchmarks/graph/get-rgat-data.md b/docs/benchmarks/graph/get-rgat-data.md index 189c25b87..6ab9515e5 100644 --- a/docs/benchmarks/graph/get-rgat-data.md +++ b/docs/benchmarks/graph/get-rgat-data.md @@ -14,7 +14,7 @@ The benchmark implementation run command will automatically download the validat ### Get Full Dataset ``` - cm run script --tags=get,dataset,igbh,_full -j + mlcr get,dataset,igbh,_full -j ``` === "Debug Dataset" @@ -22,7 +22,7 @@ The benchmark implementation run command will automatically download the validat ### Get Full Dataset ``` - cm run script --tags=get,dataset,igbh,_debug -j + mlcr get,dataset,igbh,_debug -j ``` ## Model @@ -34,6 +34,6 @@ Get the Official MLPerf R-GAT Model ### PyTorch ``` - cm run script --tags=get,ml-model,rgat -j + mlcr get,ml-model,rgat -j ``` diff --git a/docs/benchmarks/image_classification/get-resnet50-data.md b/docs/benchmarks/image_classification/get-resnet50-data.md index 900379d5a..771571d5c 100644 --- a/docs/benchmarks/image_classification/get-resnet50-data.md +++ b/docs/benchmarks/image_classification/get-resnet50-data.md @@ -14,18 +14,18 @@ The benchmark implementation run command will automatically download the validat ### Get Validation Dataset ``` - cm run script 
--tags=get,dataset,imagenet,validation -j + mlcr get,dataset,imagenet,validation -j ``` === "Calibration" ResNet50 calibration dataset consist of 500 images selected from the Imagenet 2012 validation dataset. There are 2 alternative options for the calibration dataset. ### Get Calibration Dataset Using Option 1 ``` - cm run script --tags=get,dataset,imagenet,calibration,_mlperf.option1 -j + mlcr get,dataset,imagenet,calibration,_mlperf.option1 -j ``` ### Get Calibration Dataset Using Option 2 ``` - cm run script --tags=get,dataset,imagenet,calibration,_mlperf.option2 -j + mlcr get,dataset,imagenet,calibration,_mlperf.option2 -j ``` ## Model @@ -37,12 +37,12 @@ Get the Official MLPerf ResNet50 Model ### Tensorflow ``` - cm run script --tags=get,ml-model,resnet50,_tensorflow -j + mlcr get,ml-model,resnet50,_tensorflow -j ``` === "Onnx" ### Onnx ``` - cm run script --tags=get,ml-model,resnet50,_onnx -j + mlcr get,ml-model,resnet50,_onnx -j ``` diff --git a/docs/benchmarks/image_classification/mobilenets.md b/docs/benchmarks/image_classification/mobilenets.md index 09840ef1d..7e4605b4b 100644 --- a/docs/benchmarks/image_classification/mobilenets.md +++ b/docs/benchmarks/image_classification/mobilenets.md @@ -5,7 +5,7 @@ hide: # Image Classification using Mobilenet models -Install CM following the [installation page](site:install). +Install MLC following the [installation page](site:install). Mobilenet models are not official MLPerf models and so cannot be used for a Closed division MLPerf inference submission. But since they can be run with Imagenet dataset, we are allowed to use them for Open division submission. Only CPU runs are supported now. @@ -14,31 +14,31 @@ Mobilenet models are not official MLPerf models and so cannot be used for a Clos === "Mobilenet-V1" ### Mobilenet V1 ```bash - cm run script --tags=run,mobilenet-models,_tflite,_mobilenet-v1 \ + mlcr run,mobilenet-models,_tflite,_mobilenet-v1 \ --adr.compiler.tags=gcc ``` === "Mobilenet-V2" ### Mobilenet V2 ```bash - cm run script --tags=run,mobilenet-models,_tflite,_mobilenet-v2 \ + mlcr run,mobilenet-models,_tflite,_mobilenet-v2 \ --adr.compiler.tags=gcc ``` === "Mobilenet-V2" ### Mobilenet V2 ```bash - cm run script --tags=run,mobilenet-models,_tflite,_mobilenet-v2 \ + mlcr run,mobilenet-models,_tflite,_mobilenet-v2 \ --adr.compiler.tags=gcc ``` === "Mobilenets" ### Mobilenet V1,V2,V3 ```bash - cm run script --tags=run,mobilenet-models,_tflite,_mobilenet \ + mlcr run,mobilenet-models,_tflite,_mobilenet \ --adr.compiler.tags=gcc ``` === "Efficientnet" ### Efficientnet ```bash - cm run script --tags=run,mobilenet-models,_tflite,_efficientnet \ + mlcr run,mobilenet-models,_tflite,_efficientnet \ --adr.compiler.tags=gcc ``` @@ -46,31 +46,31 @@ Mobilenet models are not official MLPerf models and so cannot be used for a Clos === "Mobilenet-V1" ### Mobilenet V1 ```bash - cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet-v1 \ + mlcr run,mobilenet-models,_tflite,_armnn,_mobilenet-v1 \ --adr.compiler.tags=gcc ``` === "Mobilenet-V2" ### Mobilenet V2 ```bash - cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet-v2 \ + mlcr run,mobilenet-models,_tflite,_armnn,_mobilenet-v2 \ --adr.compiler.tags=gcc ``` === "Mobilenet-V2" ### Mobilenet V2 ```bash - cm run script --tags=run,mobilenet-models,_tflite,_armnn,_mobilenet-v2 \ + mlcr run,mobilenet-models,_tflite,_armnn,_mobilenet-v2 \ --adr.compiler.tags=gcc ``` === "Mobilenets" ### Mobilenet V1,V2,V3 ```bash - cm run script 
--tags=run,mobilenet-models,_tflite,_armnn,_mobilenet \ + mlcr run,mobilenet-models,_tflite,_armnn,_mobilenet \ --adr.compiler.tags=gcc ``` === "Efficientnet" ### Efficientnet ```bash - cm run script --tags=run,mobilenet-models,_tflite,_armnn,_efficientnet \ + mlcr run,mobilenet-models,_tflite,_armnn,_efficientnet \ --adr.compiler.tags=gcc ``` diff --git a/docs/benchmarks/language/get-bert-data.md b/docs/benchmarks/language/get-bert-data.md index fed637572..430031f31 100644 --- a/docs/benchmarks/language/get-bert-data.md +++ b/docs/benchmarks/language/get-bert-data.md @@ -14,7 +14,7 @@ The benchmark implementation run command will automatically download the validat ### Get Validation Dataset ``` - cm run script --tags=get,dataset,squad,validation -j + mlcr get,dataset,squad,validation -j ``` ## Model @@ -26,18 +26,18 @@ Get the Official MLPerf Bert-Large Model ### Pytorch ``` - cm run script --tags=get,ml-model,bert-large,_pytorch -j + mlcr get,ml-model,bert-large,_pytorch -j ``` === "Onnx" ### Onnx ``` - cm run script --tags=get,ml-model,bert-large,_onnx -j + mlcr get,ml-model,bert-large,_onnx -j ``` === "Tensorflow" ### Tensorflow ``` - cm run script --tags=get,ml-model,bert-large,_tensorflow -j + mlcr get,ml-model,bert-large,_tensorflow -j ``` diff --git a/docs/benchmarks/language/get-gptj-data.md b/docs/benchmarks/language/get-gptj-data.md index 90591fb76..34140598e 100644 --- a/docs/benchmarks/language/get-gptj-data.md +++ b/docs/benchmarks/language/get-gptj-data.md @@ -14,7 +14,7 @@ The benchmark implementation run command will automatically download the validat ### Get Validation Dataset ``` - cm run script --tags=get,dataset,cnndm,validation -j + mlcr get,dataset,cnndm,validation -j ``` ## Model @@ -26,5 +26,5 @@ Get the Official MLPerf GPT-J Model ### Pytorch ``` - cm run script --tags=get,ml-model,gptj,_pytorch -j + mlcr get,ml-model,gptj,_pytorch -j ``` diff --git a/docs/benchmarks/language/get-llama2-70b-data.md b/docs/benchmarks/language/get-llama2-70b-data.md index 0214d95a5..d75540b5a 100644 --- a/docs/benchmarks/language/get-llama2-70b-data.md +++ b/docs/benchmarks/language/get-llama2-70b-data.md @@ -14,7 +14,7 @@ The benchmark implementation run command will automatically download the validat ### Get Validation Dataset ``` - cm run script --tags=get,dataset,openorca,validation -j + mlcr get,dataset,openorca,validation -j ``` ## Model @@ -26,7 +26,7 @@ Get the Official MLPerf LLAMA2-70b Model ### Pytorch ``` - cm run script --tags=get,ml-model,llama2-70b,_pytorch -j + mlcr get,ml-model,llama2-70b,_pytorch -j ``` !!! 
tip diff --git a/docs/benchmarks/language/get-llama3_1-405b-data.md b/docs/benchmarks/language/get-llama3_1-405b-data.md index 7333be64d..e1a5e1c3f 100644 --- a/docs/benchmarks/language/get-llama3_1-405b-data.md +++ b/docs/benchmarks/language/get-llama3_1-405b-data.md @@ -13,14 +13,14 @@ The benchmark implementation run command will automatically download the validat ### Get Validation Dataset ``` - cm run script --tags=get,dataset,mlperf,inference,llama3,_validation --outdirname= -j + mlcr get,dataset,mlperf,inference,llama3,_validation --outdirname= -j ``` === "Calibration" ### Get Calibration Dataset ``` - cm run script --tags=get,dataset,mlperf,inference,llama3,_calibration --outdirname= -j + mlcr get,dataset,mlperf,inference,llama3,_calibration --outdirname= -j ``` ## Model @@ -32,7 +32,7 @@ Get the Official MLPerf LLAMA3.1-405b Model ### Pytorch ``` - cm run script --tags=get,ml-model,llama3 --outdirname= --hf_token= -j + mlcr get,ml-model,llama3 --outdirname= --hf_token= -j ``` !!! tip diff --git a/docs/benchmarks/language/get-mixtral-8x7b-data.md b/docs/benchmarks/language/get-mixtral-8x7b-data.md index 1b2df1b9e..81b90cdb5 100644 --- a/docs/benchmarks/language/get-mixtral-8x7b-data.md +++ b/docs/benchmarks/language/get-mixtral-8x7b-data.md @@ -12,7 +12,7 @@ The benchmark implementation run command will automatically download the preproc ### Get Validation Dataset ``` - cm run script --tags=get,dataset-mixtral,openorca-mbxp-gsm8k-combined -j + mlcr get,dataset-mixtral,openorca-mbxp-gsm8k-combined -j ``` ## Model @@ -24,5 +24,5 @@ Get the Official MLPerf MIXTRAL-8x7b Model ### Pytorch ``` - cm run script --tags=get,ml-model,mixtral -j + mlcr get,ml-model,mixtral -j ``` \ No newline at end of file diff --git a/docs/benchmarks/language/reproducibility/indyscc24-bert.md b/docs/benchmarks/language/reproducibility/indyscc24-bert.md index 463d1a299..a9df27117 100644 --- a/docs/benchmarks/language/reproducibility/indyscc24-bert.md +++ b/docs/benchmarks/language/reproducibility/indyscc24-bert.md @@ -17,7 +17,7 @@ In the IndySCC 2024, your objective will be to run a reference (unoptimized) Pyt !!! info - Both MLPerf and CM automation are evolving projects. + Both MLPerf and MLC automation are evolving projects. If you encounter issues or have questions, please submit them [here](https://github.com/mlcommons/cm4mlops/issues) ## Artifacts to submit to the SCC committee @@ -35,7 +35,7 @@ All the needed files are automatically pushed to the GitHub repository if you ma ### Generate actual submission tree ```bash -cm run script --tags=generate,inference,submission \ +mlcr generate,inference,submission \ --clean \ --run-checker \ --tar=yes \ @@ -58,7 +58,7 @@ Fork the `mlperf-inference-results-scc24` branch of the repository URL at [https Run the following command after **replacing `--repo_url` with your GitHub fork URL**. 
```bash -cm run script --tags=push,github,mlperf,inference,submission \ +mlcr push,github,mlperf,inference,submission \ --repo_url=https://github.com//cm4mlperf-inference \ --repo_branch=mlperf-inference-results-scc24 \ --commit_message="Results on system " \ diff --git a/docs/benchmarks/medical_imaging/get-3d-unet-data.md b/docs/benchmarks/medical_imaging/get-3d-unet-data.md index 6c361f6f1..d68b76920 100644 --- a/docs/benchmarks/medical_imaging/get-3d-unet-data.md +++ b/docs/benchmarks/medical_imaging/get-3d-unet-data.md @@ -14,12 +14,12 @@ The benchmark implementation run command will automatically download the validat ### Get Validation Dataset(Original) ``` - cm run script --tags=get,dataset,kits19,_validation -j + mlcr get,dataset,kits19,_validation -j ``` ### Get Validation Dataset(Preprocessed) ``` - cm run script --tags=get,dataset,kits19,preprocessed -j + mlcr get,dataset,kits19,preprocessed -j ``` ## Model @@ -31,18 +31,18 @@ Get the Official MLPerf 3d-unet Model ### Pytorch ``` - cm run script --tags=get,ml-model,3d-unet,_pytorch -j + mlcr get,ml-model,3d-unet,_pytorch -j ``` === "Onnx" ### Onnx ``` - cm run script --tags=get,ml-model,3d-unet,_onnx -j + mlcr get,ml-model,3d-unet,_onnx -j ``` === "Tensorflow" ### Tensorflow ``` - cm run script --tags=get,ml-model,3d-unet,_tensorflow -j + mlcr get,ml-model,3d-unet,_tensorflow -j ``` diff --git a/docs/benchmarks/object_detection/get-retinanet-data.md b/docs/benchmarks/object_detection/get-retinanet-data.md index 900fd572a..6cd677b4e 100644 --- a/docs/benchmarks/object_detection/get-retinanet-data.md +++ b/docs/benchmarks/object_detection/get-retinanet-data.md @@ -14,13 +14,13 @@ The benchmark implementation run command will automatically download the validat ### Get Validation Dataset ``` - cm run script --tags=get,dataset,openimages,_validation -j + mlcr get,dataset,openimages,_validation -j ``` === "Calibration" Retinanet calibration dataset consist of 500 images selected from the OpenImages v6 dataset. ``` - cm run script --tags=get,dataset,openimages,_calibration -j + mlcr get,dataset,openimages,_calibration -j ``` ## Model @@ -32,12 +32,12 @@ Get the Official MLPerf Retinanet Model ### Pytorch ``` - cm run script --tags=get,ml-model,retinanet,_pytorch -j + mlcr get,ml-model,retinanet,_pytorch -j ``` === "Onnx" ### Onnx ``` - cm run script --tags=get,ml-model,retinanet,_onnx -j + mlcr get,ml-model,retinanet,_onnx -j ``` diff --git a/docs/benchmarks/recommendation/get-dlrm-v2-data.md b/docs/benchmarks/recommendation/get-dlrm-v2-data.md index 1c44ec471..bb35660b6 100644 --- a/docs/benchmarks/recommendation/get-dlrm-v2-data.md +++ b/docs/benchmarks/recommendation/get-dlrm-v2-data.md @@ -14,7 +14,7 @@ The benchmark implementation run command will automatically download the validat ### Get Validation Dataset ``` - cm run script --tags=get,dataset,criteo,_validation -j + mlcr get,dataset,criteo,_validation -j ``` ## Model The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. 
@@ -25,6 +25,6 @@ Get the Official MLPerf DLRM v2 Model ### Pytorch ``` - cm run script --tags=get,ml-model,dlrm,_pytorch -j + mlcr get,ml-model,dlrm,_pytorch -j ``` diff --git a/docs/benchmarks/text_to_image/get-sdxl-data.md b/docs/benchmarks/text_to_image/get-sdxl-data.md index f0d1376bd..6d79e331d 100644 --- a/docs/benchmarks/text_to_image/get-sdxl-data.md +++ b/docs/benchmarks/text_to_image/get-sdxl-data.md @@ -14,7 +14,7 @@ The benchmark implementation run command will automatically download the validat ### Get Validation Dataset ``` - cm run script --tags=get,dataset,coco2014,_validation -j + mlcr get,dataset,coco2014,_validation -j ``` ## Model @@ -26,6 +26,6 @@ Get the Official MLPerf Stable Diffusion Model ### Pytorch ``` - cm run script --tags=get,ml-model,sdxl,_pytorch -j + mlcr get,ml-model,sdxl,_pytorch -j ``` diff --git a/docs/benchmarks/text_to_image/reproducibility/scc24.md b/docs/benchmarks/text_to_image/reproducibility/scc24.md index 82a231b4d..6a989273e 100644 --- a/docs/benchmarks/text_to_image/reproducibility/scc24.md +++ b/docs/benchmarks/text_to_image/reproducibility/scc24.md @@ -23,14 +23,14 @@ Additionally, significant bonus points will be awarded if your team enhances an !!! info - Both MLPerf and CM automation are evolving projects. + Both MLPerf and MLC automation are evolving projects. If you encounter issues or have questions, please submit them [here](https://github.com/mlcommons/cm4mlops/issues) ## Artifacts to submit to the SCC committee You will need to submit the following files: -* `mlperf_submission.run` - CM commands to run MLPerf inference benchmark saved to this file. +* `mlperf_submission.run` - MLC commands to run MLPerf inference benchmark saved to this file. * `mlperf_submission.md` - description of your platform and some highlights of the MLPerf benchmark execution. * `` under which results are pushed to the github repository. @@ -61,7 +61,7 @@ or supporting multi-node execution) useful for the community and [MLCommons](htt ```bash -cm run script --tags=generate,inference,submission \ +mlcr generate,inference,submission \ --clean \ --run-checker \ --tar=yes \ @@ -85,7 +85,7 @@ Fork the `mlperf-inference-results-scc24` branch of the repository URL at [https Run the following command after **replacing `--repo_url` with your GitHub fork URL**. ```bash -cm run script --tags=push,github,mlperf,inference,submission \ +mlcr push,github,mlperf,inference,submission \ --repo_url=https://github.com//cm4mlperf-inference \ --repo_branch=mlperf-inference-results-scc24 \ --commit_message="Results on system " \ diff --git a/docs/install/index.md b/docs/install/index.md index 1750d86e4..a3bf002e3 100644 --- a/docs/install/index.md +++ b/docs/install/index.md @@ -4,28 +4,28 @@ hide: --- # Installation -We use MLCommons CM Automation framework to run MLPerf inference benchmarks. +We use MLCommons MLC Automation framework to run MLPerf inference benchmarks. -CM needs `git`, `python3-pip` and `python3-venv` installed on your system. If any of these are absent, please follow the [official CM installation page](https://docs.mlcommons.org/ck/install) to install them. Once the dependencies are installed, do the following +MLC needs `git`, `python3-pip` and `python3-venv` installed on your system. Once the dependencies are installed, do the following -## Activate a Virtual ENV for CM -This step is not mandatory as CM can use separate virtual environment for MLPerf inference. 
But the latest `pip` install requires this or else will need the `--break-system-packages` flag while installing `cm4mlops`. +## Activate a Virtual ENV for MLCFlow +This step is not mandatory as MLC can use separate virtual environment for MLPerf inference. But the latest `pip` install requires this or else will need the `--break-system-packages` flag while installing `mlc-scripts`. ```bash -python3 -m venv cm -source cm/bin/activate +python3 -m venv mlc +source mlc/bin/activate ``` -## Install CM and pulls any needed repositories -=== "Use the default fork of CM MLOps repository" +## Install MLC and pulls any needed repositories +=== "Use the default fork of MLC-Scripts repository" ```bash - pip install cm4mlops + pip install mlc-scripts ``` -=== "Use custom fork/branch of the CM MLOps repository" +=== "Use custom fork/branch of the MLC-Scripts repository" ```bash - pip install cmind && cm init --quiet --repo=mlcommons@cm4mlops --branch=mlperf-inference + pip install mlcflow && mlc pull repo --url=mlcommons@cm4mlops --branch=mlperf-inference ``` Here, `repo` is in the format `githubUsername@githubRepo`. -Now, you are ready to use the `cm` commands to run MLPerf inference as given in the [benchmarks](../index.md) page +Now, you are ready to use the `mlcr` commands to run MLPerf inference as given in the [benchmarks](../index.md) page diff --git a/docs/submission/index.md b/docs/submission/index.md index 64ef5afa7..97414e8bb 100644 --- a/docs/submission/index.md +++ b/docs/submission/index.md @@ -3,10 +3,12 @@ hide: - toc --- -Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD-mZ2vmSb-yETkTA8/edit?usp=sharing) to view the proposal slide for Common Automation for MLPerf Inference Submission Generation through CM. +Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD-mZ2vmSb-yETkTA8/edit?usp=sharing) to view the proposal slide for Common Automation for MLPerf Inference Submission Generation through MLCFlow. + +Please refer to the [installation page](site:inference/install/) to install MLCFlow for automating the submission generation. In a typical development environment `pip install mlc-scripts` should be enough. === "Custom automation based MLPerf results" - If you have not followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, please make sure that the result directory is structured in the following way. You can see the real examples for the expected folder structure [here](https://github.com/mlcommons/inference/tree/submission-generation-examples). + If you have not followed the `mlcr` commands under the individual model pages in the [benchmarks](../index.md) directory, please make sure that the result directory is structured in the following way. You can see the real examples for the expected folder structure [here](https://github.com/mlcommons/inference/tree/submission-generation-examples). ``` └── System description ID(SUT Name) ├── system_meta.json @@ -51,11 +53,11 @@ Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD ``` -=== "CM automation based results" - If you have followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, all the valid results will get aggregated to the `cm cache` folder. The following command could be used to browse the structure of inference results folder generated by CM. 
+=== "MLC automation based results" + If you have followed the `mlcr` commands under the individual model pages in the [benchmarks](../index.md) directory, all the valid results will get aggregated to the `mlc cache` folder. The following command could be used to browse the structure of inference results folder generated by MLCFlow. ### Get results folder structure ```bash - cm find cache --tags=get,mlperf,inference,results,dir | xargs tree + mlc find cache --tags=get,mlperf,inference,results,dir | xargs tree ``` @@ -83,13 +85,13 @@ flowchart LR ### Command to generate submission folder ```bash -cm run script --tags=generate,inference,submission \ +mlcr generate,inference,submission \ --clean \ --preprocess_submission=yes \ - --run-checker=yes \ + --run_checker=yes \ --submitter=MLCommons \ --division=closed \ - --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ + --env.MLC_DETERMINE_MEMORY_CONFIGURATION=yes \ --quiet ``` !!! tip @@ -99,9 +101,9 @@ cm run script --tags=generate,inference,submission \ * Use `--hw_notes_extra` option to add additional notes like `--hw_notes_extra="Result taken by NAME" ` - * Use `--results_dir` option to specify the results folder. It is automatically taken from CM cache for MLPerf automation based runs + * Use `--results_dir` option to specify the results folder. It is automatically taken from MLC cache for MLPerf automation based runs - * Use `--submission_dir` option to specify the submission folder. (You can avoid this if you're pushing to github or only running a single SUT and CM will use its cache folder) + * Use `--submission_dir` option to specify the submission folder. (You can avoid this if you're pushing to github or only running a single SUT and MLC will use its cache folder) * Use `--division=open` for open division submission @@ -145,7 +147,7 @@ If there are multiple systems where MLPerf results are collected, the same proce Run the following command after **replacing `--repo_url` with your GitHub repository URL**. ```bash - cm run script --tags=push,github,mlperf,inference,submission \ + mlcr push,github,mlperf,inference,submission \ --repo_url=https://github.com/mlcommons/mlperf_inference_submissions_v5.0 \ --commit_message="Results on added by " \ --quiet @@ -182,14 +184,14 @@ Once you have all the results on the system, you can upload them to the MLCommon === "via CLI" You can do the following command which will run the submission checker and upload the results to the MLCommons submission server ``` - cm run script --tags=run,submission,checker \ + mlcr run,submission,checker \ --submitter_id=<> \ --submission_dir= ``` === "via Browser" You can do the following command to generate the final submission tar file and then upload to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission). ``` - cm run script --tags=run,submission,checker \ + mlcr run,submission,checker \ --submission_dir= \ --tar=yes \ --submission_tar_file=mysubmission.tar.gz diff --git a/docs/usage/index.md b/docs/usage/index.md index 01d54c3aa..ad322582c 100644 --- a/docs/usage/index.md +++ b/docs/usage/index.md @@ -3,4 +3,4 @@ hide: - toc --- -# Using CM for MLPerf Inference +# Using MLC for MLPerf Inference diff --git a/graph/R-GAT/README.md b/graph/R-GAT/README.md index 4638d526f..1380cb047 100644 --- a/graph/R-GAT/README.md +++ b/graph/R-GAT/README.md @@ -17,11 +17,11 @@ This is the reference implementation for MLPerf Inference Graph Neural Network. 
| IGBH | Illinois Graph Benchmark Heterogeneous is a graph dataset consisting of one heterogeneous graph with 547,306,935 nodes and 5,812,005,639 edges. Node types: Author, Conference, FoS, Institute, Journal, Paper. A subset of 1% of the paper nodes are randomly choosen as the validation dataset using the [split seeds script](tools/split_seeds.py). The validation dataset will be used as the input queries for the SUT, however the whole dataset is needed to run the benchmarks, since all the graph connections are needed to achieve the quality target. | Node Classification | | IGBH (calibration) | We sampled 5000 nodes from the training paper nodes of the IGBH for the calibration dataset. We provide the [Node ids](../../calibration/IGBH/calibration.txt) and the [script](tools/split_seeds.py) to generate them (using the `--calibration` flag). | Node Classification | -## Automated command to run the benchmark via MLCommons CM +## Automated command to run the benchmark via MLCFlow Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/graph/rgat/) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. -You can also do `pip install cm4mlops` and then use `cm` commands for downloading the model and datasets using the commands given in the later sections. +You can also do `pip install mlc-scripts` and then use `mlcr` commands for downloading the model and datasets using the commands given in the later sections. ## Setup Set the following helper variables @@ -77,10 +77,10 @@ pip install dgl -f https://data.dgl.ai/wheels/torch-2.1/repo.html ``` -### Download model through CM (Collective Minds) +### Download model through MLCFlow Automation ``` -cm run script --tags=get,ml-model,rgat --outdirname= +mlcr get,ml-model,rgat --outdirname= ``` ### Download model using Rclone @@ -106,9 +106,9 @@ rclone copy mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt $MODEL_PAT ### Download and setup dataset #### Debug Dataset -**CM Command** +**MLC Command** ``` -cm run script --tags=get,dataset,igbh,_debug --outdirname= +mlcr get,dataset,igbh,_debug --outdirname= ``` **Download Dataset** @@ -128,9 +128,9 @@ python3 tools/split_seeds.py --path igbh --dataset_size tiny #### Full Dataset **Warning:** This script will download 2.2TB of data -**CM Command** +**MLC Command** ``` -cm run script --tags=get,dataset,igbh,_full --outdirname= +mlcr get,dataset,igbh,_full --outdirname= ``` ```bash @@ -149,9 +149,9 @@ python3 tools/split_seeds.py --path igbh --dataset_size full The calibration dataset contains 5000 nodes from the training paper nodes of the IGBH dataset. We provide the [Node ids](../../calibration/IGBH/calibration.txt) and the [script](tools/split_seeds.py) to generate them (using the `--calibration` flag). -**CM Command** +**MLC Command** ``` -cm run script --tags=get,dataset,igbh,_full,_calibration --outdirname= +mlcr get,dataset,igbh,_full,_calibration --outdirname= ``` ### Run the benchmark @@ -176,7 +176,7 @@ python3 main.py --dataset igbh-dgl --dataset-path igbh/ --profile rgat-dgl-full ### Evaluate the accuracy ```bash -cm run script --tags=process,mlperf,accuracy,_igbh --result_dir= +mlcr process,mlperf,accuracy,_igbh --result_dir= ``` Please click [here](https://github.com/mlcommons/inference/blob/dev/graph/R-GAT/tools/accuracy_igbh.py) to view the Python script for evaluating accuracy for the IGBH dataset. 
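The R-GAT README changes above collapse each `cm run script --tags=...` invocation into a single `mlcr` call. A minimal end-to-end sketch of that flow, using only commands that appear in the updated README; the directories `./model`, `./igbh`, and `./results` are illustrative placeholders, not paths required by the patch:

```bash
# Sketch of the MLC-based R-GAT workflow described in the README diff above.
pip install mlc-scripts                              # MLC automation CLI

mlcr get,ml-model,rgat --outdirname=./model -j       # reference RGAT.pt checkpoint
mlcr get,dataset,igbh,_debug --outdirname=./igbh -j  # small debug split of IGBH
# mlcr get,dataset,igbh,_full --outdirname=./igbh    # full split (~2.2 TB download)

# Reference run as shown in the README; the full profile expects the full IGBH dataset.
python3 main.py --dataset igbh-dgl --dataset-path igbh/ --profile rgat-dgl-full

# Accuracy post-processing of the generated logs.
mlcr process,mlperf,accuracy,_igbh --result_dir=./results
```
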
diff --git a/language/bert/README.md b/language/bert/README.md index 6e0468666..e60745d45 100644 --- a/language/bert/README.md +++ b/language/bert/README.md @@ -33,7 +33,7 @@ Please run the following commands: - `make launch_docker`: launch docker container with an interaction session. - `python3 run.py --backend=[tf|pytorch|onnxruntime|tf_estimator] --scenario=[Offline|SingleStream|MultiStream|Server] [--accuracy] [--quantized]`: run the harness inside the docker container. Performance or Accuracy results will be printed in console. -* ENV variable `CM_MAX_NUM_THREADS` can be used to control the number of parallel threads issuing queries. +* ENV variable `MLC_MAX_NUM_THREADS` can be used to control the number of parallel threads issuing queries. ## Details @@ -48,13 +48,13 @@ Please run the following commands: ## Loadgen over the Network ``` -pip install cm4mlops +pip install mlc-scripts ``` -The below CM command will launch the SUT server +The below MLC command will launch the SUT server ``` -cm run script --tags=generate-run-cmds,inference --model=bert-99 --backend=pytorch \ +mlcr generate-run-cmds,inference --model=bert-99 --backend=pytorch \ --mode=performance --device=cuda --quiet --test_query_count=1000 --network=sut ``` @@ -62,12 +62,12 @@ Once the SUT server is launched, the below command can be run on the loadgen nod ``` -cm run script --tags=generate-run-cmds,inference --model=bert-99 --backend=pytorch --rerun \ +mlcr generate-run-cmds,inference --model=bert-99 --backend=pytorch --rerun \ --mode=performance --device=cuda --quiet --test_query_count=1000 \ --sut_servers,=http://localhost:8000 --network=lon ``` -If you are not using CM, just add `--network=lon` along with your normal run command on the SUT side. +If you are not using MLC, just add `--network=lon` along with your normal run command on the SUT side. On the loadgen node, add `--network=lon` option and `--sut_server ` to the normal command to connect to SUT nodes at IP addresses IP1, IP2 etc. Loadgen over the network works for `onnxruntime` and `pytorch` backends. diff --git a/language/bert/bert_QDL.py b/language/bert/bert_QDL.py index 6e804a799..f18c9f571 100644 --- a/language/bert/bert_QDL.py +++ b/language/bert/bert_QDL.py @@ -82,7 +82,7 @@ def process_query_async(self, query_samples): max_num_threads = int( os.environ.get( - "CM_MAX_NUM_THREADS", + "MLC_MAX_NUM_THREADS", os.cpu_count())) for i in range(len(query_samples)): diff --git a/language/bert/onnxruntime_SUT.py b/language/bert/onnxruntime_SUT.py index 598192509..ee4882654 100644 --- a/language/bert/onnxruntime_SUT.py +++ b/language/bert/onnxruntime_SUT.py @@ -69,7 +69,7 @@ def __init__(self, args): def issue_queries(self, query_samples): max_num_threads = int( os.environ.get( - "CM_MAX_NUM_THREADS", + "MLC_MAX_NUM_THREADS", os.cpu_count())) for i in range(len(query_samples)): diff --git a/language/bert/ray_SUT.py b/language/bert/ray_SUT.py index 6280d0eff..d947d15ed 100644 --- a/language/bert/ray_SUT.py +++ b/language/bert/ray_SUT.py @@ -42,7 +42,7 @@ raise Exception("Error importing local modules") # Adjustable Parameters -# Note. num_samples (called "test_query_count" in CM) must be a multiple +# Note. 
num_samples (called "test_query_count" in MLC) must be a multiple # of batch_size BATCH_SIZE = 16 diff --git a/language/gpt-j/GPTJ_QDL.py b/language/gpt-j/GPTJ_QDL.py index f75ece48d..e450beae6 100644 --- a/language/gpt-j/GPTJ_QDL.py +++ b/language/gpt-j/GPTJ_QDL.py @@ -58,7 +58,7 @@ def process_query_async(self, query_samples): max_num_threads = int( os.environ.get( - "CM_MAX_NUM_THREADS", + "MLC_MAX_NUM_THREADS", os.cpu_count())) if self.scenario == "Offline": # Client sends multiple requests using threads diff --git a/language/gpt-j/README.md b/language/gpt-j/README.md index 765317635..cfcf06879 100644 --- a/language/gpt-j/README.md +++ b/language/gpt-j/README.md @@ -74,12 +74,12 @@ python prepare-calibration.py --calibration-list-file calibration-list.txt --out ### Download GPT-J model Please download the fine-tuned GPT-J checkpoint using the instructions below. The download_gptj.py only downloads the default huggingface model which is not fine-tuned on CNN-Daily mail dataset. -#### CM method +#### MLC method -The following MLCommons CM commands can be used to programmatically download the model checkpoint. +The following MLCommons MLC commands can be used to programmatically download the model checkpoint. ``` -cm run script --tags=get,ml-model,gptj,_pytorch,_rclone ---outdirname =./model -P +mlcr get,ml-model,gptj,_pytorch,_rclone ---outdirname =./model -P ``` #### Manual method @@ -142,13 +142,13 @@ Intel expressly disclaims the accuracy, adequacy, or completeness of any data, d ## Loadgen over the Network ``` -pip install cm4mlops +pip install mlc-scripts ``` -The below CM command will launch the SUT server +The below MLC command will launch the SUT server ``` -cm run script --tags=run-mlperf,inference,_performance-only --model=gptj-99 \ +mlcr run-mlperf,inference,_performance-only --model=gptj-99 \ --backend=pytorch --device=cuda --beam_size=1 --precision=bfloat16 \ --network=sut --rerun --quiet --adr.compiler.tags=gcc ``` @@ -159,7 +159,7 @@ In our experimentation, we found out that in addition to memory occupied by the Once the SUT server is launched, the below command can be run on the loadgen node to do issue queries to the SUT nodes. In this command `-sut_servers` has just the localhost address - it can be changed to a comma-separated list of any hostname/IP in the network. ``` -cm run script --tags=run-mlperf,inference,_performance-only --model=gptj-99 \ +mlcr run-mlperf,inference,_performance-only --model=gptj-99 \ --backend=pytorch --test_query_count=30 \ --network=lon --rerun --quiet --scenario=Offline \ --sut_servers,=http://localhost:8000 --adr.compiler.tags=gcc diff --git a/language/gpt-j/main.py b/language/gpt-j/main.py index 438c0854b..981d2cd9d 100644 --- a/language/gpt-j/main.py +++ b/language/gpt-j/main.py @@ -189,7 +189,7 @@ def main(): if args.gpu: free_mem = int( os.environ.get( - "CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY", get_gpu_memory_info() + "MLC_CUDA_DEVICE_PROP_GLOBAL_MEMORY", get_gpu_memory_info() ) ) / (1024**3) else: diff --git a/language/llama2-70b/README.md b/language/llama2-70b/README.md index 5268863ac..506423cc2 100644 --- a/language/llama2-70b/README.md +++ b/language/llama2-70b/README.md @@ -9,14 +9,10 @@ Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/llama2-70b) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. 
+You can also do `pip install mlc-scripts` and then use `mlcr` commands for downloading the model and datasets using the commands given in the later sections. ## Prepare environment -Copy the mlperf.conf file to this folder. -``` -cp ../../mlperf.conf . -``` - For a CPU-only run: ``` @@ -69,8 +65,12 @@ CPU-only setup, as well as any GPU versions for applicable libraries like PyTorc ### MLCommons Members Download MLCommons hosts the model and preprocessed dataset for download **exclusively by MLCommons Members**. You must first agree to the [confidentiality notice](https://llama2.mlcommons.org) using your organizational email address, then you will receive a link to a directory containing Rclone download instructions. _If you cannot access the form but you are part of a MLCommons Member organization, submit the [MLCommons subscription form](https://mlcommons.org/community/subscribe/) with your organizational email address and [associate a Google account](https://accounts.google.com/SignUpWithoutGmail) with your organizational email address._ +Once you have the access, you can download the model automatically via the below command +``` +mlcr get,ml-model,llama2 --outdirname=${CHECKPOINT_PATH} -j +``` -### External Download +### External Download (Not recommended for official submission) + First go to [llama2-request-link](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and make a request, sign in to HuggingFace (if you don't have account, you'll need to create one). **Please note your authentication credentials** as you may be required to provide them when cloning below. + Requires Git Large Files Storage ``` @@ -181,7 +181,6 @@ mkdir -p "run_outputs" # The script will dump all the outputs to 'run_outputs'. python -u main.py --scenario Offline \ --model-path ${CHECKPOINT_PATH} \ --accuracy \ - --mlperf-conf mlperf.conf \ --user-conf user.conf \ --total-sample-count 24576 \ --dataset-path ${DATASET_PATH} \ @@ -218,7 +217,6 @@ OUTPUT_LOG_DIR=server-accuracy-logs python -u main.py --scenario Server \ --model-path ${CHECKPOINT_PATH} \ --accuracy \ - --mlperf-conf mlperf.conf \ --user-conf user.conf \ --total-sample-count 24576 \ --dataset-path ${DATASET_PATH} \ @@ -255,7 +253,6 @@ In order to run interactive category, it is sufficient to set the flag `--lg-mod ``` python -u main.py --scenario Server \ --model-path ${CHECKPOINT_PATH} \ - --mlperf-conf mlperf.conf \ --user-conf user.conf \ --total-sample-count 24576 \ --device cpu \ diff --git a/language/llama3.1-405b/README.md b/language/llama3.1-405b/README.md index ea358bb98..50668263c 100644 --- a/language/llama3.1-405b/README.md +++ b/language/llama3.1-405b/README.md @@ -7,13 +7,11 @@ - For server scenario, it is necessary to call `lg.FirstTokenComplete(response)` for each query. This way the first token will be reported and it's latency will be measured. - For all scenarios, when calling `lg.QuerySamplesComplete(response)`, it is necessary that each of the elements in response is a `lg.QuerySampleResponse` that contains the number of tokens (can be create this way: `lg.QuerySampleResponse(qitem.id, bi[0], bi[1], n_tokens)`). The number of tokens reported should match with the number of tokens on your answer and this will be checked in [TEST06](../../compliance/nvidia/TEST06/) -Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/llama3.1-405b) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. 
- -## Automated command to run the benchmark via MLCommons CM +## Automated command to run the benchmark via MLFlow Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/llama3_1-405b/) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. -You can also do pip install cm4mlops and then use cm commands for downloading the model and datasets using the commands given in the later sections. +You can also do pip install mlc-scripts and then use `mlcr` commands for downloading the model and datasets using the commands given in the later sections. ## Prepare environment @@ -99,12 +97,15 @@ pip install -e ../../loadgen ## Get Model -### MLCommons Members Download +### MLCommons Members Download (Recommended for official submission) -TODO: Host model and grant access to submitters +You need to request for access to [MLcommons](http://llama3-1.mlcommons.org/) and you'll receive an email with the download instructions. You can download the model automatically via the below command +``` +mlcr get,ml-model,llama3 --outdirname=${CHECKPOINT_PATH} -j +``` -### External Download +### External Download (Not recommended for official submission) + First go to [llama3.1-request-link](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and make a request, sign in to HuggingFace (if you don't have account, you'll need to create one). **Please note your authentication credentials** as you may be required to provide them when cloning below. + Requires Git Large Files Storage ``` @@ -114,10 +115,10 @@ git clone https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct ${CHECKPOINT cd ${CHECKPOINT_PATH} && git checkout be673f326cab4cd22ccfef76109faf68e41aa5f1 ``` -### Download model through CM (Collective Mind) +### Download huggingface model through MLC ``` -cm run script --tags=get,ml-model,llama3 --outdirname=${CHECKPOINT_PATH} --hf_token= -j +mlcr get,ml-model,llama3,_hf --outdirname=${CHECKPOINT_PATH} --hf_token= -j ``` **Note:** @@ -143,10 +144,10 @@ You can then navigate in the terminal to your desired download directory and run ``` rclone copy mlc-inference:mlcommons-inference-wg-public/llama3.1_405b/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl ./ -P ``` -**CM Command** +**MLC Command** ``` -cm run script --tags=get,dataset,mlperf,inference,llama3,_validation --outdirname= -j +mlcr get,dataset,mlperf,inference,llama3,_validation --outdirname= -j ``` You can also download the calibration dataset from the Cloudflare R2 bucket by running the following command: @@ -155,9 +156,9 @@ You can also download the calibration dataset from the Cloudflare R2 bucket by r rclone copy mlc-inference:mlcommons-inference-wg-public/llama3.1_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl ./ -P ``` -**CM Command** +**MLC Command** ``` -cm run script --tags=get,dataset,mlperf,inference,llama3,_calibration --outdirname= -j +mlcr get,dataset,mlperf,inference,llama3,_calibration --outdirname= -j ``` @@ -250,10 +251,10 @@ fi The ServerSUT was not tested for GPU runs. 
-### Evaluate the accuracy using CM -You can also evaulate the accuracy from the generated accuracy log by using the following CM command +### Evaluate the accuracy using MLCFlow +You can also evaulate the accuracy from the generated accuracy log by using the following MLC command ``` -cm run script --tags=process,mlperf,accuracy,_dataset_llama3 --result_dir= +mlcr process,mlperf,accuracy,_dataset_llama3 --result_dir= ``` ## Accuracy Target diff --git a/main.py b/main.py index fc5a1ef5b..48923e290 100755 --- a/main.py +++ b/main.py @@ -28,7 +28,7 @@ def mlperf_inference_implementation_readme( content = "" execution_envs = ["Docker", "Native"] - code_version = "r5.0-dev" + code_version = "r4.1-dev" implementation_run_options = [] if model == "rnnt": @@ -36,6 +36,8 @@ def mlperf_inference_implementation_readme( if implementation == "reference": # Tip + if model != "rnnt": + code_version = "r5.0-dev" if "99.9" not in model and implementation_tips: content += f"\n{pre_space}!!! tip\n\n" content += f"{pre_space} - MLCommons reference implementations are only meant to provide a rules compliant reference implementation for the submitters and in most cases are not best performing. If you want to benchmark any system, it is advisable to use the vendor MLPerf implementation for that system like Nvidia, Intel etc.\n\n" @@ -124,7 +126,8 @@ def mlperf_inference_implementation_readme( if not categories: if model.lower() == "bert-99.9": categories = ["Datacenter"] - + elif model.lower() in ["pointpainting"]: + categories = ["Edge"] elif ( "dlrm" in model.lower() or "llama2" in model.lower() @@ -146,6 +149,8 @@ def mlperf_inference_implementation_readme( if model.lower() in [ "resnet50", "retinanet"] and not "MultiStream" in scenarios: # MultiStream was duplicating scenarios.append("MultiStream") + if model.lower() in ["pointpainting"]: + scenarios.remove("Offline") elif category == "Datacenter": scenarios = ["Offline", "Server"] if fixed_scenarios: @@ -192,8 +197,8 @@ def mlperf_inference_implementation_readme( continue # Nvidia implementation only supports execution through docker content += f'{cur_space2}=== "{execution_env}"\n' content += f"{cur_space3}###### {execution_env} Environment\n\n" - # ref to cm installation - content += f"{cur_space3}Please refer to the [installation page](site:inference/install/) to install CM for running the automated benchmark commands.\n\n" + # ref to MLCFlow installation + content += f"{cur_space3}Please refer to the [installation page](site:inference/install/) to install MLCFlow for running the automated benchmark commands.\n\n" test_query_count = get_test_query_count( model, implementation, device.lower() @@ -227,7 +232,8 @@ def mlperf_inference_implementation_readme( implementation, framework.lower(), category.lower(), - "Offline", + "SingleStream" if model.lower() in [ + "pointpainting"] else "Offline", device.lower(), "test", test_query_count, @@ -283,12 +289,12 @@ def mlperf_inference_implementation_readme( content += f"{cur_space3}The above command should get you to an interactive shell inside the docker container and do a quick test run for the Offline scenario. Once inside the docker container please do the below commands to do the accuracy + performance runs for {scenario_text}.\n\n" content += f"{cur_space3}
\n" content += f"{cur_space3} Please click here to see more options for the docker launch \n\n" - content += f"{cur_space3}* `--docker_cm_repo=`: to use a custom fork of cm4mlops repository inside the docker image\n\n" - content += f"{cur_space3}* `--docker_cm_repo_branch=`: to checkout a custom branch of the cloned cm4mlops repository inside the docker image\n\n" + content += f"{cur_space3}* `--docker_mlc_repo=`: to use a custom fork of cm4mlops repository inside the docker image\n\n" + content += f"{cur_space3}* `--docker_mlc_repo_branch=`: to checkout a custom branch of the cloned cm4mlops repository inside the docker image\n\n" content += f"{cur_space3}* `--docker_cache=no`: to not use docker cache during the image build\n" if implementation.lower() == "nvidia": - content += f"{cur_space3}* `--gpu_name=` : The GPUs with supported configs in CM are `orin`, `rtx_4090`, `rtx_a6000`, `rtx_6000_ada`, `l4`, `t4`and `a100`. For other GPUs, default configuration as per the GPU memory will be used.\n" + content += f"{cur_space3}* `--gpu_name=` : The GPUs with supported configs in MLC are `orin`, `rtx_4090`, `rtx_a6000`, `rtx_6000_ada`, `l4`, `t4`and `a100`. For other GPUs, default configuration as per the GPU memory will be used.\n" if device.lower() not in ["cuda"]: content += f"{cur_space3}* `--docker_os=ubuntu`: ubuntu and rhel are supported. \n" @@ -305,7 +311,8 @@ def mlperf_inference_implementation_readme( implementation, framework.lower(), category.lower(), - "Offline", + "SingleStream" if model.lower() in [ + "pointpainting"] else "Offline", device.lower(), "test", test_query_count, @@ -321,10 +328,17 @@ def mlperf_inference_implementation_readme( run_suffix += f"{cur_space3} Please click here to see more options for the RUN command\n\n" run_suffix += f"{cur_space3}* Use `--division=closed` to do a closed division submission which includes compliance runs\n\n" run_suffix += f"{cur_space3}* Use `--rerun` to do a rerun even when a valid run exists\n" + run_suffix += f"{cur_space3}* Use `--compliance` to do the compliance runs (only applicable for closed division) once the valid runs are successful\n" + if implementation.lower() == "nvidia": - run_suffix += f"{cur_space3}* `--gpu_name=` : The GPUs with supported configs in CM are `orin`, `rtx_4090`, `rtx_a6000`, `rtx_6000_ada`, `l4`, `t4`and `a100`. For other GPUs, default configuration as per the GPU memory will be used.\n" + run_suffix += f"{cur_space3}* `--gpu_name=` : The GPUs with supported configs in MLC are `orin`, `rtx_4090`, `rtx_a6000`, `rtx_6000_ada`, `l4`, `t4`and `a100`. For other GPUs, default configuration as per the GPU memory will be used.\n" run_suffix += f"{cur_space3}
\n\n" + if ( + "bert" in model.lower() + and framework.lower() == "deepsparse" + ): + run_suffix += "You can use any model from [NeuralMagic sparse zoo](https://sparsezoo.neuralmagic.com/?modelSet=computer_vision&architectures=resnet_v1) (trained on Imagenet dataset) as --nm_model_zoo_stub" if ( "bert" in model.lower() and framework.lower() == "deepsparse" @@ -446,6 +460,8 @@ def get_min_system_requirements(spaces, model, implementation, device): device_memory = "24GB(fp32), 16GB(fp16)" elif "gptj" in model: device_memory = "80GB(fp32). 40GB(fp16)" + elif "pointpainting" in model: + device_memory = "To be updated" else: device_memory = "8GB" min_sys_req_content += f"{spaces}* **Device Memory**: {device_memory}\n\n" @@ -458,6 +474,8 @@ def get_min_system_requirements(spaces, model, implementation, device): disk_space = "100GB" elif "retinanet" in model: disk_space = "200GB" + elif "pointpainting" in model: + disk_space = "To be updated" else: disk_space = "50GB" min_sys_req_content += f"{spaces}* **Disk Space**: {disk_space}\n\n" @@ -476,7 +494,7 @@ def get_inference_server_run_cmd(spaces, implementation): pre_space = " " * spaces return f"""\n {pre_space}```bash -{pre_space}cm run script --tags=run,vllm-server \\ +{pre_space}mlcr run,vllm-server \\ {indent}--model=nm-testing/Llama-2-70b-chat-hf-FP8 \\ {indent}--vllm_model_name=nm-testing/Llama-2-70b-chat-hf-FP8 \\ {indent}--quiet @@ -486,8 +504,8 @@ def get_venv_command(spaces): pre_space = " " * spaces return f"""\n {pre_space}```bash -{pre_space}cm run script --tags=install,python-venv --name=mlperf -{pre_space}export CM_SCRIPT_EXTRA_CMD=\"--adr.python.name=mlperf\" +{pre_space}mlcr install,python-venv --name=mlperf +{pre_space}export MLC_SCRIPT_EXTRA_CMD=\"--adr.python.name=mlperf\" {pre_space}```\n""" # contains run command information which is common to both docker and @@ -500,13 +518,21 @@ def get_common_info(spaces, implementation, model): pre_space += " " # pre_space = " " info += f"\n{pre_space}!!! tip\n\n" - info += f"{pre_space} - Number of threads could be adjusted using `--threads=#`, where `#` is the desired number of threads. This option works only if the implementation in use supports threading.\n\n" - info += f"{pre_space} - Batch size could be adjusted using `--batch_size=#`, where `#` is the desired batch size. This option works only if the implementation in use is supporting the given batch size.\n\n" - if implementation.lower() == "reference": + info += f"{pre_space} - Compliance runs can be enabled by adding `--compliance=yes`.\n\n" + if model.lower() not in ["pointpainting"]: + info += f"{pre_space} - Number of threads could be adjusted using `--threads=#`, where `#` is the desired number of threads. This option works only if the implementation in use supports threading.\n\n" + info += f"{pre_space} - Batch size could be adjusted using `--batch_size=#`, where `#` is the desired batch size. 
This option works only if the implementation in use is supporting the given batch size.\n\n" + elif model.lower() in ["pointpainting"]: + info += f"{pre_space} - The maximum duration for a performance run can be disabled by using `--env.MLC_MLPERF_USE_MAX_DURATION=no`.\n\n" + info += f"{pre_space} - In valid execution mode, the query count for performance mode can be adjusted using `--env.MLC_MLPERF_LOADGEN_QUERY_COUNT=`.\n\n" + + if implementation.lower() == "reference" and model.lower() not in [ + "pointpainting"]: + info += f"{pre_space} - `_r4.1-dev` could also be given instead of `_r5.0-dev` if you want to run the benchmark with the MLPerf version being 4.1.\n\n" if model == "rgat": - info += f"{pre_space} - Add `--env.CM_DATASET_IGBH_PATH=` if you have already downloaded the dataset. The path will be automatically mounted when using docker run.\n\n" - info += f"{pre_space} - Add `--env.CM_ML_MODEL_RGAT_CHECKPOINT_PATH=` if you have already downloaded the model. The path will be automatically mounted when using docker run.\n\n" + info += f"{pre_space} - Add `--env.MLC_DATASET_IGBH_PATH=` if you have already downloaded the dataset. The path will be automatically mounted when using docker run.\n\n" + info += f"{pre_space} - Add `--env.MLC_ML_MODEL_RGAT_CHECKPOINT_PATH=` if you have already downloaded the model. The path will be automatically mounted when using docker run.\n\n" if implementation.lower() == "reference": info += f"{pre_space} - Add `--adr.mlperf-implementation.tags=_branch.master,_repo.` if you are modifying the official MLPerf Inference implementation in a custom fork.\n\n" info += f"{pre_space} - Add `--adr.inference-src.tags=_repo.` if you are modifying the model config accuracy script in the submission checker within a custom fork.\n\n" @@ -526,10 +552,10 @@ def get_docker_info(spaces, model, implementation, info += f"\n{pre_space}!!! tip\n\n" if model == "sdxl": - info += f"{pre_space} - `--env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes` option can be used to download the model on the host so that it can be reused across different container lanuches. \n\n" + info += f"{pre_space} - `--env.MLC_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes` option can be used to download the model on the host so that it can be reused across different container lanuches. \n\n" elif "llama3" in model.lower(): - info += f"{pre_space} - `--env.CM_MLPERF_MODEL_LLAMA3_DOWNLOAD_TO_HOST=yes` option can be used to download the model on the host so that it can be reused across different container lanuches. \n\n" - info += f"{pre_space} - `--env.CM_MLPERF_DATASET_LLAMA3_DOWNLOAD_TO_HOST=yes` option can be used to download the dataset on the host so that it can be reused across different container lanuches. \n\n" + info += f"{pre_space} - `--env.MLC_MLPERF_MODEL_LLAMA3_DOWNLOAD_TO_HOST=yes` option can be used to download the model on the host so that it can be reused across different container lanuches. \n\n" + info += f"{pre_space} - `--env.MLC_MLPERF_DATASET_LLAMA3_DOWNLOAD_TO_HOST=yes` option can be used to download the dataset on the host so that it can be reused across different container lanuches. 
\n\n" if implementation.lower() == "nvidia": info += f"{pre_space} - Default batch size is assigned based on [GPU memory](https://github.com/mlcommons/cm4mlops/blob/dd0c35856969c68945524d5c80414c615f5fe42c/script/app-mlperf-inference-nvidia/_cm.yaml#L1129) or the [specified GPU](https://github.com/mlcommons/cm4mlops/blob/dd0c35856969c68945524d5c80414c615f5fe42c/script/app-mlperf-inference-nvidia/_cm.yaml#L1370). Please click more option for *docker launch* or *run command* to see how to specify the GPU name.\n\n" info += f"{pre_space} - When run with `--all_models=yes`, all the benchmark models of NVIDIA implementation can be executed within the same container.\n\n" @@ -539,7 +565,7 @@ def get_docker_info(spaces, model, implementation, else: if model == "sdxl": info += f"\n{pre_space}!!! tip\n\n" - info += f"{pre_space} - `--env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes` option can be used to download the model on the host so that it can be reused across different container lanuches. \n\n" + info += f"{pre_space} - `--env.MLC_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes` option can be used to download the model on the host so that it can be reused across different container lanuches. \n\n" # return empty string if nothing is filled inside the tip if info == f"\n{pre_space}!!! tip\n\n": @@ -603,6 +629,8 @@ def get_run_cmd_extra( extra_content += f"{f_pre_space} * `--precision=bfloat16` can give better performance \n" if "gptj" in model and implementation == "reference": extra_content += f"{f_pre_space} * `--beam-size=1` Beam size of 4 is mandatory for a closed division submission but reducing the beam size can help in running the model on GPUs with lower device memory\n" + if "pointpainting" in model and implementation == "reference": + extra_content += f"{f_pre_space} * Tjhe `pointpillars_checkpoint_path`, `deeplab_resnet50_path` and `waymo_path` do not need to be provided inside the Docker container as they are already registered in the MLC cache.\n" if extra_content: extra_content = f"{f_pre_space}!!! 
tip\n\n" + extra_content @@ -671,8 +699,10 @@ def mlperf_inference_run_command( docker_cmd_suffix += ( f" \\\n{pre_space} {extra_docker_input_string} {extra_input_string}" ) + if "resnet50" in model.lower() and framework == "deepsparse": + docker_cmd_suffix += f"\\\n{pre_space} --nm_model_zoo_stub=zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned85_quant-none-vnni" if "bert" in model.lower() and framework == "deepsparse": - docker_cmd_suffix += f"\\\n{pre_space} --env.CM_MLPERF_NEURALMAGIC_MODEL_ZOO_STUB=zoo:nlp/question_answering/mobilebert-none/pytorch/huggingface/squad/base_quant-none" + docker_cmd_suffix += f"\\\n{pre_space} --nm_model_zoo_stub=zoo:nlp/question_answering/mobilebert-none/pytorch/huggingface/squad/base_quant-none" if "llama2-70b" in model.lower(): if implementation == "nvidia": docker_cmd_suffix += f" \\\n{pre_space} --tp_size=2" @@ -687,6 +717,11 @@ def mlperf_inference_run_command( if "dlrm-v2" in model.lower() and implementation == "nvidia": docker_cmd_suffix += f" \\\n{pre_space} --criteo_day23_raw_data_path=" + if "pointpainting" in model.lower() and implementation == "reference": + docker_cmd_suffix += f" \\\n{pre_space} --pointpillars_checkpoint_path=" + docker_cmd_suffix += f" \\\n{pre_space} --deeplab_resnet50_path=" + docker_cmd_suffix += f" \\\n{pre_space} --waymo_path=" + if "short" in extra_variation_tags: full_ds_needed_tag = "" else: @@ -694,7 +729,7 @@ def mlperf_inference_run_command( docker_setup_cmd = f"""\n {f_pre_space}```bash -{f_pre_space}cm run script --tags=run-mlperf,inference,_find-performance,{full_ds_needed_tag}_{code_version}{scenario_variation_tag}{extra_variation_tags} \\ +{f_pre_space}mlcr run-mlperf,inference,_find-performance,{full_ds_needed_tag}_{code_version}{scenario_variation_tag}{extra_variation_tags} \\ {pre_space} --model={model} \\ {pre_space} --implementation={implementation} \\ {pre_space} --framework={framework} \\ @@ -711,8 +746,11 @@ def mlperf_inference_run_command( if execution_mode == "test" and not skip_test_query_count: cmd_suffix += f" \\\n {pre_space} --test_query_count={test_query_count}" + if "resnet50" in model.lower() and framework == "deepsparse": + cmd_suffix += f"\\\n{pre_space} --nm_model_zoo_stub=zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned85_quant-none-vnni" if "bert" in model.lower() and framework == "deepsparse": - cmd_suffix += f"\\\n{pre_space} --env.CM_MLPERF_NEURALMAGIC_MODEL_ZOO_STUB=zoo:nlp/question_answering/mobilebert-none/pytorch/huggingface/squad/base_quant-none" + cmd_suffix += f"\\\n{pre_space} --nm_model_zoo_stub=zoo:nlp/question_answering/mobilebert-none/pytorch/huggingface/squad/base_quant-none" + if "llama2-70b" in model.lower(): if implementation == "nvidia": cmd_suffix += f" \\\n{pre_space} --tp_size=" @@ -722,12 +760,22 @@ def mlperf_inference_run_command( cmd_suffix += f" \\\n{pre_space} --vllm_model_name=nm-testing/Llama-2-70b-chat-hf-FP8" cmd_suffix += f" \\\n{pre_space} --adr.mlperf-implementation.tags=_repo.https://github.com/neuralmagic/inference,_branch.vllm" + if "pointpainting" in model.lower() and implementation == "reference": + cmd_suffix += f" \\\n{pre_space} --pointpillars_checkpoint_path=" + cmd_suffix += f" \\\n{pre_space} --deeplab_resnet50_path=" + cmd_suffix += f" \\\n{pre_space} --waymo_path=" + if "dlrm-v2" in model and implementation == "nvidia": cmd_suffix += f" \\\n{pre_space} --criteo_day23_raw_data_path=" + if "short" in extra_variation_tags: + full_ds_needed_tag = "" + else: + full_ds_needed_tag = "_full," + run_cmd = 
f"""\n {f_pre_space}```bash -{f_pre_space}cm run script --tags=run-mlperf,inference,_{code_version}{scenario_variation_tag}{extra_variation_tags} \\ +{f_pre_space}mlcr run-mlperf,inference,{full_ds_needed_tag}_{code_version}{scenario_variation_tag}{extra_variation_tags} \\ {pre_space} --model={model} \\ {pre_space} --implementation={implementation} \\ {pre_space} --framework={framework} \\ diff --git a/mkdocs.yml b/mkdocs.yml index 9178191a3..9723075ff 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -28,8 +28,11 @@ nav: - Run Commands: benchmarks/text_to_image/sdxl.md - Reproducibility: - SCC24: benchmarks/text_to_image/reproducibility/scc24.md - - Object Detection: + - 2D Object Detection: - RetinaNet: benchmarks/object_detection/retinanet.md + - Automotive: + - 3D Object Detection: + - PointPainting: benchmarks/automotive/3d_object_detection/pointpainting.md - Medical Imaging: - 3d-unet: benchmarks/medical_imaging/3d-unet.md - Language Processing: @@ -45,7 +48,7 @@ nav: - DLRM-v2: benchmarks/recommendation/dlrm-v2.md - Graph Neural Networks: - R-GAT: benchmarks/graph/rgat.md - - Install CM: + - Install MLCFlow: - install/index.md - Submission: - Submission Generation: submission/index.md diff --git a/recommendation/dlrm_v2/pytorch/README.md b/recommendation/dlrm_v2/pytorch/README.md index d93e15ac3..6f09e26de 100755 --- a/recommendation/dlrm_v2/pytorch/README.md +++ b/recommendation/dlrm_v2/pytorch/README.md @@ -102,13 +102,13 @@ framework | Size in bytes (`du *`) | MD5 hash (`md5sum *`) N/A | pytorch | <2GB | - pytorch | 97.31GB | - -#### CM method +#### MLC method -The following MLCommons CM commands can be used to programmatically download the model checkpoint. +The following MLCommons MLC commands can be used to programmatically download the model checkpoint. ``` -pip install cm4mlops -cm run script --tags=get,ml-model,dlrm,_pytorch,_weight_sharded,_rclone -j +pip install mlc-scripts +mlcr get,ml-model,dlrm,_pytorch,_weight_sharded,_rclone -j ``` #### Manual method diff --git a/text_to_image/README.md b/text_to_image/README.md index 84c8c7245..b00595785 100644 --- a/text_to_image/README.md +++ b/text_to_image/README.md @@ -1,10 +1,10 @@ # MLPerf™ Inference Benchmarks for Text to Image -## Automated command to run the benchmark via MLCommons CM +## Automated command to run the benchmark via MLCFlow Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/text_to_image/sdxl) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. -You can also do `pip install cm4mlops` and then use `cm` commands for downloading the model and datasets using the commands given in the later sections. +You can also do `pip install mlc-scripts` and then use `mlcr` commands for downloading the model and datasets using the commands given in the later sections. ## Supported Models @@ -50,15 +50,15 @@ CFLAGS="-std=c++14" python setup.py install We host two checkpoints (fp32 and fp16) that are a snapshot of the [Hugging Face](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) pipeline at the time of the release of the benchmark. Download them and move them to your model path. -#### CM method +#### MLC method -The following MLCommons CM commands can be used to programmatically download the model checkpoints. +The following MLCommons MLC commands can be used to programmatically download the model checkpoints. 
``` -cm run script --tags=get,ml-model,sdxl,_fp16,_rclone --outdirname=$MODEL_PATH +mlcr get,ml-model,sdxl,_fp16,_rclone --outdirname=$MODEL_PATH ``` ``` -cm run script --tags=get,ml-model,sdxl,_fp32,_rclone --outdirname-$MODEL_PATH +mlcr get,ml-model,sdxl,_fp32,_rclone --outdirname=$MODEL_PATH ``` #### Manual method @@ -89,16 +89,16 @@ rclone copy mlc-inference:mlcommons-inference-wg-public/stable_diffusion_fp16 $M ### Download validation dataset -#### CM METHOD -The following MLCommons CM commands can be used to programmatically download the validation dataset. +#### MLC METHOD +The following MLCommons MLC commands can be used to programmatically download the validation dataset. ``` -cm run script --tags=get,dataset,coco2014,_validation,_full --outdirname=coco2014 +mlcr get,dataset,coco2014,_validation,_full --outdirname=coco2014 ``` For debugging you can download only a part of all the images in the dataset ``` -cm run script --tags=get,dataset,coco2014,_validation,_size.50 --outdirname=coco2014 +mlcr get,dataset,coco2014,_validation,_size.50 --outdirname=coco2014 ``` @@ -116,11 +116,11 @@ If the file [captions.tsv](coco2014/captions/captions.tsv) can be found in the s ### Download Calibration dataset (only if you are doing quantization) -#### CM METHOD -The following MLCommons CM commands can be used to programmatically download the calibration dataset. +#### MLC METHOD +The following MLCommons MLC commands can be used to programmatically download the calibration dataset. ``` -cm run script --tags=get,dataset,coco2014,_calibration --outdirname=coco2014 +mlcr get,dataset,coco2014,_calibration --outdirname=coco2014 ``` diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 8e131d4a3..c3c30c14d 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -1702,7 +1702,7 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res): samples_per_query = 8 if (scenario_fixed in ["MultiStream"] - ) and scenario in ["SingleStream"]: + ) and scenario in ["SingleStream"]: power_metric = ( avg_power * power_duration * samples_per_query * 1000 / num_queries )
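For reference, every SUT touched by this patch consumes the renamed `MLC_MAX_NUM_THREADS` variable the same way: it caps the number of worker threads used to issue loadgen queries and falls back to the host CPU count when unset. Below is a minimal sketch of that pattern; `issue_one_query` and the sample list are illustrative stand-ins, not functions from the benchmark code.

```python
import os
import threading


def issue_queries(query_samples, issue_one_query):
    """Issue loadgen query samples across a bounded pool of threads.

    Mirrors the pattern used by the BERT/GPT-J SUTs after the CM_* -> MLC_*
    rename: the thread budget comes from MLC_MAX_NUM_THREADS and defaults to
    the host CPU count. `issue_one_query` is a hypothetical per-sample callback.
    """
    max_num_threads = int(
        os.environ.get("MLC_MAX_NUM_THREADS", os.cpu_count()))
    budget = threading.Semaphore(max_num_threads)
    threads = []

    def worker(sample):
        try:
            issue_one_query(sample)
        finally:
            budget.release()  # free a slot for the next pending sample

    for sample in query_samples:
        budget.acquire()  # block until a slot in the thread budget frees up
        t = threading.Thread(target=worker, args=(sample,))
        t.start()
        threads.append(t)

    for t in threads:
        t.join()
```

With this pattern, running for example `MLC_MAX_NUM_THREADS=8 python3 run.py ...` bounds the SUT to eight concurrent query threads, while leaving the variable unset falls back to `os.cpu_count()`.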