
Commit e203298

PR feedback

* Missing GPUBufferDataTypes to MLBufferDataTypes changes
* Remove unused [jsep]GetBuffer method
* Enable io-binding on test-runner-cli
* Correctly use MLContext on test-runner
* Hoist `shouldTransferToMLBuffer` closer to the start of DataTransfer::CopyTensor
* Corrected indentation on pre-jsep.js

1 parent cd1b01a commit e203298

File tree

10 files changed: +55 −74 lines changed

js/common/lib/tensor-factory-impl.ts (+3 −3)

@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.

-import {OptionsDimensions, OptionsFormat, OptionsNormalizationParameters, OptionsTensorFormat, OptionsTensorLayout, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromTextureOptions, TensorFromUrlOptions} from './tensor-factory.js';
+import {OptionsDimensions, OptionsFormat, OptionsNormalizationParameters, OptionsTensorFormat, OptionsTensorLayout, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromMLBufferOptions, TensorFromTextureOptions, TensorFromUrlOptions} from './tensor-factory.js';
 import {Tensor} from './tensor-impl.js';
 import {Tensor as TensorInterface} from './tensor.js';

@@ -277,8 +277,8 @@ export const tensorFromGpuBuffer = <T extends TensorInterface.GpuBufferDataTypes
 /**
  * implementation of Tensor.fromMLBuffer().
  */
-export const tensorFromMLBuffer = <T extends TensorInterface.GpuBufferDataTypes>(
-    mlBuffer: TensorInterface.MLBufferType, options: TensorFromGpuBufferOptions<T>): Tensor => {
+export const tensorFromMLBuffer = <T extends TensorInterface.MLBufferDataTypes>(
+    mlBuffer: TensorInterface.MLBufferType, options: TensorFromMLBufferOptions<T>): Tensor => {
   const {dataType, dims, download, dispose} = options;
   return new Tensor({location: 'ml-buffer', type: dataType ?? 'float32', mlBuffer, dims, download, dispose});
 };

js/common/lib/tensor-impl.ts (+3 −3)

@@ -4,7 +4,7 @@
 import {tensorToDataURL, tensorToImageData} from './tensor-conversion-impl.js';
 import {TensorToDataUrlOptions, TensorToImageDataOptions} from './tensor-conversion.js';
 import {tensorFromGpuBuffer, tensorFromImage, tensorFromMLBuffer, tensorFromPinnedBuffer, tensorFromTexture} from './tensor-factory-impl.js';
-import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MLBufferConstructorParameters, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromTextureOptions, TensorFromUrlOptions, TextureConstructorParameters} from './tensor-factory.js';
+import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MLBufferConstructorParameters, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromMLBufferOptions, TensorFromTextureOptions, TensorFromUrlOptions, TextureConstructorParameters} from './tensor-factory.js';
 import {checkTypedArray, NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP, NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP, SupportedTypedArray, SupportedTypedArrayConstructors} from './tensor-impl-type-mapping.js';
 import {calculateSize, tensorReshape} from './tensor-utils-impl.js';
 import {Tensor as TensorInterface} from './tensor.js';

@@ -273,8 +273,8 @@ export class Tensor implements TensorInterface {
     return tensorFromGpuBuffer(gpuBuffer, options);
   }

-  static fromMLBuffer<T extends TensorInterface.GpuBufferDataTypes>(
-      mlBuffer: TensorMLBufferType, options: TensorFromGpuBufferOptions<T>): TensorInterface {
+  static fromMLBuffer<T extends TensorInterface.MLBufferDataTypes>(
+      mlBuffer: TensorMLBufferType, options: TensorFromMLBufferOptions<T>): TensorInterface {
     return tensorFromMLBuffer(mlBuffer, options);
   }
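For context, a minimal sketch of calling the retyped factory from user code. It assumes an existing WebNN MLContext (here `mlContext`); the shape and data type are illustrative, and `createBuffer`/`readBuffer`/`destroy` are the same MLBuffer operations used elsewhere in this commit:

// Sketch: wrap a WebNN MLBuffer in an ort.Tensor via the retyped factory.
// Assumes an existing MLContext; shape and dtype are illustrative.
import * as ort from 'onnxruntime-web';

async function makeMLBufferTensor(mlContext: MLContext): Promise<ort.Tensor> {
  const mlBuffer = mlContext.createBuffer({dataType: 'float32', dimensions: [1, 4]});
  // T is now constrained to MLBufferDataTypes, and the options object is a
  // TensorFromMLBufferOptions<T> rather than TensorFromGpuBufferOptions<T>.
  return ort.Tensor.fromMLBuffer(mlBuffer, {
    dataType: 'float32',
    dims: [1, 4],
    dispose: () => mlBuffer.destroy(),
    download: async () => new Float32Array(await mlContext.readBuffer(mlBuffer)),
  });
}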

js/web/lib/wasm/jsep/backend-webnn.ts (+1 −5)

@@ -99,10 +99,6 @@ export class WebNNBackend {
     this.bufferManager.releaseBufferId(bufferId);
   }

-  public getBuffer(bufferId: BufferId): MLBuffer {
-    return this.bufferManager.getBuffer(bufferId);
-  }
-
   public ensureBuffer(bufferId: BufferId, onnxDataType: number|MLOperandDataType, dimensions: number[]): MLBuffer {
     let dataType: MLOperandDataType;
     if (typeof onnxDataType === 'number') {

@@ -129,7 +125,7 @@
     return this.bufferManager.download(bufferId);
   }

-  public createMLBufferDownloader(bufferId: BufferId, type: Tensor.GpuBufferDataTypes): () => Promise<Tensor.DataType> {
+  public createMLBufferDownloader(bufferId: BufferId, type: Tensor.MLBufferDataTypes): () => Promise<Tensor.DataType> {
     return async () => {
       const data = await this.bufferManager.download(bufferId);
       return createView(data, type);

js/web/lib/wasm/jsep/webnn/buffer-manager.ts (+0 −14)

@@ -22,10 +22,6 @@ export interface BufferManager {
    * Release a BufferId.
    */
   releaseBufferId(bufferId: BufferId): void;
-  /**
-   * Get MLBuffer by BufferId.
-   */
-  getBuffer(bufferId: BufferId): MLBuffer;
   /**
    * Ensure a MLBuffer is created for the BufferId.
    */

@@ -155,16 +151,6 @@ class BufferManagerImpl implements BufferManager {
     }
   }

-  public getBuffer(bufferId: BufferId): MLBuffer {
-    if (!this.buffersById.has(bufferId)) {
-      throw new Error('BufferID not found.');
-    }
-    if (!this.buffersById.get(bufferId)!.buffer) {
-      throw new Error('Buffer has not been created.');
-    }
-    return this.buffersById.get(bufferId)!.buffer!;
-  }
-
   public ensureBuffer(bufferId: BufferId, dataType: MLOperandDataType, dimensions: number[]): MLBuffer {
     const buffer = this.buffersById.get(bufferId);
     if (!buffer) {
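With getBuffer removed, ensureBuffer is the single path to an MLBuffer: it creates the buffer for a BufferId on first use and returns the existing one afterwards, which is why a separate lookup method became dead code. A hedged sketch of the calling pattern (`reserveBufferId` is an assumed name inferred from the `jsepReserveBufferId` export in pre-jsep.js below):

// Sketch of the post-commit pattern; reserveBufferId is inferred from the
// jsepReserveBufferId export, not a verified method name.
const bufferId = backend.reserveBufferId();
// First call allocates the MLBuffer for this id with the given type/shape...
const buffer = backend.ensureBuffer(bufferId, 'float32', [1, 4]);
// ...later calls for the same id return the already-created buffer, so no
// separate backend.getBuffer(bufferId) lookup is needed.
backend.releaseBufferId(bufferId);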

js/web/lib/wasm/session-handler-inference.ts (+1 −1)

@@ -37,7 +37,7 @@ export const decodeTensorMetadata = (tensor: TensorMetadata): Tensor => {
     case 'ml-buffer': {
       const dataType = tensor[0];
       if (!isMLBufferSupportedType(dataType)) {
-        throw new Error(`not supported data type: ${dataType} for deserializing GPU tensor`);
+        throw new Error(`not supported data type: ${dataType} for deserializing MLBuffer tensor`);
       }
       const {mlBuffer, download, dispose} = tensor[2];
       return Tensor.fromMLBuffer(mlBuffer, {dataType, dims: tensor[1], download, dispose});
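For readers tracking the tuple indices: inferred purely from the accesses above, the 'ml-buffer' metadata has roughly this shape (the real TensorMetadata type may carry additional fields):

// Inferred from tensor[0]/tensor[1]/tensor[2] above; illustrative only.
type MLBufferTensorMetadata = [
  dataType: Tensor.Type,            // tensor[0]: checked via isMLBufferSupportedType
  dims: readonly number[],          // tensor[1]
  data: {mlBuffer: Tensor.MLBufferType; download?: () => Promise<Tensor.DataType>; dispose?: () => void},  // tensor[2]
];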

js/web/lib/wasm/wasm-types.ts (+0 −8)

@@ -160,14 +160,6 @@ export declare namespace JSEP {
    * @param bufferId - specify the MLBuffer ID.
    * @returns the MLBuffer.
    */
-  jsepGetMLBuffer: (bufferId: number) => MLBuffer;
-  /**
-   * [exported from pre-jsep.js] Ensure MLBuffer has been created with the correct type and dimensions.
-   * @param bufferId - specify the MLBuffer ID.
-   * @param dataType - specify the data type.
-   * @param dimensions - specify the dimensions.
-   * @returns the MLBuffer.
-   */
   jsepEnsureBuffer: (bufferId: number, dataType: number|MLOperandDataType, dimensions: number[]) => MLBuffer;
   /**
    * [exported from pre-jsep.js] Upload data to MLBuffer.

js/web/script/test-runner-cli.ts (+1 −1)

@@ -359,7 +359,7 @@ async function main() {
   }

   let ioBinding: Test.IOBindingMode;
-  if (backend !== 'webgpu' && args.ioBindingMode !== 'none') {
+  if (!['webgpu', 'webnn'].includes(backend) && args.ioBindingMode !== 'none') {
     npmlog.warn(
         'TestRunnerCli.Init.Model', `Ignoring IO Binding Mode "${args.ioBindingMode}" for backend "${backend}".`);
     ioBinding = 'none';

js/web/test/test-runner.ts (+35 −26)

@@ -204,6 +204,7 @@ export class ModelTestContext
       readonly perfData: ModelTestContext.ModelTestPerfData,
       readonly ioBinding: Test.IOBindingMode,
       private readonly profile: boolean,
+      public readonly mlContext?: MLContext,
   ) {}

   /**

@@ -254,7 +255,25 @@

       const initStart = now();
       const executionProviderConfig =
-          modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || 'webnn') : modelTest.backend!;
+          modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || {name: 'webnn'}) : modelTest.backend!;
+      let mlContext: MLContext|undefined;
+      if (modelTest.ioBinding.includes('ml-tensor') || modelTest.ioBinding.includes('ml-location')) {
+
+        const webnnOptions = executionProviderConfig as ort.InferenceSession.WebNNExecutionProviderOption;
+        const deviceType = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.deviceType;
+        const numThreads = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.numThreads;
+        const powerPreference = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.powerPreference;
+
+        mlContext = await navigator.ml.createContext({
+          deviceType,
+          numThreads,
+          powerPreference,
+        });
+        (executionProviderConfig as ort.InferenceSession.WebNNExecutionProviderOption).context = mlContext;
+        if (!deviceType) {
+          (executionProviderConfig as ort.InferenceSession.WebNNContextOptions).deviceType = deviceType;
+        }
+      }
       const session = await initializeSession(
           modelTest.modelUrl, executionProviderConfig, modelTest.ioBinding, profile, modelTest.externalData,
           testOptions?.sessionOptions || {}, this.cache);

@@ -271,6 +290,7 @@
           {init: initEnd - initStart, firstRun: -1, runs: [], count: 0},
           modelTest.ioBinding,
           profile,
+          mlContext,
       );
     } finally {
       this.initializing = false;

@@ -565,46 +585,34 @@ function createGpuTensorForOutput(type: ort.Tensor.Type, dims: readonly number[]
   });
 }

-const getContext = (() => {
-  let context: MLContext|undefined;
-
-  return async(): Promise<MLContext> => {
-    if (!context) {
-      context = await navigator.ml.createContext();
-    }
-    return context;
-  };
-})();

-async function createMlTensorForOutput(type: ort.Tensor.Type, dims: readonly number[]) {
+async function createMLTensorForOutput(mlContext: MLContext, type: ort.Tensor.Type, dims: readonly number[]) {
   if (!isMLBufferSupportedType(type)) {
-    throw new Error(`createMlTensorForOutput can not work with ${type} tensor`);
+    throw new Error(`createMLTensorForOutput can not work with ${type} tensor`);
   }

   const dataType = type === 'bool' ? 'uint8' : type;

-  const context = await getContext();
-  const mlBuffer = context.createBuffer({dataType, dimensions: dims as number[]});
+  const mlBuffer = mlContext.createBuffer({dataType, dimensions: dims as number[]});

   return ort.Tensor.fromMLBuffer(mlBuffer, {
     dataType: type,
     dims,
     dispose: () => mlBuffer.destroy(),
     download: async () => {
-      const arrayBuffer = await context.readBuffer(mlBuffer);
-      return createView(arrayBuffer, type) as ort.Tensor.DataTypeMap[ort.Tensor.GpuBufferDataTypes];
+      const arrayBuffer = await mlContext.readBuffer(mlBuffer);
+      return createView(arrayBuffer, type) as ort.Tensor.DataTypeMap[ort.Tensor.MLBufferDataTypes];
     }
   });
 }

-async function createMlTensorForInput(cpuTensor: ort.Tensor): Promise<ort.Tensor> {
+async function createMLTensorForInput(mlContext: MLContext, cpuTensor: ort.Tensor): Promise<ort.Tensor> {
   if (!isMLBufferSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
-    throw new Error(`createMlTensorForInput can not work with ${cpuTensor.type} tensor`);
+    throw new Error(`createMLTensorForInput can not work with ${cpuTensor.type} tensor`);
   }
-  const context = await getContext();
   const dataType = cpuTensor.type === 'bool' ? 'uint8' : cpuTensor.type;
-  const mlBuffer = context.createBuffer({dataType, dimensions: cpuTensor.dims as number[]});
-  context.writeBuffer(mlBuffer, cpuTensor.data);
+  const mlBuffer = mlContext.createBuffer({dataType, dimensions: cpuTensor.dims as number[]});
+  mlContext.writeBuffer(mlBuffer, cpuTensor.data);
   return ort.Tensor.fromMLBuffer(
       mlBuffer, {dataType: cpuTensor.type, dims: cpuTensor.dims, dispose: () => mlBuffer.destroy()});
 }

@@ -613,6 +621,7 @@ export async function sessionRun(options: {
   session: ort.InferenceSession; feeds: Record<string, ort.Tensor>;
   outputsMetaInfo: Record<string, Pick<ort.Tensor, 'dims'|'type'>>;
   ioBinding: Test.IOBindingMode;
+  mlContext?: MLContext;
 }): Promise<[number, number, ort.InferenceSession.OnnxValueMapType]> {
   const session = options.session;
   const feeds = options.feeds;

@@ -633,7 +642,7 @@
     if (Object.hasOwnProperty.call(feeds, name)) {
       if (feeds[name].size > 0) {
         if (options.ioBinding === 'ml-location' || options.ioBinding === 'ml-tensor') {
-          feeds[name] = await createMlTensorForInput(feeds[name]);
+          feeds[name] = await createMLTensorForInput(options.mlContext!, feeds[name]);
         } else {
           feeds[name] = createGpuTensorForInput(feeds[name]);
         }

@@ -650,7 +659,7 @@
         fetches[name] = new ort.Tensor(type, [], dims);
       } else {
         if (options.ioBinding === 'ml-tensor') {
-          fetches[name] = await createMlTensorForOutput(type, dims);
+          fetches[name] = await createMLTensorForOutput(options.mlContext!, type, dims);
         } else {
           fetches[name] = createGpuTensorForOutput(type, dims);
         }

@@ -701,8 +710,8 @@ export async function runModelTestSet(
   const outputsMetaInfo: Record<string, ort.Tensor> = {};
   testCase.inputs!.forEach((tensor) => feeds[tensor.name] = tensor);
   testCase.outputs!.forEach((tensor) => outputsMetaInfo[tensor.name] = tensor);
-  const [start, end, outputs] =
-      await sessionRun({session: context.session, feeds, outputsMetaInfo, ioBinding: context.ioBinding});
+  const [start, end, outputs] = await sessionRun(
+      {session: context.session, feeds, outputsMetaInfo, ioBinding: context.ioBinding, mlContext: context.mlContext});
   if (context.perfData.count === 0) {
     context.perfData.firstRun = end - start;
   } else {
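Putting the test-runner changes together: the runner now creates a single MLContext up front, hands it to the WebNN EP through the `context` option, and reuses it for every MLBuffer it allocates. A condensed, hedged sketch (modelUrl and deviceType are illustrative):

// Condensed sketch of the flow above; modelUrl and deviceType are illustrative.
const mlContext = await navigator.ml.createContext({deviceType: 'gpu'});
const session = await ort.InferenceSession.create(modelUrl, {
  executionProviders: [
    {name: 'webnn', context: mlContext} as ort.InferenceSession.WebNNExecutionProviderOption,
  ],
});
// createMLTensorForInput/Output then allocate their MLBuffers from this same
// context, so the session and its bound tensors share one WebNN device.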

onnxruntime/core/providers/webnn/data_transfer.cc (+5 −5)

@@ -16,18 +16,18 @@ bool DataTransfer::CanCopy(const OrtDevice& src_device, const OrtDevice& dst_dev
 }

 common::Status DataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const {
+  if (!emscripten::val::module_property("shouldTransferToMLBuffer").as<bool>()) {
+    // We don't need to transfer the buffer to an MLBuffer, so we don't need to copy the buffer.
+    return Status::OK();
+  }
+
   size_t bytes = src.SizeInBytes();
   if (bytes > 0) {
     const void* src_data = src.DataRaw();
     void* dst_data = dst.MutableDataRaw();

     const auto& dst_device = dst.Location().device;

-    if (!emscripten::val::module_property("shouldTransferToMLBuffer").as<bool>()) {
-      // We don't need to transfer the buffer to an MLBuffer, so we don't need to copy the buffer.
-      return Status::OK();
-    }
-
     if (dst_device.Type() == OrtDevice::GPU) {
       EM_ASM({
         Module.jsepUploadBuffer($0, HEAPU8.subarray($1, $1 + $2));

onnxruntime/wasm/pre-jsep.js (+6 −8)

@@ -201,11 +201,11 @@ Module['jsepInit'] = (name, params) => {
   } else if(name === 'webnn') {
     // Functions called from EM_ASM need to be assigned in a way that can be minified.
     [Module.jsepBackend,
-      Module.jsepReserveBufferId,
-      Module.jsepReleaseBufferId,
-      Module.jsepEnsureBuffer,
-      Module.jsepUploadBuffer,
-      Module.jsepDownloadBuffer,
+     Module.jsepReserveBufferId,
+     Module.jsepReleaseBufferId,
+     Module.jsepEnsureBuffer,
+     Module.jsepUploadBuffer,
+     Module.jsepDownloadBuffer,
     ] = params;


@@ -225,12 +225,10 @@ Module['jsepInit'] = (name, params) => {
     Module['jsepOnReleaseSession'] = sessionId => {
       backend['onReleaseSession'](sessionId);
     };
+    Module['jsepReleaseBufferId'] = Module.jsepReleaseBufferId;
     Module['jsepGetMLContext'] = sessionId => {
       return backend['getMLContext'](sessionId);
     };
-    Module['jsepGetMLBuffer'] = (bufferId) => {
-      return backend['getBuffer'](bufferId);
-    }
     Module['jsepCreateMLBufferDownloader'] = (bufferId, type) => {
       return backend['createMLBufferDownloader'](bufferId, type);
     }
