Commit 1324a98

Updating MLBuffer specification
* CPU devices now support MLBuffer
* MLContext.createBuffer now returns a Promise
1 parent b2d0110 commit 1324a98

12 files changed (+34, -30 lines)

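For orientation, a minimal before/after sketch of the createBuffer change (hypothetical caller code, not taken from this commit; the context options and descriptor values are illustrative):

// Before this commit, createBuffer returned the MLBuffer synchronously:
//   const buffer = context.createBuffer({dataType: 'float32', dimensions: [2, 2]});
// After it, the call resolves asynchronously, and CPU contexts are allowed too:
const context = await navigator.ml.createContext({deviceType: 'cpu'});
const buffer = await context.createBuffer({dataType: 'float32', dimensions: [2, 2]});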

js/web/lib/wasm/jsep/backend-webnn.ts (+2 -1)

@@ -99,7 +99,8 @@ export class WebNNBackend {
     this.bufferManager.releaseBufferId(bufferId);
   }

-  public ensureBuffer(bufferId: BufferId, onnxDataType: number|MLOperandDataType, dimensions: number[]): MLBuffer {
+  public async ensureBuffer(bufferId: BufferId, onnxDataType: number|MLOperandDataType, dimensions: number[]):
+      Promise<MLBuffer> {
     let dataType: MLOperandDataType;
     if (typeof onnxDataType === 'number') {
       const webnnDataType = onnxDataTypeToWebnnDataType.get(onnxDataType)!;

js/web/lib/wasm/jsep/init.ts (+1 -1)

@@ -246,7 +246,7 @@ export const init =
         // jsepReleaseBufferId,
         (bufferId: number) => backend.releaseBufferId(bufferId),
         // jsepEnsureBuffer
-        (bufferId: number, onnxDataType: number, dimensions: number[]) =>
+        async (bufferId: number, onnxDataType: number, dimensions: number[]) =>
             backend.ensureBuffer(bufferId, onnxDataType, dimensions),
         // jsepUploadBuffer
         (bufferId: number, data: Uint8Array) => {

js/web/lib/wasm/jsep/webnn/buffer-manager.ts (+4 -4)

@@ -25,7 +25,7 @@ export interface BufferManager {
   /**
    * Ensure a MLBuffer is created for the BufferId.
    */
-  ensureBuffer(bufferId: BufferId, dataType: MLOperandDataType, dimensions: number[]): MLBuffer;
+  ensureBuffer(bufferId: BufferId, dataType: MLOperandDataType, dimensions: number[]): Promise<MLBuffer>;
   /**
    * Upload data to a MLBuffer.
    */
@@ -85,12 +85,12 @@ class BufferTracker {
     this.mlBuffer = undefined;
   }

-  public ensureBuffer(dataType: MLOperandDataType, dimensions: number[]): MLBuffer {
+  public async ensureBuffer(dataType: MLOperandDataType, dimensions: number[]): Promise<MLBuffer> {
     if (this.mlBuffer) {
       return this.mlBuffer;
     }

-    const buffer = this.context.createBuffer({dataType, dimensions});
+    const buffer = await this.context.createBuffer({dataType, dimensions});
     this.mlBuffer = buffer;

     if (this.activeUpload) {
@@ -151,7 +151,7 @@ class BufferManagerImpl implements BufferManager {
     }
   }

-  public ensureBuffer(bufferId: BufferId, dataType: MLOperandDataType, dimensions: number[]): MLBuffer {
+  public async ensureBuffer(bufferId: BufferId, dataType: MLOperandDataType, dimensions: number[]): Promise<MLBuffer> {
     const buffer = this.buffersById.get(bufferId);
     if (!buffer) {
       throw new Error('Buffer not found.');
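
Callers of the manager now await the resolved buffer; a minimal sketch (only ensureBuffer and releaseBufferId appear in this diff; the bufferManager instance, ID, and shape are illustrative):

// Hypothetical caller: ensureBuffer now resolves to the MLBuffer rather
// than returning it directly.
const mlBuffer = await bufferManager.ensureBuffer(bufferId, 'float32', [1, 3, 224, 224]);
// ... bind mlBuffer for dispatch ...
bufferManager.releaseBufferId(bufferId);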

js/web/lib/wasm/jsep/webnn/webnn.d.ts (+1 -1)

@@ -387,7 +387,7 @@ interface MLBuffer {

 type MLNamedBuffers = Record<string, MLBuffer>;
 interface MLContext {
-  createBuffer(descriptor: MLOperandDescriptor): MLBuffer;
+  createBuffer(descriptor: MLOperandDescriptor): Promise<MLBuffer>;
   writeBuffer(
       dstBuffer: MLBuffer, srcData: ArrayBufferView|ArrayBuffer, srcElementOffset?: number,
       srcElementSize?: number): void;
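
Per these declarations, a create-and-fill round trip now mixes async and sync calls; a sketch (the mlContext instance and values are illustrative):

// createBuffer must now be awaited; writeBuffer remains synchronous.
const dst = await mlContext.createBuffer({dataType: 'float32', dimensions: [4]});
mlContext.writeBuffer(dst, new Float32Array([0, 1, 2, 3]));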

js/web/lib/wasm/wasm-core-impl.ts (+1 -1)

@@ -704,7 +704,7 @@ export const run = async(

         // If the graph has been partitioned, the output tensor may have not been created. For this reason, we use
         // ensureBuffer to get/create the MLBuffer.
-        const mlBuffer = ensureBuffer(dataOffset, dataType, dims);
+        const mlBuffer = await ensureBuffer(dataOffset, dataType, dims);

         // do not release the tensor right now. it will be released when user calls tensor.dispose().
         keepOutputTensor = true;

js/web/lib/wasm/wasm-types.ts (+3 -2)

@@ -25,7 +25,8 @@ export declare namespace JSEP {
   type ReplayFunction = () => void;
   type ReserveBufferIdFunction = () => number;
   type ReleaseBufferIdFunction = (bufferId: number) => void;
-  type EnsureBufferFunction = (bufferId: number, dataType: number|MLOperandDataType, dimensions: number[]) => MLBuffer;
+  type EnsureBufferFunction = (bufferId: number, dataType: number|MLOperandDataType, dimensions: number[]) =>
+      Promise<MLBuffer>;
   type UploadBufferFunction = (bufferId: number, data: Uint8Array) => void;
   type DownloadBufferFunction = (bufferId: number) => Promise<ArrayBuffer>;

@@ -154,7 +155,7 @@ export declare namespace JSEP {
    * @param bufferId - specify the MLBuffer ID.
    * @returns the MLBuffer.
    */
-  jsepEnsureBuffer: (bufferId: number, dataType: number|MLOperandDataType, dimensions: number[]) => MLBuffer;
+  jsepEnsureBuffer: (bufferId: number, dataType: number|MLOperandDataType, dimensions: number[]) => Promise<MLBuffer>;
   /**
    * [exported from pre-jsep.js] Upload data to MLBuffer.
    * @param bufferId - specify the MLBuffer ID.

js/web/test/test-runner.ts (+3 -4)

@@ -257,8 +257,7 @@ export class ModelTestContext {
     const executionProviderConfig =
         modelTest.backend === 'webnn' ? (testOptions?.webnnOptions || {name: 'webnn'}) : modelTest.backend!;
     let mlContext: MLContext|undefined;
-    if(['ml-tensor', 'ml-location'].includes(modelTest.ioBinding)) {
-
+    if (['ml-tensor', 'ml-location'].includes(modelTest.ioBinding)) {
       const webnnOptions = executionProviderConfig as ort.InferenceSession.WebNNExecutionProviderOption;
       const deviceType = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.deviceType;
       const numThreads = (webnnOptions as ort.InferenceSession.WebNNContextOptions)?.numThreads;
@@ -593,7 +592,7 @@ async function createMLTensorForOutput(mlContext: MLContext, type: ort.Tensor.Ty

   const dataType = type === 'bool' ? 'uint8' : type;

-  const mlBuffer = mlContext.createBuffer({dataType, dimensions: dims as number[]});
+  const mlBuffer = await mlContext.createBuffer({dataType, dimensions: dims as number[]});

   return ort.Tensor.fromMLBuffer(mlBuffer, {
     dataType: type,
@@ -611,7 +610,7 @@ async function createMLTensorForInput(mlContext: MLContext, cpuTensor: ort.Tenso
     throw new Error(`createMLTensorForInput can not work with ${cpuTensor.type} tensor`);
   }
   const dataType = cpuTensor.type === 'bool' ? 'uint8' : cpuTensor.type;
-  const mlBuffer = mlContext.createBuffer({dataType, dimensions: cpuTensor.dims as number[]});
+  const mlBuffer = await mlContext.createBuffer({dataType, dimensions: cpuTensor.dims as number[]});
   mlContext.writeBuffer(mlBuffer, cpuTensor.data);
   return ort.Tensor.fromMLBuffer(
       mlBuffer, {dataType: cpuTensor.type, dims: cpuTensor.dims, dispose: () => mlBuffer.destroy()});

onnxruntime/core/providers/webnn/builders/helper.cc (+2 -3)

@@ -211,10 +211,9 @@ bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type) {
   }
 }

-bool IsMLBufferSupported(WebnnDeviceType device_type) {
+bool IsMLBufferSupported() {
   static bool is_supported = !emscripten::val::global("MLBuffer").isUndefined();
-  // The current MLBuffer implementation only supports GPU and NPU devices.
-  return is_supported && device_type != WebnnDeviceType::CPU;
+  return is_supported;
 }

 }  // namespace webnn
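
The cached check only tests whether the global MLBuffer constructor exists; the JavaScript-level equivalent of the emscripten expression is roughly (a sketch, not code from this commit):

// Rough JS equivalent of !emscripten::val::global("MLBuffer").isUndefined()
const isMLBufferSupported = 'MLBuffer' in globalThis;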

onnxruntime/core/providers/webnn/builders/helper.h (+1 -1)

@@ -285,7 +285,7 @@ bool GetBidirectionalBroadcastShape(std::vector<int64_t>& shape_a,

 bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type);

-bool IsMLBufferSupported(WebnnDeviceType device_type);
+bool IsMLBufferSupported();

 }  // namespace webnn
 }  // namespace onnxruntime

onnxruntime/core/providers/webnn/builders/model.cc (+12 -8)

@@ -155,27 +155,31 @@ onnxruntime::common::Status Model::Compute(const InlinedHashMap<std::string, Onn
 onnxruntime::common::Status Model::Dispatch(const InlinedHashMap<std::string, OnnxTensorData>& inputs,
                                             const InlinedHashMap<std::string, OnnxTensorData>& outputs) {
   auto jsepEnsureBuffer = emscripten::val::module_property("jsepEnsureBuffer");
-  for (const auto& input : inputs) {
-    const std::string& name = input.first;
-    const struct OnnxTensorData tensor = input.second;
+  auto promises = emscripten::val::array();
+  for (const auto& [_, tensor] : inputs) {
     emscripten::val shape = emscripten::val::array();
     for (const auto& dim : tensor.tensor_info.shape) {
       uint32_t dim_val = SafeInt<uint32_t>(dim);
       shape.call<void>("push", dim_val);
     }
     auto buffer = jsepEnsureBuffer(reinterpret_cast<intptr_t>(tensor.buffer), tensor.tensor_info.data_type, shape);
-    wnn_inputs_.set(name, buffer);
+    promises.call<void>("push", buffer);
   }
-  for (const auto& output : outputs) {
-    const std::string& name = output.first;
-    const struct OnnxTensorData tensor = output.second;
+  for (const auto& [_, tensor] : outputs) {
     emscripten::val shape = emscripten::val::array();
     for (const auto& dim : tensor.tensor_info.shape) {
       uint32_t dim_val = SafeInt<uint32_t>(dim);
       shape.call<void>("push", dim_val);
     }
     auto buffer = jsepEnsureBuffer(reinterpret_cast<intptr_t>(tensor.buffer), tensor.tensor_info.data_type, shape);
-    wnn_outputs_.set(name, buffer);
+    promises.call<void>("push", buffer);
+  }
+  auto buffers = emscripten::val::global("Promise").call<emscripten::val>("all", promises).await();
+  for (const auto& [name, _] : inputs) {
+    wnn_inputs_.set(name, buffers.call<emscripten::val>("shift"));
+  }
+  for (const auto& [name, _] : outputs) {
+    wnn_outputs_.set(name, buffers.call<emscripten::val>("shift"));
   }
   wnn_context_.call<void>("dispatch", wnn_graph_, wnn_inputs_, wnn_outputs_);
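
Because jsepEnsureBuffer now returns a promise, Dispatch pushes every buffer promise into an array, awaits Promise.all, and then binds the resolved buffers by name in the original order. The same pattern in plain TypeScript, as a sketch (the tensor map and ensureBuffer callback are stand-ins for the emscripten::val plumbing above):

// Start all ensureBuffer calls first, await them together, then bind the
// resolved buffers by name in the same order they were requested.
async function bindAll<Buffer>(
    tensors: Map<string, {id: number; dataType: number; shape: number[]}>,
    ensureBuffer: (id: number, dataType: number, shape: number[]) => Promise<Buffer>):
    Promise<Map<string, Buffer>> {
  const names = [...tensors.keys()];
  const buffers = await Promise.all(names.map((name) => {
    const {id, dataType, shape} = tensors.get(name)!;
    return ensureBuffer(id, dataType, shape);
  }));
  return new Map(names.map((name, i) => [name, buffers[i]]));
}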

onnxruntime/core/providers/webnn/builders/model_builder.cc (+1 -1)

@@ -332,7 +332,7 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
   if (!wnn_graph.as<bool>()) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to build WebNN graph.");
   }
-  model.reset(new Model(std::move(wnn_context_), std::move(wnn_graph), logger_, IsMLBufferSupported(wnn_device_type_)));
+  model.reset(new Model(std::move(wnn_context_), std::move(wnn_graph), logger_, IsMLBufferSupported()));
   model->SetInputs(std::move(input_names_));
   model->SetOutputs(std::move(output_names_));
   model->SetScalarOutputs(std::move(scalar_outputs_));

onnxruntime/core/providers/webnn/webnn_execution_provider.cc (+3 -3)

@@ -24,7 +24,7 @@ WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_f
                 onnxruntime::kWebNNExecutionProvider,
                 // If MLBuffer is supported, we force all the tensors to be allocated as MLBuffer.
                 OrtDevice(
-                    webnn::IsMLBufferSupported(webnn::DeviceTypeFromString(webnn_device_flags)) ? OrtDevice::GPU : OrtDevice::CPU,
+                    webnn::IsMLBufferSupported() ? OrtDevice::GPU : OrtDevice::CPU,
                     OrtDevice::MemType::DEFAULT,
                     0)},
       wnn_device_type_(webnn::DeviceTypeFromString(webnn_device_flags)) {
@@ -378,14 +378,14 @@ WebNNExecutionProvider::GetKernelRegistry() const {
 }

 std::unique_ptr<onnxruntime::IDataTransfer> WebNNExecutionProvider::GetDataTransfer() const {
-  if (!webnn::IsMLBufferSupported(wnn_device_type_)) {
+  if (!webnn::IsMLBufferSupported()) {
     return nullptr;
   }
   return std::make_unique<webnn::DataTransfer>();
 }

 std::vector<AllocatorPtr> WebNNExecutionProvider::CreatePreferredAllocators() {
-  if (!webnn::IsMLBufferSupported(wnn_device_type_)) {
+  if (!webnn::IsMLBufferSupported()) {
     return {};
   }
   AllocatorCreationInfo customAllocatorCreationInfo([&](OrtDevice::DeviceId) {
