From 5deb4bdd6d5843719f55ab26751fcd3c1fc637ea Mon Sep 17 00:00:00 2001 From: Ben Schmidt Date: Fri, 20 Dec 2024 12:08:03 -0500 Subject: [PATCH] extensions to webGPU strategies for hamming distance --- package.json | 2 +- src/deepscatter.ts | 3 +- src/webGPU/HammingPipeline.ts | 228 ++++++++++++++++++++++++++++++++++ src/webGPU/lib.ts | 207 +----------------------------- 4 files changed, 234 insertions(+), 206 deletions(-) create mode 100644 src/webGPU/HammingPipeline.ts diff --git a/package.json b/package.json index e46584a0..1724f462 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "deepscatter", "type": "module", - "version": "3.0.0-next.45", + "version": "3.0.0-next.46", "description": "Fast, animated zoomable scatterplots scaling to billions of points", "files": [ "dist" diff --git a/src/deepscatter.ts b/src/deepscatter.ts index a52e6ada..9bb96e01 100644 --- a/src/deepscatter.ts +++ b/src/deepscatter.ts @@ -4,7 +4,8 @@ export { Deeptable } from './Deeptable'; export { LabelMaker } from './label_rendering'; export { dictionaryFromArrays } from './utilityFunctions'; export { Tile } from './tile'; -export { DeepGPU, create_hamming_transform, HammingPipeline, ReusableWebGPUPipeline } from './webGPU/lib' +export { DeepGPU, ReusableWebGPUPipeline } from './webGPU/lib' +export { create_multi_hamming_transform, HammingPipeline } from './webGPU/HammingPipeline' export type { APICall, diff --git a/src/webGPU/HammingPipeline.ts b/src/webGPU/HammingPipeline.ts new file mode 100644 index 00000000..01c00e9f --- /dev/null +++ b/src/webGPU/HammingPipeline.ts @@ -0,0 +1,228 @@ +import { DeepGPU, ReusableWebGPUPipeline } from './lib'; +import { makeShaderDataDefinitions, makeStructuredView } from 'webgpu-utils'; +import { createSingletonBuffer } from './buffertools'; +import { Deeptable, Tile, Transformation } from '../deepscatter'; +import { Bool, Type, Vector, vectorFromArray } from 'apache-arrow'; + + +export class HammingPipeline extends 
ReusableWebGPUPipeline { + public gpuState: DeepGPU; + public dimensionality? : number; + public comparisonBuffer: GPUBuffer; + private fieldName : string; + constructor( + gpuState: DeepGPU, + fieldName: string + ) { + super(gpuState) + this.fieldName = fieldName + } + + bindGroupLayout(device: GPUDevice) { + return device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'read-only-storage' }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'read-only-storage' }, + }, + { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' }, + }, + { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'uniform' }, + }, + ], + }); + } + + shaderCode() { + return ` + struct SizeEtc { + objectSize: u32, + }; + + @group(0) @binding(0) var<storage, read> comparisonArray : array<u32>; + @group(0) @binding(1) var<storage, read> matrixArray : array<u32>; + @group(0) @binding(2) var<storage, read_write> outputArray : array<u32>; + @group(0) @binding(3) var<uniform> myUniforms: SizeEtc; + + @compute @workgroup_size(64) + fn main(@builtin(global_invocation_id) global_id : vec3<u32>) { + let idx = global_id.x; + let o = myUniforms.objectSize; + if (idx < arrayLength(&matrixArray)) { + var totalDistance: u32 = 0; + for (var i: u32 = 0; i < o; i = i + 1) { + for (var j: u32 = 0; j < arrayLength(&comparisonArray) / o; j = j + 1) { + totalDistance = totalDistance + countOneBits(comparisonArray[j * o + i] ^ matrixArray[idx * o + i]); + } + } + outputArray[global_id.x] = totalDistance; + } + } + `} + + setComparisonArray( + arrs: Vector<Bool>[] + ) { + if (arrs.length === 0) { + throw new Error("No embeddings provided."); + } + // Ensure all have the same length and type. 
+ const length = arrs[0].length; + for (const arr of arrs) { + if (arr.length !== length) { + throw new Error("All provided embeddings must have the same length."); + } + const underlying = arr.data[0]; + if (underlying.type.typeId !== Type.Bool) { + throw new Error("All embeddings must be boolean."); + } + } + + this.dimensionality = length; + + // Convert each embedding into bytes and concatenate. + const allBytes: Uint8Array[] = []; + for (const arr of arrs) { + const underlying = arr.data[0]; + const bytes = underlying.values.slice( + underlying.offset / 8, + underlying.offset / 8 + underlying.length / 8 + ); + allBytes.push(bytes); + } + + // Concatenate all embeddings into one large Uint8Array + const totalLength = allBytes.reduce((acc, b) => acc + b.length, 0); + const concatenated = new Uint8Array(totalLength); + let offset = 0; + for (const b of allBytes) { + concatenated.set(b, offset); + offset += b.length; + } + + this.comparisonBuffer = createSingletonBuffer( + this.gpuState.device, + concatenated, + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + ); + } + + uniforms() { + return { + objectSize: this.dimensionality / 32, + } + } + + prepUniforms() { + const defs = makeShaderDataDefinitions(this.shaderCode()); + + const myUniformValues = makeStructuredView(defs.uniforms.myUniforms); + + myUniformValues.set(this.uniforms()); + return myUniformValues; + } + + prep() { + const { device } = this.gpuState; + const layout = device.createPipelineLayout({ + bindGroupLayouts: [this.bindGroupLayout(device)], + }); + // Create shader module and pipeline + const shaderModule = device.createShaderModule({ code: this.shaderCode() }); + this.pipeline = device.createComputePipeline({ + layout, + compute: { + module: shaderModule, + entryPoint: 'main', + }, + }); + this.uniformBuffer = createSingletonBuffer( + device, + this.prepUniforms().arrayBuffer, + GPUBufferUsage.UNIFORM, + ); + } + + async runOnTile(tile: Tile) { + const { comparisonBuffer, fieldName, 
pipeline, uniformBuffer, dimensionality: embeddingSize } = this; + const { device } = this.gpuState; + const commandEncoder = device.createCommandEncoder(); + + const { buffer, offset, byte_size: size } = await this.gpuState.get(fieldName, tile) + const outputSize = (size / embeddingSize) * 8; + const paddedSize = Math.ceil(outputSize / 4) * 4; + + const outputBuffer = device.createBuffer({ + size: paddedSize * 4, + usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.STORAGE, + }); + + const passEncoder = commandEncoder.beginComputePass(); + passEncoder.setPipeline(pipeline); + passEncoder.setBindGroup( + 0, + device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: comparisonBuffer } }, + { binding: 1, resource: { buffer, offset, size } }, + { binding: 2, resource: { buffer: outputBuffer } }, + { binding: 3, resource: { buffer: uniformBuffer } }, + ], + }), + ); + + passEncoder.dispatchWorkgroups(size / 4 / 64); + passEncoder.end(); + + // Submit the commands + const gpuReadBuffer = device.createBuffer({ + size: paddedSize * 4, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, + }); + + commandEncoder.copyBufferToBuffer( + outputBuffer, + 0, + gpuReadBuffer, + 0, + paddedSize * 4, + ); + device.queue.submit([commandEncoder.finish()]); + + // Read back the results + await gpuReadBuffer.mapAsync(GPUMapMode.READ); + const outputArray = new Uint32Array(gpuReadBuffer.getMappedRange()); + const usable = outputArray.slice(0, outputSize); + const returnVal = new Float32Array(usable.length) + for (let i = 0; i < returnVal.length; i++) { + returnVal[i] = usable[i] / embeddingSize + } + return vectorFromArray(returnVal) + } +} + + +export async function create_multi_hamming_transform( + deeptable: Deeptable, + field: string, + views: Vector<Bool>[], +) : Promise<Transformation> { + const gpuState = await deeptable.deepGPU + const pipeline = new HammingPipeline(gpuState, field); + pipeline.setComparisonArray(views) + pipeline.prep(); 
+ return (tile: Tile) => pipeline.runOnTile(tile) +} diff --git a/src/webGPU/lib.ts b/src/webGPU/lib.ts index 85ce5379..d5615618 100644 --- a/src/webGPU/lib.ts +++ b/src/webGPU/lib.ts @@ -1,7 +1,5 @@ -import { makeShaderDataDefinitions, makeStructuredView } from 'webgpu-utils'; -import { WebGPUBufferSet, createSingletonBuffer } from './buffertools'; -import { Deeptable, Tile, Transformation } from '../deepscatter'; -import { Bool, Type, Vector, vectorFromArray } from 'apache-arrow'; +import { WebGPUBufferSet } from './buffertools'; +import { Deeptable, Tile } from '../deepscatter'; export class DeepGPU { // This is a stateful class for bundling together GPU buffers and resources. @@ -69,203 +67,4 @@ export abstract class ReusableWebGPUPipeline { abstract uniforms(): Record; protected uniformBuffer?: GPUBuffer; protected pipeline?: GPUComputePipeline; -} - -export class HammingPipeline extends ReusableWebGPUPipeline { - public gpuState: DeepGPU; - public dimensionality? : number; - public comparisonBuffer: GPUBuffer; - private fieldName : string; - constructor( - gpuState: DeepGPU, - fieldName: string - ) { - super(gpuState) - this.fieldName = fieldName - } - - bindGroupLayout(device: GPUDevice) { - return device.createBindGroupLayout({ - entries: [ - { - binding: 0, - visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'read-only-storage' }, - }, - { - binding: 1, - visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'read-only-storage' }, - }, - { - binding: 2, - visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'storage' }, - }, - { - binding: 3, - visibility: GPUShaderStage.COMPUTE, - buffer: { type: 'uniform' }, - }, - ], - }); - } - - shaderCode() { - return ` - struct SizeEtc { - objectSize: u32, - }; - - @group(0) @binding(0) var<storage, read> comparisonArray : array<u32>; - @group(0) @binding(1) var<storage, read> matrixArray : array<u32>; - @group(0) @binding(2) var<storage, read_write> outputArray : array<u32>; - @group(0) @binding(3) var<uniform> myUniforms: SizeEtc; - - @compute @workgroup_size(64) - fn 
main(@builtin(global_invocation_id) global_id : vec3<u32>) { - let idx = global_id.x; - let o = myUniforms.objectSize; - if (idx < arrayLength(&matrixArray)) { - var totalDistance: u32 = 0; - for (var i: u32 = 0; i < o; i = i + 1) { - for (var j: u32 = 0; j < arrayLength(&comparisonArray) / o; j = j + 1) { - totalDistance = totalDistance + countOneBits(comparisonArray[j * o + i] ^ matrixArray[idx * o + i]); - } - } - outputArray[global_id.x] = totalDistance; - } - } - `} - - setComparisonArray( - arr: Vector<Bool> - ) { - const underlying = arr.data[0] - if (underlying.type.typeId !== Type.Bool) { - throw new Error("uhuh") - } - const bytes = underlying.values.slice(underlying.offset / 8, underlying.offset / 8 + underlying.length / 8) - if (bytes.length !== 768 / 8) { - throw new Error("WTF") - } - this.comparisonBuffer = createSingletonBuffer( - this.gpuState.device, - bytes, - GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - ); - this.dimensionality = underlying.length; - } - - uniforms() { - return { - objectSize: this.dimensionality / 32, - } - } - - prepUniforms() { - const defs = makeShaderDataDefinitions(this.shaderCode()); - - const myUniformValues = makeStructuredView(defs.uniforms.myUniforms); - - myUniformValues.set(this.uniforms()); - return myUniformValues; - } - - prep() { - const { device } = this.gpuState; - const layout = device.createPipelineLayout({ - bindGroupLayouts: [this.bindGroupLayout(device)], - }); - // Create shader module and pipeline - const shaderModule = device.createShaderModule({ code: this.shaderCode() }); - this.pipeline = device.createComputePipeline({ - layout, - compute: { - module: shaderModule, - entryPoint: 'main', - }, - }); - this.uniformBuffer = createSingletonBuffer( - device, - this.prepUniforms().arrayBuffer, - GPUBufferUsage.UNIFORM, - ); - } - - async runOnTile(tile: Tile) { - const { comparisonBuffer, fieldName, pipeline, uniformBuffer, dimensionality: embeddingSize } = this; - const { device } = this.gpuState; - const 
commandEncoder = device.createCommandEncoder(); - - const { buffer, offset, byte_size: size } = await this.gpuState.get(fieldName, tile) - const outputSize = (size / embeddingSize) * 8; - const paddedSize = Math.ceil(outputSize / 4) * 4; - - // TODO this should be a permanent buffer. - const outputBuffer = device.createBuffer({ - // Put a ceiling on it. - size: paddedSize * 4, - usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.STORAGE, - }); - - const passEncoder = commandEncoder.beginComputePass(); - passEncoder.setPipeline(pipeline); - passEncoder.setBindGroup( - 0, - device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: comparisonBuffer } }, - { binding: 1, resource: { buffer, offset, size } }, - { binding: 2, resource: { buffer: outputBuffer } }, - { binding: 3, resource: { buffer: uniformBuffer } }, - ], - }), - ); - - passEncoder.dispatchWorkgroups(size / 4 / 64); - passEncoder.end(); - - // Submit the commands - const gpuReadBuffer = device.createBuffer({ - size: paddedSize * 4, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, - }); - - commandEncoder.copyBufferToBuffer( - outputBuffer, - 0, - gpuReadBuffer, - 0, - paddedSize * 4, - ); - device.queue.submit([commandEncoder.finish()]); - - // Read back the results - await gpuReadBuffer.mapAsync(GPUMapMode.READ); - const outputArray = new Uint32Array(gpuReadBuffer.getMappedRange()); - const usable = outputArray.slice(0, outputSize); - const returnVal = new Float32Array(usable.length) - for (let i = 0; i < returnVal.length; i++) { - returnVal[i] = usable[i] / embeddingSize // (originally this was squared??) 
- } - return vectorFromArray(returnVal) - } -} - - -export async function create_hamming_transform( - deeptable: Deeptable, - field: string, - view: Vector<Bool>, -) : Promise<Transformation> { - - const gpuState = await deeptable.deepGPU - const pipeline = new HammingPipeline(gpuState, field); - pipeline.setComparisonArray(view) - pipeline.prep(); - return (tile: Tile) => pipeline.runOnTile(tile) -} - - +} \ No newline at end of file