From 5deb4bdd6d5843719f55ab26751fcd3c1fc637ea Mon Sep 17 00:00:00 2001
From: Ben Schmidt <bmschmidt@gmail.com>
Date: Fri, 20 Dec 2024 12:08:03 -0500
Subject: [PATCH] extensions to webGPU strategies for hamming distance

---
 package.json                  |   2 +-
 src/deepscatter.ts            |   3 +-
 src/webGPU/HammingPipeline.ts | 228 ++++++++++++++++++++++++++++++++++
 src/webGPU/lib.ts             | 207 +-----------------------------
 4 files changed, 234 insertions(+), 206 deletions(-)
 create mode 100644 src/webGPU/HammingPipeline.ts
diff --git a/package.json b/package.json
index e46584a0..1724f462 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "deepscatter",
   "type": "module",
-  "version": "3.0.0-next.45",
+  "version": "3.0.0-next.46",
   "description": "Fast, animated zoomable scatterplots scaling to billions of points",
   "files": [
     "dist"
diff --git a/src/deepscatter.ts b/src/deepscatter.ts
index a52e6ada..9bb96e01 100644
--- a/src/deepscatter.ts
+++ b/src/deepscatter.ts
@@ -4,7 +4,8 @@ export { Deeptable } from './Deeptable';
 export { LabelMaker } from './label_rendering';
 export { dictionaryFromArrays } from './utilityFunctions';
 export { Tile } from './tile';
-export { DeepGPU, create_hamming_transform, HammingPipeline, ReusableWebGPUPipeline } from './webGPU/lib'
+export { DeepGPU, ReusableWebGPUPipeline } from './webGPU/lib'
+export { create_multi_hamming_transform, HammingPipeline } from './webGPU/HammingPipeline'
 
 export type {
   APICall,
diff --git a/src/webGPU/HammingPipeline.ts b/src/webGPU/HammingPipeline.ts
new file mode 100644
index 00000000..01c00e9f
--- /dev/null
+++ b/src/webGPU/HammingPipeline.ts
@@ -0,0 +1,303 @@
+import { DeepGPU, ReusableWebGPUPipeline } from './lib';
+import { makeShaderDataDefinitions, makeStructuredView } from 'webgpu-utils';
+import { createSingletonBuffer } from './buffertools';
+import { Deeptable, Tile, Transformation } from '../deepscatter';
+import { Bool, Type, Vector, vectorFromArray } from 'apache-arrow';
+
+
+/**
+ * Compute pipeline that scores every row of a bit-packed boolean embedding
+ * column against one or more comparison embeddings by Hamming distance.
+ *
+ * Each output value is the number of differing bits summed over all
+ * comparison embeddings, divided by the embedding dimensionality.
+ *
+ * Call order: setComparisonArray(), then prep(), then runOnTile() per tile.
+ */
+export class HammingPipeline extends ReusableWebGPUPipeline {
+	// NOTE(review): the base constructor presumably assigns this field. Under
+	// useDefineForClassFields a bare re-declaration would reset it to undefined
+	// after super(); confirm the compiler settings.
+	public gpuState: DeepGPU;
+	/** Number of bits per embedding; assigned by setComparisonArray(). */
+	public dimensionality? : number;
+	/** Bit-packed comparison embeddings, stored back to back. */
+	public comparisonBuffer: GPUBuffer;
+	private fieldName : string;
+
+	/**
+	 * @param gpuState GPU state that supplies the device and per-tile buffers.
+	 * @param fieldName Column name handed to gpuState.get() for each tile.
+	 */
+	constructor(
+		gpuState: DeepGPU,
+		fieldName: string
+	) {
+		super(gpuState)
+		this.fieldName = fieldName
+	}
+
+	/**
+	 * Describes bind group 0: comparison bits (0), tile bits (1), output
+	 * distances (2) and the uniform block (3).
+	 */
+	bindGroupLayout(device: GPUDevice) {
+		return device.createBindGroupLayout({
+			entries: [
+				{
+					binding: 0,
+					visibility: GPUShaderStage.COMPUTE,
+					buffer: { type: 'read-only-storage' },
+				},
+				{
+					binding: 1,
+					visibility: GPUShaderStage.COMPUTE,
+					buffer: { type: 'read-only-storage' },
+				},
+				{
+					binding: 2,
+					visibility: GPUShaderStage.COMPUTE,
+					buffer: { type: 'storage' },
+				},
+				{
+					binding: 3,
+					visibility: GPUShaderStage.COMPUTE,
+					buffer: { type: 'uniform' },
+				},
+			],
+		});
+	}
+
+	/**
+	 * WGSL source. Each invocation handles one row of objectSize u32 words and
+	 * is bounded by the row count (not the word count), so surplus invocations
+	 * never touch memory outside the bound buffers.
+	 */
+	shaderCode() {
+		return `
+		struct SizeEtc {
+			objectSize: u32,
+		};
+
+		@group(0) @binding(0) var<storage, read> comparisonArray : array<u32>;
+		@group(0) @binding(1) var<storage, read> matrixArray : array<u32>;
+		@group(0) @binding(2) var<storage, read_write> outputArray : array<u32>;
+		@group(0) @binding(3) var<uniform> myUniforms: SizeEtc;
+
+		@compute @workgroup_size(64)
+		fn main(@builtin(global_invocation_id) global_id : vec3<u32>) {
+				let idx = global_id.x;
+				let o = myUniforms.objectSize;
+				if (o == 0u) {
+						return;
+				}
+				let rowCount = arrayLength(&matrixArray) / o;
+				let comparisonCount = arrayLength(&comparisonArray) / o;
+				if (idx < rowCount) {
+						var totalDistance: u32 = 0;
+						for (var i: u32 = 0; i < o; i = i + 1) {
+								for (var j: u32 = 0; j < comparisonCount; j = j + 1) {
+									totalDistance = totalDistance + countOneBits(comparisonArray[j * o + i] ^ matrixArray[idx * o + i]);
+								}
+						}
+						outputArray[idx] = totalDistance;
+				}
+		}
+	`}
+
+	/**
+	 * Uploads the embeddings that every row is compared against.
+	 *
+	 * Call this before prep(): the uniform buffer built there captures the
+	 * dimensionality recorded here.
+	 *
+	 * @param arrs One or more boolean vectors of identical length. The length
+	 *   must be a positive multiple of 32 because the shader reads whole u32
+	 *   words.
+	 * @throws Error when the list is empty, lengths differ, a vector is not
+	 *   boolean, or the length is not a positive multiple of 32.
+	 */
+	setComparisonArray(
+		arrs: Vector<Bool>[]
+	) {
+		if (arrs.length === 0) {
+			throw new Error("No embeddings provided.");
+		}
+		// Ensure all have the same length and type.
+		const length = arrs[0].length;
+		for (const arr of arrs) {
+			if (arr.length !== length) {
+				throw new Error("All provided embeddings must have the same length.");
+			}
+			if (arr.type.typeId !== Type.Bool) {
+				throw new Error("All embeddings must be boolean.");
+			}
+		}
+		if (length === 0 || length % 32 !== 0) {
+			throw new Error("Embedding length must be a positive multiple of 32 bits.");
+		}
+
+		this.dimensionality = length;
+
+		// Pack each embedding into bytes and lay them out back to back.
+		const bytesPerEmbedding = length / 8;
+		const concatenated = new Uint8Array(bytesPerEmbedding * arrs.length);
+		arrs.forEach((arr, i) => {
+			concatenated.set(HammingPipeline.packBits(arr), i * bytesPerEmbedding);
+		});
+
+		this.comparisonBuffer = createSingletonBuffer(
+			this.gpuState.device,
+			concatenated,
+			GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
+		);
+	}
+
+	/**
+	 * Copies a boolean vector into a fresh byte array using Arrow's
+	 * least-significant-bit-first packing.
+	 */
+	private static packBits(arr: Vector<Bool>): Uint8Array {
+		const byteLength = Math.ceil(arr.length / 8);
+		const [chunk] = arr.data;
+		// Fast path: one chunk whose bit offset is byte aligned can be copied
+		// straight out of the backing buffer.
+		if (arr.data.length === 1 && chunk.offset % 8 === 0) {
+			const start = chunk.offset / 8;
+			return chunk.values.slice(start, start + byteLength);
+		}
+		// Slow path: chunked or unaligned input is re-packed bit by bit, because
+		// dividing an unaligned bit offset by 8 would select the wrong bytes.
+		const packed = new Uint8Array(byteLength);
+		for (let i = 0; i < arr.length; i++) {
+			if (arr.get(i)) {
+				packed[i >> 3] |= 1 << (i & 7);
+			}
+		}
+		return packed;
+	}
+
+	/** Values for the shader's uniform block: u32 words per embedding. */
+	uniforms() {
+		return {
+			objectSize: this.dimensionality / 32,
+		}
+	}
+
+	/** Packs uniforms() into a structured view matching the shader layout. */
+	prepUniforms() {
+		const defs = makeShaderDataDefinitions(this.shaderCode());
+		const myUniformValues = makeStructuredView(defs.uniforms.myUniforms);
+		myUniformValues.set(this.uniforms());
+		return myUniformValues;
+	}
+
+	/**
+	 * Compiles the shader and creates the compute pipeline and uniform buffer.
+	 *
+	 * @throws Error if setComparisonArray() has not been called yet.
+	 */
+	prep() {
+		if (this.dimensionality === undefined) {
+			throw new Error("setComparisonArray() must be called before prep().");
+		}
+		const { device } = this.gpuState;
+		const layout = device.createPipelineLayout({
+			bindGroupLayouts: [this.bindGroupLayout(device)],
+		});
+		// Create shader module and pipeline
+		const shaderModule = device.createShaderModule({ code: this.shaderCode() });
+		this.pipeline = device.createComputePipeline({
+			layout,
+			compute: {
+				module: shaderModule,
+				entryPoint: 'main',
+			},
+		});
+		this.uniformBuffer = createSingletonBuffer(
+			device,
+			this.prepUniforms().arrayBuffer,
+			GPUBufferUsage.UNIFORM,
+		);
+	}
+
+	/**
+	 * Runs the shader over one tile and reads the distances back.
+	 *
+	 * @param tile Tile whose fieldName column is fetched via gpuState.get().
+	 * @returns A float vector with one normalized distance per row.
+	 * @throws Error if setComparisonArray() and prep() have not both run.
+	 */
+	async runOnTile(tile: Tile) {
+		const { comparisonBuffer, fieldName, pipeline, uniformBuffer, dimensionality: embeddingSize } = this;
+		if (!comparisonBuffer || !pipeline || !uniformBuffer || embeddingSize === undefined) {
+			throw new Error("HammingPipeline is not ready: call setComparisonArray() and prep() first.");
+		}
+		const { device } = this.gpuState;
+
+		const { buffer, offset, byte_size: size } = await this.gpuState.get(fieldName, tile)
+		// size is in bytes, so the tile holds size * 8 / embeddingSize rows.
+		const rowCount = Math.floor((size * 8) / embeddingSize);
+		if (rowCount === 0) {
+			return vectorFromArray(new Float32Array(0))
+		}
+		// One u32 per row, with the element count rounded up to a multiple of 4.
+		const byteLength = Math.ceil(rowCount / 4) * 4 * 4;
+
+		const outputBuffer = device.createBuffer({
+			size: byteLength,
+			usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.STORAGE,
+		});
+		const gpuReadBuffer = device.createBuffer({
+			size: byteLength,
+			usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
+		});
+
+		try {
+			const commandEncoder = device.createCommandEncoder();
+			const passEncoder = commandEncoder.beginComputePass();
+			passEncoder.setPipeline(pipeline);
+			passEncoder.setBindGroup(
+				0,
+				device.createBindGroup({
+					layout: pipeline.getBindGroupLayout(0),
+					entries: [
+						{ binding: 0, resource: { buffer: comparisonBuffer } },
+						{ binding: 1, resource: { buffer, offset, size } },
+						{ binding: 2, resource: { buffer: outputBuffer } },
+						{ binding: 3, resource: { buffer: uniformBuffer } },
+					],
+				}),
+			);
+			// One invocation per row, 64 invocations per workgroup.
+			passEncoder.dispatchWorkgroups(Math.ceil(rowCount / 64));
+			passEncoder.end();
+
+			commandEncoder.copyBufferToBuffer(outputBuffer, 0, gpuReadBuffer, 0, byteLength);
+			device.queue.submit([commandEncoder.finish()]);
+
+			// Read back the results and normalize before the mapping is released.
+			await gpuReadBuffer.mapAsync(GPUMapMode.READ);
+			const distances = new Uint32Array(gpuReadBuffer.getMappedRange(), 0, rowCount);
+			const returnVal = Float32Array.from(distances, (d) => d / embeddingSize);
+			return vectorFromArray(returnVal)
+		} finally {
+			// destroy() also unmaps; without it both per-tile buffers would leak.
+			gpuReadBuffer.destroy();
+			outputBuffer.destroy();
+		}
+	}
+}
+	
+
+/** Builds a HammingPipeline for `field`, loads `views` as its comparison set, and returns a per-tile runner. */
+export async function create_multi_hamming_transform(
+	deeptable: Deeptable,
+	field: string,
+	views: Vector<Bool>[],
+) : Promise<Transformation> {
+	const hamming = new HammingPipeline(await deeptable.deepGPU, field);
+	hamming.setComparisonArray(views)
+	hamming.prep();
+	return (tile: Tile) => hamming.runOnTile(tile)
+}
diff --git a/src/webGPU/lib.ts b/src/webGPU/lib.ts
index 85ce5379..d5615618 100644
--- a/src/webGPU/lib.ts
+++ b/src/webGPU/lib.ts
@@ -1,7 +1,5 @@
-import { makeShaderDataDefinitions, makeStructuredView } from 'webgpu-utils';
-import { WebGPUBufferSet, createSingletonBuffer } from './buffertools';
-import { Deeptable, Tile, Transformation } from '../deepscatter';
-import { Bool, Type, Vector, vectorFromArray } from 'apache-arrow';
+import { WebGPUBufferSet } from './buffertools';
+import { Deeptable, Tile } from '../deepscatter';
 
 export class DeepGPU {
   // This is a stateful class for bundling together GPU buffers and resources.
@@ -69,203 +67,4 @@ export abstract class ReusableWebGPUPipeline {
 	abstract uniforms(): Record<string, any>;
 	protected uniformBuffer?: GPUBuffer;
 	protected pipeline?: GPUComputePipeline;
-}
-
-export class HammingPipeline extends ReusableWebGPUPipeline {
-	public gpuState: DeepGPU;
-	public dimensionality? : number;
-	public comparisonBuffer: GPUBuffer;
-	private fieldName : string;
-	constructor(
-		gpuState: DeepGPU,
-		fieldName: string
-	) {
-		super(gpuState)
-		this.fieldName = fieldName
-	}
-
-	bindGroupLayout(device: GPUDevice) {
-		return device.createBindGroupLayout({
-			entries: [
-				{
-					binding: 0,
-					visibility: GPUShaderStage.COMPUTE,
-					buffer: { type: 'read-only-storage' },
-				},
-				{
-					binding: 1,
-					visibility: GPUShaderStage.COMPUTE,
-					buffer: { type: 'read-only-storage' },
-				},
-				{
-					binding: 2,
-					visibility: GPUShaderStage.COMPUTE,
-					buffer: { type: 'storage' },
-				},
-				{
-					binding: 3,
-					visibility: GPUShaderStage.COMPUTE,
-					buffer: { type: 'uniform' },
-				},
-			],
-		});
-	}
-
-	shaderCode() {
-		return `
-		struct SizeEtc {
-			objectSize: u32,
-		};
-		
-		@group(0) @binding(0) var<storage, read> comparisonArray : array<u32>;
-		@group(0) @binding(1) var<storage, read> matrixArray : array<u32>;
-		@group(0) @binding(2) var<storage, read_write> outputArray : array<u32>;
-		@group(0) @binding(3) var<uniform> myUniforms: SizeEtc;
-		
-		@compute @workgroup_size(64)
-		fn main(@builtin(global_invocation_id) global_id : vec3<u32>) {
-				let idx = global_id.x;
-				let o = myUniforms.objectSize;
-				if (idx < arrayLength(&matrixArray)) {
-						var totalDistance: u32 = 0;
-						for (var i: u32 = 0; i < o; i = i + 1) {
-								for (var j: u32 = 0; j < arrayLength(&comparisonArray) / o; j = j + 1) {
-									totalDistance = totalDistance + countOneBits(comparisonArray[j * o + i] ^ matrixArray[idx * o + i]);
-								}
-						}
-						outputArray[global_id.x] = totalDistance;
-				}
-		}
-	`}
-
-	setComparisonArray(
-		arr: Vector<Bool>
-	) {
-		const underlying = arr.data[0]
-		if (underlying.type.typeId !== Type.Bool) {
-			throw new Error("uhuh")
-		}
-		const bytes = underlying.values.slice(underlying.offset / 8, underlying.offset / 8 + underlying.length / 8)
-		if (bytes.length !== 768 / 8) {
-			throw new Error("WTF")
-		}
-		this.comparisonBuffer = createSingletonBuffer(
-			this.gpuState.device,
-			bytes,
-			GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
-		);
-		this.dimensionality = underlying.length;
-	}
-
-	uniforms() {
-		return {
-			objectSize: this.dimensionality / 32,
-		}
-	}
-
-	prepUniforms() {
-		const defs = makeShaderDataDefinitions(this.shaderCode());
-	
-		const myUniformValues = makeStructuredView(defs.uniforms.myUniforms);
-	
-		myUniformValues.set(this.uniforms());
-		return myUniformValues;
-	}
-
-	prep() {
-		const { device } = this.gpuState;
-		const layout = device.createPipelineLayout({
-			bindGroupLayouts: [this.bindGroupLayout(device)],
-		});
-		// Create shader module and pipeline
-		const shaderModule = device.createShaderModule({ code: this.shaderCode() });
-		this.pipeline = device.createComputePipeline({
-			layout,
-			compute: {
-				module: shaderModule,
-				entryPoint: 'main',
-			},
-		});
-		this.uniformBuffer = createSingletonBuffer(
-			device,
-			this.prepUniforms().arrayBuffer,
-			GPUBufferUsage.UNIFORM,
-		);
-	}
-
-	async runOnTile(tile: Tile) {
-		const { comparisonBuffer, fieldName, pipeline, uniformBuffer, dimensionality: embeddingSize } = this;
-		const { device } = this.gpuState;
-		const commandEncoder = device.createCommandEncoder();
-	
-		const { buffer, offset, byte_size: size } = await this.gpuState.get(fieldName, tile)
-		const outputSize = (size / embeddingSize) * 8;
-		const paddedSize = Math.ceil(outputSize / 4) * 4;
-	
-		// TODO this should be a permanent buffer.
-		const outputBuffer = device.createBuffer({
-			// Put a ceiling on it.
-			size: paddedSize * 4,
-			usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.STORAGE,
-		});
-	
-		const passEncoder = commandEncoder.beginComputePass();
-		passEncoder.setPipeline(pipeline);
-		passEncoder.setBindGroup(
-			0,
-			device.createBindGroup({
-				layout: pipeline.getBindGroupLayout(0),
-				entries: [
-					{ binding: 0, resource: { buffer: comparisonBuffer } },
-					{ binding: 1, resource: { buffer, offset, size } },
-					{ binding: 2, resource: { buffer: outputBuffer } },
-					{ binding: 3, resource: { buffer: uniformBuffer } },
-				],
-			}),
-		);
-	
-		passEncoder.dispatchWorkgroups(size / 4 / 64);
-		passEncoder.end();
-
-		// Submit the commands
-		const gpuReadBuffer = device.createBuffer({
-			size: paddedSize * 4,
-			usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
-		});
-
-		commandEncoder.copyBufferToBuffer(
-			outputBuffer,
-			0,
-			gpuReadBuffer,
-			0,
-			paddedSize * 4,
-		);
-		device.queue.submit([commandEncoder.finish()]);
-
-		// Read back the results
-		await gpuReadBuffer.mapAsync(GPUMapMode.READ);
-		const outputArray = new Uint32Array(gpuReadBuffer.getMappedRange());
-		const usable = outputArray.slice(0, outputSize);
-		const returnVal = new Float32Array(usable.length)
-		for (let i = 0; i < returnVal.length; i++) {
-			returnVal[i] = usable[i] / embeddingSize // (originally this was squared??)
-		}
-		return vectorFromArray(returnVal)
-	}
-}
-	
-
-export async function create_hamming_transform(
-	deeptable: Deeptable,
-	field: string,
-	view: Vector<Bool>,
-) : Promise<Transformation> {
-
-	const gpuState = await deeptable.deepGPU
-	const pipeline = new HammingPipeline(gpuState, field);
-	pipeline.setComparisonArray(view)
-	pipeline.prep();
-	return (tile: Tile) => pipeline.runOnTile(tile)
-}
-
-
+}
\ No newline at end of file