
Commit 1e1ae8e

committed
webGPU resources
1 parent 7237d17 commit 1e1ae8e

File tree

3 files changed: +491 -0 lines changed

src/webGPU/buffertools.ts

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
import { isTypedArray, type TypedArray } from 'webgpu-utils';
import { BufferSet } from '../regl_rendering';
import { WebGPUBufferLocation } from '../types';

// I track locations on buffers like this.
// We keep track of both size -- the number of meaningful data bytes --
// and paddedSize -- the number of bytes including 256-byte padding.

export class WebGPUBufferSet extends BufferSet<GPUBuffer, WebGPUBufferLocation> {
  // Copied with alterations from deepscatter.

  // An abstraction creating an expandable set of buffers that can be subdivided
  // to put more than one variable on the same block of memory. Reusing buffers
  // this way can have performance benefits over allocating multiple different
  // buffers for each small block used.

  // The general purpose here is to call 'allocate_block', which releases a block
  // of memory to use in creating a new array to be passed to the GPU.

  public device: GPUDevice;
  private stagingBuffer: GPUBuffer;
  public usage: number;

  public store: Map<string, WebGPUBufferLocation> = new Map();

  /**
   * @param device the GPU device we're using.
   * @param buffer_size The number of bytes on each strip of memory that we'll ask for.
   */
  constructor(
    device: GPUDevice,
    buffer_size: number,
    usage: number = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
  ) {
    super(buffer_size);
    this.device = device;
    // Track the ends in case we want to allocate smaller items.
    this.usage = usage;
    this.generate_new_buffer();
    this.stagingBuffer = device.createBuffer({
      size: buffer_size,
      usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.MAP_WRITE,
      mappedAtCreation: false // saves a little trouble in the passThrough function
    });
  }

  private async passThroughStagingBuffer(values: Uint32Array, bufferLocation: WebGPUBufferLocation) {
    // Write the values into the shared staging buffer, then copy them into
    // their final location on the GPU.
    const { buffer, offset, paddedSize } = bufferLocation;
    while (this.stagingBuffer.mapState !== 'unmapped') {
      // Wait in line for a millisecond.
      // Would be better to hold a queue and apply more than one of these at once.
      await new Promise((resolve) => setTimeout(resolve, 1));
    }
    await this.stagingBuffer.mapAsync(GPUMapMode.WRITE, 0, paddedSize);
    new Uint32Array(this.stagingBuffer.getMappedRange(0, values.byteLength)).set(values);
    this.stagingBuffer.unmap();
    const commandEncoder = this.device.createCommandEncoder();
    commandEncoder.copyBufferToBuffer(this.stagingBuffer, 0, buffer, offset, paddedSize);
    this.device.queue.submit([commandEncoder.finish()]);
  }

  register(k: string, v: WebGPUBufferLocation) {
    this.store.set(k, v);
  }

  async set(key: string, value: TypedArray) {
    if (this.store.has(key)) {
      throw new Error(`Key ${key} already exists in buffer set.`);
    }
    const size = value.byteLength;
    // Copy offsets and sizes must be aligned to 256 bytes.
    const paddedSize = Math.ceil(size / 256) * 256;

    const { buffer, offset } = this.allocate_block(paddedSize);

    // Since it's a typed array, we can copy it directly by viewing its
    // underlying bytes as a Uint32Array (assumes byteLength is a multiple of 4).
    const v2 = value;
    const data = new Uint32Array(v2.buffer, v2.byteOffset, v2.byteLength / 4);
    const description = { buffer, offset, size, paddedSize };
    await this.passThroughStagingBuffer(data, description);
    this.register(key, description);
  }

  _create_buffer(): GPUBuffer {
    return this.device.createBuffer({
      size: this.buffer_size,
      usage: this.usage,
      mappedAtCreation: false
    });
  }

  _create_leftover_buffer(): WebGPUBufferLocation {
    return {
      buffer: this.buffers[0],
      offset: this.pointer,
      stride: 4, // meaningless here.
      byte_size: this.buffer_size - this.pointer,
      paddedSize: this.buffer_size - this.pointer
    };
  }
}

export function createSingletonBuffer(
  device: GPUDevice,
  data: TypedArray | ArrayBuffer,
  usage: number
): GPUBuffer {
  // Creates a disposable singleton buffer.
  // ReadonlyBufferSet ought to provide better performance; but
  // this allows more different buffer sizes and easier destruction.
  const buffer = device.createBuffer({
    size: data.byteLength, // must be a multiple of 4 when mappedAtCreation is true.
    usage,
    mappedAtCreation: true
  });
  const mappedRange = buffer.getMappedRange();
  if (isTypedArray(data)) {
    // Copy raw bytes rather than elements, so that narrower arrays
    // (e.g. Uint8Array or Uint16Array) and float data keep their bit
    // patterns instead of being coerced element-wise into 32-bit slots.
    const td = data as TypedArray;
    new Uint8Array(mappedRange).set(new Uint8Array(td.buffer, td.byteOffset, td.byteLength));
  } else {
    new Uint8Array(mappedRange).set(new Uint8Array(data as ArrayBuffer));
  }
  buffer.unmap();
  return buffer;
}
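
A minimal usage sketch of the two exports above, assuming a GPUDevice acquired elsewhere and the BufferSet base class from ../regl_rendering behaving as its use here implies; the demo function, buffer sizes, and example values are hypothetical:

import { WebGPUBufferSet, createSingletonBuffer } from './buffertools';

export async function demo(device: GPUDevice) {
  // One 64 KB strip of memory, subdivided into 256-byte-aligned blocks per key.
  const buffers = new WebGPUBufferSet(device, 64 * 1024);

  // `set` pads the four-float column up to a 256-byte boundary and copies it
  // through the shared staging buffer into its block.
  await buffers.set('weights', new Float32Array([0.25, 0.5, 0.25, 0]));
  const loc = buffers.store.get('weights');
  console.log(loc?.size, loc?.paddedSize); // 16, 256

  // For small one-off uploads, a throwaway buffer is simpler than a block.
  const uniforms = createSingletonBuffer(
    device,
    new Uint32Array([128, 8, 32, 768]),
    GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
  );
  return { buffers, uniforms };
}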

src/webGPU/forests.ts

Lines changed: 170 additions & 0 deletions
@@ -0,0 +1,170 @@
import { createSingletonBuffer, WebGPUBufferSet } from "./buffertools";
import { StatefulGPU } from "./lib";

type TinyForestParams = {
  nTrees: number;
  depth: number;
  // The number of features to consider at each split.
  maxFeatures: number;
  D: number;
}

const defaultTinyForestParams: TinyForestParams = {
  nTrees: 128,
  depth: 8,
  maxFeatures: 32,
  D: 768,
}

export class TinyForest extends StatefulGPU {
  params: TinyForestParams;

  private _bootstrapSamples?: GPUBuffer; // On the order of 100 KB.
  protected _forests?: GPUBuffer; // On the order of 10 MB.
  // private trainedThrough: number = 0;

  constructor(
    device: GPUDevice,
    bufferSize = 1024 * 1024 * 256,
    t: Partial<TinyForestParams> = {}
  ) {
    super(device, bufferSize);
    this.params = { ...defaultTinyForestParams, ...t };
    this.initializeForestsToZero();
    this.bufferSet = new WebGPUBufferSet(device, bufferSize);
  }

  countPipeline(): GPUComputePipeline {
    const { device } = this;
    // const { maxFeatures, nTrees } = this.params;
    // const OPTIONS = 2;
    // const countBuffer = device.createBuffer({
    //   size: OPTIONS * maxFeatures * nTrees * 4,
    //   usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
    //   mappedAtCreation: false
    // });

    const layout = device.createBindGroupLayout({
      entries: [
        {
          // features buffer;
          binding: 0,
          visibility: GPUShaderStage.COMPUTE,
          buffer: { type: 'storage' }
        },
        {
          // dims-to-check array;
          binding: 1,
          visibility: GPUShaderStage.COMPUTE,
          buffer: { type: 'storage' }
        },
        {
          // output count buffer.
          binding: 2,
          visibility: GPUShaderStage.COMPUTE,
          buffer: { type: 'storage' }
        }
      ]
    });

    // const subsetsToCheck = this.chooseNextFeatures();
    const pipelineLayout = device.createPipelineLayout({ bindGroupLayouts: [layout] });

    // Note: WGSL has no u16 scalar type, so the dims to check are declared as u32 here,
    // and storage buffers may only use the 'read' or 'read_write' access modes.
    const shaderModule = device.createShaderModule({ code: `
      @group(0) @binding(0) var<storage, read> features: array<u32>;
      @group(0) @binding(1) var<storage, read> dimsToCheck: array<u32>;
      @group(0) @binding(2) var<storage, read_write> counts: array<u32>;

      @compute @workgroup_size(64)
      // TODO HERE
    ` });

    return device.createComputePipeline({
      layout: pipelineLayout,
      compute: {
        module: shaderModule,
        entryPoint: 'main'
      }
    });
  }

  //@ts-expect-error foo
  private chooseNextFeatures(n = 32) {
    console.log({ n });
    const { maxFeatures, nTrees, D } = this.params;
    // One block of maxFeatures feature indices per tree.
    const features = new Uint16Array(maxFeatures * nTrees);
    for (let i = 0; i < nTrees; i++) {
      const set = new Set<number>();
      while (set.size < maxFeatures) {
        set.add(Math.floor(Math.random() * D));
      }
      // Sort numerically; the default sort() would compare as strings.
      const arr = new Uint16Array([...set].sort((a, b) => a - b));
      features.set(arr, i * maxFeatures);
    }
    return createSingletonBuffer(
      this.device,
      features,
      GPUBufferUsage.STORAGE
    );
  }

  initializeForestsToZero() {
    // Each tree is a set of bits. For every possible configuration,
    // the first D bits indicate the desired outcome for the dimension,
    // and the second D bits indicate whether the bits in those
    // positions are to be considered in checking if the tree
    // fits. There are 2**depth bitmasks for each dimension -- each point
    // will match only one, and part of the inference task is determining which one.

    const treeSizeInBytes =
      2 * this.params.D * (2 ** this.params.depth) / 8;

    const data = new Uint8Array(treeSizeInBytes * this.params.nTrees);
    this._forests = createSingletonBuffer(
      this.device,
      data,
      GPUBufferUsage.STORAGE
    );
  }

  // Rather than actually bootstrap, we generate a single
  // list of 100,000 numbers drawn from a Poisson distribution.
  // These serve as weights for draws with replacement; to
  // bootstrap any given record batch, we take a sequence of
  // numbers from the buffer with offset i.
  get bootstrapSamples() {
    if (this._bootstrapSamples) {
      return this._bootstrapSamples;
    } else {
      const arr = new Uint8Array(100000);
      for (let i = 0; i < arr.length; i++) {
        arr[i] = poissonRandomNumber();
      }
      this._bootstrapSamples = createSingletonBuffer(
        this.device,
        arr,
        GPUBufferUsage.STORAGE
      );
      return this._bootstrapSamples;
    }
  }
}

// Draws from a Poisson distribution with mean 1 (Knuth's multiplication method).
function poissonRandomNumber(): number {
  let p = 1.0;
  let k = 0;

  do {
    k++;
    p *= Math.random();
  } while (p > 1 / Math.E);

  return k - 1;
}
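
A sizing and construction sketch for the default parameters, assuming a browser context where navigator.gpu is available and that StatefulGPU's constructor takes (device, bufferSize) as it is called above; the buildForest function itself is hypothetical:

import { TinyForest } from './forests';

export async function buildForest(): Promise<TinyForest> {
  const adapter = await navigator.gpu.requestAdapter();
  if (!adapter) throw new Error('WebGPU is not available');
  const device = await adapter.requestDevice();

  // With the defaults (D = 768, depth = 8, nTrees = 128):
  //   bits per tree  = 2 * D * 2**depth = 2 * 768 * 256 = 393,216
  //   bytes per tree = 393,216 / 8      = 49,152 (~48 KB)
  //   all trees      = 49,152 * 128     = ~6 MB ("on the order of 10 MB")
  // The bootstrap-weight buffer holds 100,000 one-byte Poisson(1) draws (~100 KB).
  return new TinyForest(device, 1024 * 1024 * 256, { nTrees: 128, depth: 8 });
}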
