Skip to content

feat: Implement write instruction compilation #813

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Mar 17, 2025
2 changes: 2 additions & 0 deletions apps/typegpu-docs/src/pages/benchmark/suites.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { Bench } from 'tinybench';
import { atomWithUrl } from './atom-with-url';
import type { TypeGPUDataModule, TypeGPUModule } from './modules';
import type { BenchParameterSet } from './parameter-set';
import { compiledWriteSuite } from './test-suites/compiled-write';
import { partialWriteSuite } from './test-suites/partial-write';

/**
 * String identifier made of two parts joined by an underscore.
 * NOTE(review): presumably `<suite>_<test>` — confirm against the code that builds these.
 */
export type TestIdentifier = `${string}_${string}`;
Expand Down Expand Up @@ -37,6 +38,7 @@ export function createSuite<T extends { bench: Bench }>(

/**
 * Every benchmark suite registered in this module, keyed by suite name.
 * NOTE(review): presumably the input that `getFilteredSuites` narrows down — confirm.
 */
export const unfilteredSuites: Record<string, Suite> = {
'Partial write': partialWriteSuite,
'Compiled write': compiledWriteSuite,
};

export function getFilteredSuites(selectedTests: TestIdentifier[]) {
Expand Down
289 changes: 289 additions & 0 deletions apps/typegpu-docs/src/pages/benchmark/test-suites/compiled-write.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
import { Bench } from 'tinybench';
import type { TgpuBuffer, TgpuRoot } from 'typegpu';
import type { v3f } from 'typegpu/data';
import { stringifyLocator } from '../parameter-set';
import { createSuite } from '../suites';

/**
 * Benchmark suite for whole-buffer writes.
 *
 * Two element layouts are exercised, each at 32, 32² and 32³ elements:
 *  - `Boid`: a struct of two `vec3f` fields,
 *  - `Particle`: a struct of four `f32` fields (the "vectorless" cases).
 *
 * For every layout/size pair there is a "WebGPU reference" task that uploads
 * a pre-serialized `ArrayBuffer` through `queue.writeBuffer`, and a "TypeGPU"
 * task that hands plain JS values to `TgpuBuffer.write`. Every task awaits
 * `queue.onSubmittedWorkDone()` before finishing.
 */
export const compiledWriteSuite = createSuite(
  ({ params, tgpuModule, d }) => {
    const { tgpu } = tgpuModule;

    type BoidValue = { pos: v3f; vel: v3f };
    type ParticleValue = { x: number; y: number; z: number; opacity: number };

    // Typed `null` placeholder for context slots that are assigned later
    // (`bench` right below the context, everything else inside `setup`).
    const unset = <T>(): T => null as unknown as T;

    const sizes = {
      small: 32,
      medium: 32 ** 2,
      large: 32 ** 3,
    };
    const sizeKeys = ['small', 'medium', 'large'] as const;

    const Boid = d.struct({
      pos: d.vec3f,
      vel: d.vec3f,
    });

    const Particle = d.struct({
      x: d.f32,
      y: d.f32,
      z: d.f32,
      opacity: d.f32,
    });

    const boidArrays = {
      small: d.arrayOf(Boid, sizes.small),
      medium: d.arrayOf(Boid, sizes.medium),
      large: d.arrayOf(Boid, sizes.large),
    };

    const particleArrays = {
      small: d.arrayOf(Particle, sizes.small),
      medium: d.arrayOf(Particle, sizes.medium),
      large: d.arrayOf(Particle, sizes.large),
    };

    const ctx = {
      bench: unset<Bench>(),
      sizes,
      d,
      root: unset<TgpuRoot>(),
      buffers: {
        small: unset<TgpuBuffer<typeof boidArrays.small>>(),
        medium: unset<TgpuBuffer<typeof boidArrays.medium>>(),
        large: unset<TgpuBuffer<typeof boidArrays.large>>(),
      },
      vectorlessBuffers: {
        small: unset<TgpuBuffer<typeof particleArrays.small>>(),
        medium: unset<TgpuBuffer<typeof particleArrays.medium>>(),
        large: unset<TgpuBuffer<typeof particleArrays.large>>(),
      },
      webgpuData: {
        small: unset<ArrayBuffer>(),
        medium: unset<ArrayBuffer>(),
        large: unset<ArrayBuffer>(),
      },
      webgpuVectorlessData: {
        small: unset<ArrayBuffer>(),
        medium: unset<ArrayBuffer>(),
        large: unset<ArrayBuffer>(),
      },
      typegpuBoidData: {
        small: unset<Array<BoidValue>>(),
        medium: unset<Array<BoidValue>>(),
        large: unset<Array<BoidValue>>(),
      },
      typegpuParticleData: {
        small: unset<Array<ParticleValue>>(),
        medium: unset<Array<ParticleValue>>(),
        large: unset<Array<ParticleValue>>(),
      },
    };

    ctx.bench = new Bench({
      name: stringifyLocator('typegpu', params.typegpu),
      time: 1000,
      async setup() {
        ctx.root = await tgpu.init();
        const { root } = ctx;

        ctx.buffers.small = root.createBuffer(boidArrays.small);
        ctx.buffers.medium = root.createBuffer(boidArrays.medium);
        ctx.buffers.large = root.createBuffer(boidArrays.large);

        // NOTE(review): the `in` check presumably keeps the suite runnable
        // against typegpu builds that lack `compileWriter` — confirm.
        for (const boidBuffer of Object.values(ctx.buffers)) {
          if ('compileWriter' in boidBuffer) {
            boidBuffer.compileWriter();
          }
        }

        ctx.vectorlessBuffers.small = root.createBuffer(particleArrays.small);
        ctx.vectorlessBuffers.medium = root.createBuffer(particleArrays.medium);
        ctx.vectorlessBuffers.large = root.createBuffer(particleArrays.large);

        for (const particleBuffer of Object.values(ctx.vectorlessBuffers)) {
          if ('compileWriter' in particleBuffer) {
            particleBuffer.compileWriter();
          }
        }

        // All inputs are prepared here so that the tasks measure only the write.
        for (const key of sizeKeys) {
          const count = sizes[key];

          // Raw bytes for the Boid reference case: 8 floats per element, with
          // slots 3 and 7 left at zero.
          // NOTE(review): presumably vec3f padding — confirm against the layout.
          const boidBytes = new ArrayBuffer(d.sizeOf(d.arrayOf(Boid, count)));
          const boidFloats = new Float32Array(boidBytes);
          for (let i = 0; i < count; i += 1) {
            const base = i * 8;
            boidFloats[base + 0] = 1;
            boidFloats[base + 1] = 2;
            boidFloats[base + 2] = 3;

            boidFloats[base + 4] = 4;
            boidFloats[base + 5] = 5;
            boidFloats[base + 6] = 6;
          }
          ctx.webgpuData[key] = boidBytes;

          // Raw bytes for the Particle reference case: 4 floats per element.
          const particleBytes = new ArrayBuffer(
            d.sizeOf(d.arrayOf(Particle, count)),
          );
          const particleFloats = new Float32Array(particleBytes);
          for (let i = 0; i < count; i += 1) {
            const base = i * 4;
            particleFloats[base + 0] = 1;
            particleFloats[base + 1] = 2;
            particleFloats[base + 2] = 3;
            particleFloats[base + 3] = 4;
          }
          ctx.webgpuVectorlessData[key] = particleBytes;

          // JS-side values carrying the same numbers, for `TgpuBuffer.write`.
          ctx.typegpuBoidData[key] = Array.from({ length: count }, () => ({
            pos: d.vec3f(1, 2, 3),
            vel: d.vec3f(4, 5, 6),
          }));

          ctx.typegpuParticleData[key] = Array.from({ length: count }, () => ({
            x: 1,
            y: 2,
            z: 3,
            opacity: 4,
          }));
        }
      },
      teardown() {
        ctx.root.destroy();
      },
    });

    return ctx;
  },
  {
    'WebGPU reference (32 elements)': (getCtx) => async () => {
      const ctx = getCtx();
      const { queue } = ctx.root.device;
      queue.writeBuffer(
        ctx.root.unwrap(ctx.buffers.small),
        0,
        ctx.webgpuData.small,
      );
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'WebGPU reference (32² elements)': (getCtx) => async () => {
      const ctx = getCtx();
      const { queue } = ctx.root.device;
      queue.writeBuffer(
        ctx.root.unwrap(ctx.buffers.medium),
        0,
        ctx.webgpuData.medium,
      );
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'WebGPU reference (32³ elements)': (getCtx) => async () => {
      const ctx = getCtx();
      const { queue } = ctx.root.device;
      queue.writeBuffer(
        ctx.root.unwrap(ctx.buffers.large),
        0,
        ctx.webgpuData.large,
      );
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'TypeGPU (32 elements)': (getCtx) => async () => {
      const ctx = getCtx();
      ctx.buffers.small.write(ctx.typegpuBoidData.small);
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'TypeGPU (32² elements)': (getCtx) => async () => {
      const ctx = getCtx();
      ctx.buffers.medium.write(ctx.typegpuBoidData.medium);
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'TypeGPU (32³ elements)': (getCtx) => async () => {
      const ctx = getCtx();
      ctx.buffers.large.write(ctx.typegpuBoidData.large);
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'WebGPU reference vectorless (32 elements)': (getCtx) => async () => {
      const ctx = getCtx();
      const { queue } = ctx.root.device;
      queue.writeBuffer(
        ctx.root.unwrap(ctx.vectorlessBuffers.small),
        0,
        ctx.webgpuVectorlessData.small,
      );
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'WebGPU reference vectorless (32² elements)': (getCtx) => async () => {
      const ctx = getCtx();
      const { queue } = ctx.root.device;
      queue.writeBuffer(
        ctx.root.unwrap(ctx.vectorlessBuffers.medium),
        0,
        ctx.webgpuVectorlessData.medium,
      );
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'WebGPU reference vectorless (32³ elements)': (getCtx) => async () => {
      const ctx = getCtx();
      const { queue } = ctx.root.device;
      queue.writeBuffer(
        ctx.root.unwrap(ctx.vectorlessBuffers.large),
        0,
        ctx.webgpuVectorlessData.large,
      );
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'TypeGPU vectorless (32 elements)': (getCtx) => async () => {
      const ctx = getCtx();
      ctx.vectorlessBuffers.small.write(ctx.typegpuParticleData.small);
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'TypeGPU vectorless (32² elements)': (getCtx) => async () => {
      const ctx = getCtx();
      ctx.vectorlessBuffers.medium.write(ctx.typegpuParticleData.medium);
      await ctx.root.device.queue.onSubmittedWorkDone();
    },

    'TypeGPU vectorless (32³ elements)': (getCtx) => async () => {
      const ctx = getCtx();
      ctx.vectorlessBuffers.large.write(ctx.typegpuParticleData.large);
      await ctx.root.device.queue.onSubmittedWorkDone();
    },
  },
);
34 changes: 31 additions & 3 deletions packages/typegpu/src/core/buffer/buffer.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import { BufferReader, BufferWriter } from 'typed-binary';
import { getSystemEndianness } from 'typed-binary';
import { isWgslData } from '../../data';
import {
EVAL_ALLOWED_IN_ENV,
getCompiledWriterForSchema,
} from '../../data/compiledIO';
import { readData, writeData } from '../../data/dataIO';
import type { AnyData } from '../../data/dataTypes';
import { getWriteInstructions } from '../../data/partialIO';
Expand Down Expand Up @@ -91,6 +96,7 @@ export interface TgpuBuffer<TData extends BaseData> extends TgpuNamable {

as<T extends ViewUsages<this>>(usage: T): UsageTypeToBufferUsage<TData>[T];

compileWriter(): void;
write(data: Infer<TData>): void;
writePartial(data: InferPartial<TData>): void;
copyFrom(srcBuffer: TgpuBuffer<MemIdentity<TData>>): void;
Expand Down Expand Up @@ -127,6 +133,7 @@ export function isUsableAsVertex<T extends TgpuBuffer<AnyData>>(
// --------------
// Implementation
// --------------
// Byte order reported by typed-binary's `getSystemEndianness()`, captured once
// here so the write paths can pass `endianness === 'little'` to compiled writers.
const endianness = getSystemEndianness();

type RestrictVertexUsages<TData extends BaseData> = TData extends {
readonly type: WgslTypeLiteral;
Expand All @@ -141,6 +148,7 @@ class TgpuBufferImpl<TData extends AnyData> implements TgpuBuffer<TData> {
private _buffer: GPUBuffer | null = null;
private _ownBuffer: boolean;
private _destroyed = false;
private _hostBuffer: ArrayBuffer | undefined;

private _label: string | undefined;
readonly initial: Infer<TData> | undefined;
Expand Down Expand Up @@ -245,24 +253,44 @@ class TgpuBufferImpl<TData extends AnyData> implements TgpuBuffer<TData> {
return this;
}

/**
 * Requests the compiled writer for this buffer's schema and discards the result.
 * NOTE(review): presumably this warms a cache inside `getCompiledWriterForSchema`
 * so the first `write()` does not pay the compilation cost — confirm in compiledIO.
 *
 * @throws Error when `EVAL_ALLOWED_IN_ENV` is falsy.
 */
compileWriter(): void {
  if (!EVAL_ALLOWED_IN_ENV) {
    throw new Error('This environment does not allow eval');
  }

  getCompiledWriterForSchema(this.dataType);
}

/**
 * Serializes `data` according to this buffer's schema and stores it in the
 * underlying GPU buffer.
 *
 * - If the GPU buffer is currently mapped, the data is written straight into
 *   the mapped range and no queue upload is issued.
 * - Otherwise the data is serialized into a host-side `ArrayBuffer` that is
 *   allocated on first use and reused by later calls, pending commands are
 *   flushed, and the bytes are uploaded with a single
 *   `device.queue.writeBuffer` call.
 *
 * In both cases the compiled writer is used when `EVAL_ALLOWED_IN_ENV` is
 * truthy; otherwise serialization falls back to `writeData` with a
 * `BufferWriter`.
 *
 * @param data Value matching the buffer's schema.
 */
write(data: Infer<TData>): void {
  const gpuBuffer = this.buffer;
  const device = this._group.device;

  if (gpuBuffer.mapState === 'mapped') {
    const mapped = gpuBuffer.getMappedRange();
    if (EVAL_ALLOWED_IN_ENV) {
      const writer = getCompiledWriterForSchema(this.dataType);
      writer(new DataView(mapped), 0, data, endianness === 'little');
      return;
    }
    writeData(new BufferWriter(mapped), this.dataType, data);
    return;
  }

  const size = sizeOf(this.dataType);
  if (!this._hostBuffer) {
    // Allocated once and reused, so repeated writes do not allocate per call.
    this._hostBuffer = new ArrayBuffer(size);
  }

  // Flushing any commands yet to be encoded.
  this._group.flush();

  // Serialize exactly once into the cached staging buffer. The former
  // per-call `hostBuffer` allocation, its `writeData` pass and its own
  // `queue.writeBuffer` were redundant with this path and doubled the upload.
  if (EVAL_ALLOWED_IN_ENV) {
    const writer = getCompiledWriterForSchema(this.dataType);
    writer(new DataView(this._hostBuffer), 0, data, endianness === 'little');
  } else {
    writeData(new BufferWriter(this._hostBuffer), this.dataType, data);
  }
  device.queue.writeBuffer(gpuBuffer, 0, this._hostBuffer, 0, size);
}

public writePartial(data: InferPartial<TData>): void {
Expand Down
Loading