diff --git a/implicit3-rt/denoise.js b/implicit3-rt/denoise.js
index 91ae243..75d1c58 100644
--- a/implicit3-rt/denoise.js
+++ b/implicit3-rt/denoise.js
@@ -199,12 +199,17 @@ function UNet1(nin, n0, n1, n2, n3, params) {
 
 function initDenoiserModel_unet1(params) {
-    let gl = renderer.gl;
-
     let unet = new UNet1(3, 16, 24, 48, 64, params);
     window.addEventListener("resize", function (event) {
         setTimeout(unet.updateLayers, 20);
     });
+    applyDenoiser(unet);
+    useDenoiser.denoisers['unet1'] = renderer.denoiser;
+}
+
+
+function applyDenoiser(model) {
+    let gl = renderer.gl;
 
     let programOutput = createShaderProgram(gl, null,
         `#version 300 es
@@ -223,23 +228,21 @@ function initDenoiserModel_unet1(params) {
     renderer.denoiser = function(inputs, framebuffer) {
         if (inputs.pixel !== 'framebuffer')
             throw new Error("Unsupported NN input");
-        gl.bindTexture(gl.TEXTURE_2D, unet.layers.input.imgs[0].texture);
+        gl.bindTexture(gl.TEXTURE_2D, model.layers.input.imgs[0].texture);
         gl.copyTexImage2D(gl.TEXTURE_2D, 0, gl.RGBA32F, 0, 0, state.width, state.height, 0);
-        unet.forward();
+        model.forward();
 
         gl.disable(gl.BLEND);
         gl.bindFramebuffer(gl.FRAMEBUFFER, framebuffer);
         gl.useProgram(programOutput);
         setPositionBuffer(gl, programOutput);
         gl.activeTexture(gl.TEXTURE0);
-        gl.bindTexture(gl.TEXTURE_2D, unet.layers.output.imgs[0].texture);
+        gl.bindTexture(gl.TEXTURE_2D, model.layers.output.imgs[0].texture);
         gl.uniform1i(gl.getUniformLocation(programOutput, "uSrc"), 0);
         gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
     }
-    useDenoiser.denoisers['unet1'] = renderer.denoiser;
 }
 
-
 function applyResidualDenoiser(model) {
     let gl = renderer.gl;
 
@@ -322,16 +325,15 @@ function applyNormalizedResidualDenoiser(model) {
         gl.copyTexImage2D(gl.TEXTURE_2D, 0, gl.RGBA32F, 0, 0, state.width, state.height, 0);
         // normalize
-        var mean = Dnn.global_mean(gl, layerTempI);
-        var std = Dnn.global_std(gl, layerTempI);
-        Dnn.batch_norm_2d(gl, layerTempI, model.layers.input, 0.0, 1.0, mean, std);
+        var ms = Dnn.global_mean_and_std(gl, layerTempI);
+        Dnn.batch_norm_2d(gl, layerTempI, model.layers.input, 0.0, 1.0, ms.mean, ms.std);
         // inference residual model
         model.forward();
         Dnn.add(gl, model.layers.input, model.layers.output, layerTempI);
         // normalize back
-        for (var i = 0; i < mean.length; i++)
-            std[i] = 1.0 / std[i], mean[i] *= -std[i];
-        Dnn.batch_norm_2d(gl, layerTempI, layerTempO, 0.0, 1.0, mean, std);
+        for (var i = 0; i < ms.length; i++)
+            ms.std[i] = 1.0 / ms.std[i], ms.mean[i] *= -ms.std[i];
+        Dnn.batch_norm_2d(gl, layerTempI, layerTempO, 0.0, 1.0, ms.mean, ms.std);
         // gamma transform
         gl.bindFramebuffer(gl.FRAMEBUFFER, framebuffer);
         gl.useProgram(programOutput);
@@ -540,12 +542,149 @@ function initDenoiserModel_runet2gan2(params) {
     useDenoiser.denoisers['runet2gan2'] = renderer.denoiser;
 }
 
+
+function AttUNet1(nin, k1, k2, k3, k4, ko, params) {
+    let gl = renderer.gl;
+    let kc = k1+ko;
+    if (kc % 2) throw new Error("Fractional attention layer size.");
+    let enc1 = new Dnn.Conv2d311(nin, k1, params['enc1.weight'], params['enc1.bias']);
+    let div1 = new Dnn.Conv2d321(k1, k2, params['div1.weight'], params['div1.bias']);
+    let enc2 = new Dnn.Conv2d311(k2, k2, params['enc2.weight'], params['enc2.bias']);
+    let div2 = new Dnn.Conv2d321(k2, k3, params['div2.weight'], params['div2.bias']);
+    let enc3 = new Dnn.Conv2d311(k3, k3, params['enc3.weight'], params['enc3.bias']);
+    let div3 = new Dnn.Conv2d321(k3, k4, params['div3.weight'], params['div3.bias']);
+    let dec31 = new Dnn.Conv2d311(k4, k4, params['dec31.weight'], params['dec31.bias']);
+    let dec32 = new Dnn.Conv2d311(k4, k4, params['dec32.weight'], params['dec32.bias']);
+    let upc3 = new Dnn.ConvTranspose2D421(k4, k3, params['upc3.weight'], params['upc3.bias']);
+    let dec21 = new Dnn.Conv2d311(k3, k3, params['dec21.weight'], params['dec21.bias']);
+    let dec22 = new Dnn.Conv2d311(k3, k3, params['dec22.weight'], params['dec22.bias']);
+    let upc2 = new Dnn.ConvTranspose2D421(k3+k3, k2, params['upc2.weight'], params['upc2.bias']);
+    let dec11 = new Dnn.Conv2d311(k2, k2, params['dec11.weight'], params['dec11.bias']);
+    let dec12 = new Dnn.Conv2d311(k2, k2, params['dec12.weight'], params['dec12.bias']);
+    let upc1 = new Dnn.ConvTranspose2D421(k2+k2, ko, params['upc1.weight'], params['upc1.bias']);
+    let dec01 = new Dnn.Conv2d311(ko, ko, params['dec01.weight'], params['dec01.bias']);
+    let dec02 = new Dnn.Conv2d311(ko, ko, params['dec02.weight'], params['dec02.bias']);
+    let att1conv1 = new Dnn.Conv2d110(kc, 1, params['attention1.conv1.weight'], params['attention1.conv1.bias']);
+    let att1conv2 = new Dnn.Conv2d110(kc, kc/2, params['attention1.conv2.weight'], params['attention1.conv2.bias']);
+    let att1conv3 = { w: params['attention1.conv3.weight'], b: params['attention1.conv3.bias'] };
+    let att2conv1 = new Dnn.Conv2d110(kc, kc/2, params['attention2.conv1.weight'], params['attention2.conv1.bias']);
+    let att2conv2 = new Dnn.Conv2d110(kc, kc/2, params['attention2.conv2.weight'], params['attention2.conv2.bias']);
+    let convo = new Dnn.Conv2d311(kc, 3, params['convo.weight'], params['convo.bias']);
+
+    let layers = {};
+    function ul(key, n, scale) {
+        var w = Math.ceil(state.width/16)*16;
+        var h = Math.ceil(state.height/16)*16;
+        var oldLayer = layers[key];
+        layers[key] = new Dnn.CNNLayer(gl, n, w/scale, h/scale);
+        if (oldLayer) Dnn.destroyCnnLayer(gl, oldLayer);
+    };
+    this.layers = layers;
+    this.updateLayers = function() {
+        ul("input", nin, 1);
+        ul("e1", k1, 1);
+        ul("e1r", k1, 1); ul("d1", k2, 2);
+        ul("d1r", k2, 2); ul("e2", k2, 2);
+        ul("e2r", k2, 2); ul("d2", k3, 4);
+        ul("d2r", k3, 4); ul("e3", k3, 4);
+        ul("e3r", k3, 4); ul("d3", k4, 8);
+        ul("d3r", k4, 8); ul("d31", k4, 8); ul("d31r", k4, 8); ul("d32", k4, 8);
+        ul("d32r", k4, 8); ul("u3", k3, 4);
+        ul("u3r", k3, 4); ul("d21", k3, 4); ul("d21r", k3, 4); ul("d22", k3, 4);
+        ul("d22r", k3, 4); ul("u2", k2, 2);
+        ul("u2r", k2, 2); ul("d11", k2, 2); ul("d11r", k2, 2); ul("d12", k2, 2);
+        ul("d12r", k2, 2); ul("u1", ko, 1);
+        ul("u1r", ko, 1); ul("d01", ko, 1); ul("d01r", ko, 1); ul("d02", ko, 1);
+        ul("d02r", ko, 1);
+        ul("att1x1", 1, 1); ul("att1x1a", 1, 1); ul("att1x2", kc/2, 1); ul("att1o", kc, 1);
+        ul("att2x1", kc/2, 1); ul("att2x2", kc/2, 1); ul("att2x3", 1, 1); ul("att2x3a", 1, 1); ul("att2o", kc, 1);
+        ul("output", 3, 1);
+    }
+    this.updateLayers();
+
+    function channelAttention(x) {
+        att1conv1.forward(gl, x, layers.att1x1);
+        Dnn.softmax2d(gl, layers.att1x1, layers.att1x1a);
+        att1conv2.forward(gl, x, layers.att1x2);
+        let x3 = Dnn.global_dot(gl, layers.att1x1a, layers.att1x2);
+        let x3a = new Array(kc);
+        for (var i = 0; i < kc; i++) {
+            var s = att1conv3.b[i];
+            for (var j = 0; j < kc/2; j++)
+                s += att1conv3.w[i*(kc/2)+j] * x3[j];
+            x3a[i] = 1.0 / (1.0+Math.exp(-s));
+        }
+        Dnn.batch_norm_2d(gl, x, layers.att1o, 0.0, x3a, 0.0, 1.0);
+    }
+
+    function spacialAttention(x) {
+        att2conv1.forward(gl, x, layers.att2x1);
+        let x1 = Dnn.global_mean(gl, layers.att2x1);
+        var sexp = 0.0;
+        for (var i = 0; i < kc/2; i++)
+            sexp += (x1[i] = Math.exp(x1[i]));
+        for (var i = 0; i < kc/2; i++)
+            x1[i] /= sexp;
+        att2conv2.forward(gl, x, layers.att2x2);
+        Dnn.channel_sum(gl, layers.att2x2, layers.att2x3, x1);
+        Dnn.sigmoid(gl, layers.att2x3, layers.att2x3a);
+        Dnn.mul(gl, x, layers.att2x3a, layers.att2o);
+    }
+
+    this.forward = function() {
+        enc1.forward(gl, layers.input, layers.e1);
+        Dnn.relu(gl, layers.e1, layers.e1r);
+        div1.forward(gl, layers.e1r, layers.d1);
+        Dnn.relu(gl, layers.d1, layers.d1r);
+        enc2.forward(gl, layers.d1r, layers.e2);
+        Dnn.relu(gl, layers.e2, layers.e2r);
+        div2.forward(gl, layers.e2r, layers.d2);
+        Dnn.relu(gl, layers.d2, layers.d2r);
+        enc3.forward(gl, layers.d2r, layers.e3);
+        Dnn.relu(gl, layers.e3, layers.e3r);
+        div3.forward(gl, layers.e3r, layers.d3);
+        Dnn.relu(gl, layers.d3, layers.d3r);
+        dec31.forward(gl, layers.d3r, layers.d31);
+        Dnn.relu(gl, layers.d31, layers.d31r);
+        dec32.forward(gl, layers.d31r, layers.d32);
+        Dnn.relu(gl, layers.d32, layers.d32r);
+        upc3.forward(gl, layers.d32r, layers.u3);
+        Dnn.relu(gl, layers.u3, layers.u3r);
+        dec21.forward(gl, layers.u3r, layers.d21);
+        Dnn.relu(gl, layers.d21, layers.d21r);
+        dec22.forward(gl, layers.d21r, layers.d22);
+        Dnn.relu(gl, layers.d22, layers.d22r);
+        upc2.forward(gl, Dnn.shallowConcat(layers.e3, layers.d22r), layers.u2);
+        Dnn.relu(gl, layers.u2, layers.u2r);
+        dec11.forward(gl, layers.u2r, layers.d11);
+        Dnn.relu(gl, layers.d11, layers.d11r);
+        dec12.forward(gl, layers.d11r, layers.d12);
+        Dnn.relu(gl, layers.d12, layers.d12r);
+        upc1.forward(gl, Dnn.shallowConcat(layers.e2, layers.d12r), layers.u1);
+        Dnn.relu(gl, layers.u1, layers.u1r);
+        dec01.forward(gl, layers.u1r, layers.d01);
+        Dnn.relu(gl, layers.d01, layers.d01r);
+        dec02.forward(gl, layers.d01r, layers.d02);
+        Dnn.relu(gl, layers.d02, layers.d02r);
+        channelAttention(Dnn.shallowConcat(layers.e1, layers.d02r));
+        // layers.output = layers.att1x1a; // visualize attention map
+        spacialAttention(layers.att1o);
+        // layers.output = layers.att2o;
+        convo.forward(gl, layers.att2o, layers.output);
+    };
+}
+
+
+
 
 function initDenoiserModel_temp(params) {
-    let unet = new UNet2(3, 12, 16, 24, 32, params);
+    // let unet = new UNet2(3, 12, 16, 24, 32, params);
+    let unet = new AttUNet1(3, 12, 16, 24, 32, 12, params);
     window.addEventListener("resize", function (event) {
         setTimeout(unet.updateLayers, 20);
     });
-    applyNormalizedResidualDenoiser(unet);
+    // applyNormalizedResidualDenoiser(unet);
+    applyResidualDenoiser(unet);
+    // applyDenoiser(unet);
     useDenoiser.denoisers['temp'] = renderer.denoiser;
 }
diff --git a/implicit3-rt/export_model.py b/implicit3-rt/export_model.py
index 1d18de5..5144349 100644
--- a/implicit3-rt/export_model.py
+++ b/implicit3-rt/export_model.py
@@ -2,11 +2,17 @@
 import numpy as np
 import json
 
+class AttentionChannelOnly(torch.nn.Module):
+    pass
+
+class AttentionSpacialOnly(torch.nn.Module):
+    pass
+
 class Model(torch.nn.Module):
     pass
 
 model = torch.load(
-    '../../Graphics/image/denoise/data_spirulae_5/resunet2gan_2_1.pth',
+    '../../Graphics/image/denoise/data_spirulae_5/attunet1_1_1.pth',
     map_location=torch.device('cpu'))
 
 state_dict = model.state_dict()
@@ -24,6 +30,8 @@ class Model(torch.nn.Module):
     amin, amax = np.amin(tensor), np.amax(tensor)
     vmin, vmax = -2**(nbit-1)+0.1, 2**(nbit-1)-1.1
     m = (amax-amin) / (vmax-vmin)
+    if m == 0.0:
+        m = 1.0
     b = amin - m * vmin
     item = {
         'shape': [*tensor.shape],
@@ -38,5 +46,5 @@ class Model(torch.nn.Module):
 
 name = "temp"
 with open(f"denoise_models/denoise_{name}.json", 'w') as fp:
-    json.dump(info, fp)
+    json.dump(info, fp, separators=(',', ':'))
 data.tofile(f"denoise_models/denoise_{name}.bin")
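
Editor's note on the export format above (not part of the patch): each tensor is quantized affinely, so a stored integer q maps back to m*q + b, with amin landing on vmin and amax on vmax; the new `m == 0.0` guard keeps the scale nonzero when a tensor is constant (amax == amin), and the compact JSON separators shrink the metadata file. A minimal decode sketch on the JavaScript side could look like the following; the field names other than `shape` (`m`, `b`, `offset`) and the 8-bit storage are assumptions, since the `item` dict and the binary dtype are truncated in this hunk.

function dequantizeTensor(entry, arrayBuffer) {
    // entry: one item from denoise_temp.json; arrayBuffer: contents of denoise_temp.bin
    const n = entry.shape.reduce((a, b) => a * b, 1);
    const q = new Int8Array(arrayBuffer, entry.offset, n);  // assumes nbit == 8
    const w = new Float32Array(n);
    // inverse of the affine map used in export_model.py
    for (let i = 0; i < n; i++) w[i] = entry.m * q[i] + entry.b;
    return w;
}
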
diff --git a/implicit3-rt/script.js b/implicit3-rt/script.js
index c22a386..071e67f 100644
--- a/implicit3-rt/script.js
+++ b/implicit3-rt/script.js
@@ -357,7 +357,7 @@ vec3 {%funname%}Color(float x, float y, float z) {\n\
         "../shaders/frag-copy.glsl",
         "../shaders/frag-rt-post.glsl",
         "../shaders/frag-tonemap.glsl",
-        "../shaders/dnn-conv2d311.glsl",
+        "../shaders/dnn-conv2d-3.glsl",
         "../shaders/dnn-conv2d110.glsl",
         "../shaders/dnn-convtranspose2d421.glsl",
         "../shaders/complex-zeta.glsl",
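
Editor's note (not part of the patch): the `scripts/dnn.js` changes that follow fold `global_mean` and `global_std` into a single templated reduction, `Dnn._global_mean_f(gl, f, buffer)`, where `f` is a GLSL expression over the fetched texel `x` and also serves as the shader/buffer cache key. `global_mean_and_std` then recovers the per-channel standard deviation from two such passes via Var[x] = E[x^2] - E[x]^2, clamped at zero so floating-point round-off cannot produce a negative variance.
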
diff --git a/scripts/dnn.js b/scripts/dnn.js
index bef69da..b44b38c 100644
--- a/scripts/dnn.js
+++ b/scripts/dnn.js
@@ -63,10 +63,12 @@ Dnn.destroyCnnLayer = function(gl, layer) {
 }
 
 
-Dnn.global_mean = function(gl, buffer) {
+Dnn._global_mean_f = function(gl, f, buffer) {
+    let programKey = 'programGlobalMean_' + f;
+    let bufferKey = 'bufferGlobalMean_' + f;
     const tile = 64;
-    if (!Dnn.programGlobalMean) {
-        Dnn.programGlobalMean = createShaderProgram(gl, null,
+    if (!Dnn[programKey]) {
+        Dnn[programKey] = createShaderProgram(gl, null,
             `#version 300 es
             precision mediump float;
 
@@ -83,23 +85,25 @@ Dnn.global_mean = function(gl, buffer) {
             vec4 total = vec4(0);
             for (int x = pos0.x; x < pos1.x; x++) {
                 vec4 s = vec4(0);
-                for (int y = pos0.y; y < pos1.y; y++)
-                    s += texelFetch(uSrc, ivec2(x,y), 0);
+                for (int y = pos0.y; y < pos1.y; y++) {
+                    vec4 x = texelFetch(uSrc, ivec2(x,y), 0);
+                    s += (${f});
+                }
                 total += s / sc.y;
             }
             fragColor = total / sc.x;
         }`);
     }
-    if (!Dnn.bufferGlobalMean) {
-        Dnn.bufferGlobalMean = createRenderTarget(gl, tile, tile, false, true, false);
+    if (!Dnn[bufferKey]) {
+        Dnn[bufferKey] = createRenderTarget(gl, tile, tile, false, true, false);
     }
-    let program = Dnn.programGlobalMean;
+    let program = Dnn[programKey];
     gl.useProgram(program);
     gl.viewport(0, 0, buffer.w, buffer.h);
     gl.disable(gl.BLEND);
     var mean = new Array(buffer.n).fill(0.0);
     for (var i = 0; i < buffer.n; i += 4) {
-        gl.bindFramebuffer(gl.FRAMEBUFFER, Dnn.bufferGlobalMean.framebuffer);
+        gl.bindFramebuffer(gl.FRAMEBUFFER, Dnn[bufferKey].framebuffer);
         setPositionBuffer(gl, program);
         gl.activeTexture(gl.TEXTURE0);
         gl.bindTexture(gl.TEXTURE_2D, buffer.imgs[i/4].texture);
@@ -117,63 +121,67 @@
 }
 
 
-Dnn.global_std = function(gl, buffer) {
-    const tile = 64;
-    if (!Dnn.programGlobalSquaredMean) {
-        Dnn.programGlobalSquaredMean = createShaderProgram(gl, null,
+Dnn.global_mean = function(gl, buffer) {
+    let mean = Dnn._global_mean_f(gl, "x", buffer);
+    return mean;
+}
+
+
+Dnn.global_mean_and_std = function(gl, buffer) {
+    let mean = Dnn._global_mean_f(gl, "x", buffer);
+    let mean2 = Dnn._global_mean_f(gl, "x*x", buffer);
+    var std = new Array(buffer.n).fill(0.0);
+    for (var i = 0; i < buffer.n; i++)
+        std[i] = Math.sqrt(Math.max(mean2[i] - mean[i]*mean[i], 0.0));
+    return {
+        length: buffer.n,
+        mean: mean,
+        std: std
+    };
+}
+
+
+Dnn.softmax2d = function(gl, buffer_in, buffer_out, normalize=true) {
+    if (buffer_in.n != buffer_out.n)
+        throw new Error("Input and output buffer sizes don't match.");
+    if (buffer_out.w != buffer_in.w || buffer_out.h != buffer_in.h)
+        throw new Error("Input and output buffer dimensions don't match.");
+    if (!Dnn.programSoftmax2d) {
+        Dnn.programSoftmax2d = createShaderProgram(gl, null,
             `#version 300 es
             precision mediump float;
 
             uniform sampler2D uSrc;
             out vec4 fragColor;
-            
+
+            uniform vec4 inv_denom;
+
             void main() {
-                ivec2 ires = textureSize(uSrc, 0);
-                ivec2 tile = (ires+${tile}-1) / ${tile};
-                vec2 sc = vec2(ires) / float(${tile});
-                ivec2 xy = ivec2(gl_FragCoord.xy);
-                ivec2 pos0 = xy * tile;
-                ivec2 pos1 = min(pos0+tile, ires);
-                vec4 total = vec4(0);
-                for (int x = pos0.x; x < pos1.x; x++) {
-                    vec4 s = vec4(0);
-                    for (int y = pos0.y; y < pos1.y; y++) {
-                        vec4 c = texelFetch(uSrc, ivec2(x,y), 0);
-                        s += c*c;
-                    }
-                    total += s / sc.y;
-                }
-                fragColor = total / sc.x;
+                vec4 c = texelFetch(uSrc, ivec2(gl_FragCoord.xy), 0);
+                fragColor = inv_denom * exp(c);
             }`);
     }
-    if (!Dnn.bufferGlobalSquaredMean) {
-        Dnn.bufferGlobalSquaredMean = createRenderTarget(gl, tile, tile, false, true, false);
+    let w = Dnn._global_mean_f(gl, "exp(x)", buffer_in);
+    if (!normalize) {
+        for (var i = 0; i < w.length; i++)
+            w[i] *= buffer_in.w * buffer_in.h;
     }
-    let program = Dnn.programGlobalSquaredMean;
+    for (var i = 0; i < w.length; i++)
+        w[i] = 1.0 / Math.max(w[i], 1e-12);
+    let program = Dnn.programSoftmax2d;
     gl.useProgram(program);
-    gl.viewport(0, 0, buffer.w, buffer.h);
+    gl.viewport(0, 0, buffer_in.w, buffer_in.h);
     gl.disable(gl.BLEND);
-    var mean2 = new Array(buffer.n).fill(0.0);
-    for (var i = 0; i < buffer.n; i += 4) {
-        gl.bindFramebuffer(gl.FRAMEBUFFER, Dnn.bufferGlobalSquaredMean.framebuffer);
+    for (var i = 0; i < buffer_in.n; i += 4) {
+        gl.bindFramebuffer(gl.FRAMEBUFFER, buffer_out.imgs[i/4].framebuffer);
         setPositionBuffer(gl, program);
         gl.activeTexture(gl.TEXTURE0);
-        gl.bindTexture(gl.TEXTURE_2D, buffer.imgs[i/4].texture);
+        gl.bindTexture(gl.TEXTURE_2D, buffer_in.imgs[i/4].texture);
         gl.uniform1i(gl.getUniformLocation(program, "uSrc"), 0);
+        gl.uniform4f(gl.getUniformLocation(program, "inv_denom"),
+            w[i], w[i+1], w[i+2], w[i+3]);
         gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
-        var pixels = new Float32Array(4*tile*tile);
-        gl.readPixels(0, 0, tile, tile, gl.RGBA, gl.FLOAT, pixels);
-        var total = [0.0, 0.0, 0.0, 0.0];
-        for (var _ = 0; _ < 4*tile*tile; _++)
-            total[_%4] += pixels[_];
-        for (var _ = i; _ < i+4 && _ < buffer.n; _++)
-            mean2[_] = total[_-i] / (tile*tile);
     }
-    let mean = Dnn.global_mean(gl, buffer);
-    var std = new Array(buffer.n).fill(0.0);
-    for (var i = 0; i < buffer.n; i++)
-        std[i] = Math.sqrt(Math.max(mean2[i] - mean[i]*mean[i], 0.0));
-    return std;
 }
 
 
@@ -201,15 +209,22 @@ Dnn.batch_norm_2d = function(
                 fragColor = slope * c + intercept;
             }`);
     }
+    if (std === null) {
+        let ms = Dnn.global_mean_and_std(buffer_in);
+        std = ms.std;
+        if (mean === null) mean = ms.mean;
+    }
     if (mean === null)
-        mean = Dnn.global_mean(mean);
-    if (std === null)
-        std = Dnn.global_std(std);
+        mean = Dnn.global_mean(buffer_in);
     let n = buffer_in.n;
     if (typeof beta === 'number')
         beta = new Array(n).fill(beta);
     if (typeof gamma === 'number')
         gamma = new Array(n).fill(gamma);
+    if (typeof mean === 'number')
+        mean = new Array(n).fill(mean);
+    if (typeof std === 'number')
+        std = new Array(n).fill(std);
     var slope = new Array(n);
     var intercept = new Array(n);
     for (var i = 0; i < n; i++) {
@@ -241,8 +256,177 @@ Dnn.batch_norm_2d = function(
 
 
-Dnn.Conv2d311 = function(
-    n_in, n_out, weights, biases = []
+Dnn._global_mean_f2 = function(gl, f, buffer1, buffer2) {
+    let programKey = `programGlobalMean2_${f}`;
+    let bufferKey = `bufferGlobalMean2_${f}`;
+    const tile = 64;
+    if (!Dnn[programKey]) {
+        Dnn[programKey] = createShaderProgram(gl, null,
+            `#version 300 es
+            precision mediump float;
+
+            uniform sampler2D uSrc1;
+            uniform sampler2D uSrc2;
+            uniform bool one_channel;
+            out vec4 fragColor;
+
+            void main() {
+                ivec2 ires = textureSize(uSrc1, 0);
+                ivec2 tile = (ires+${tile}-1) / ${tile};
+                vec2 sc = vec2(ires) / float(${tile});
+                ivec2 xy = ivec2(gl_FragCoord.xy);
+                ivec2 pos0 = xy * tile;
+                ivec2 pos1 = min(pos0+tile, ires);
+                vec4 total = vec4(0);
+                for (int x = pos0.x; x < pos1.x; x++) {
+                    vec4 s = vec4(0);
+                    for (int y = pos0.y; y < pos1.y; y++) {
+                        vec4 x1 = texelFetch(uSrc1, ivec2(x,y), 0);
+                        if (one_channel) x1 = vec4(x1.x);
+                        vec4 x2 = texelFetch(uSrc2, ivec2(x,y), 0);
+                        s += (${f});
+                    }
+                    total += s / sc.y;
+                }
+                fragColor = total / sc.x;
+            }`);
+    }
+    if (!Dnn[bufferKey]) {
+        Dnn[bufferKey] = createRenderTarget(gl, tile, tile, false, true, false);
+    }
+    if (buffer1.w != buffer2.w || buffer1.h != buffer2.h)
+        throw new Error("Input and output buffer dimensions don't match.");
+    if (buffer1.n != buffer2.n) {
+        if (buffer1.n == 1);
+        else if (buffer2.n == 1) {
+            let buffer = buffer1;
+            buffer1 = buffer2;
+            buffer2 = buffer;
+        }
+        else
+            throw new Error("Input and output buffer sizes don't match.");
+    }
+    let one_channel = (buffer1.n == 1);
+    let n = buffer2.n;
+    let program = Dnn[programKey];
+    gl.useProgram(program);
+    gl.viewport(0, 0, buffer1.w, buffer1.h);
+    gl.disable(gl.BLEND);
+    var mean = new Array(n).fill(0.0);
+    for (var i = 0; i < n; i += 4) {
+        gl.bindFramebuffer(gl.FRAMEBUFFER, Dnn[bufferKey].framebuffer);
+        setPositionBuffer(gl, program);
+        gl.activeTexture(gl.TEXTURE0);
+        gl.bindTexture(gl.TEXTURE_2D, buffer1.imgs[one_channel?0:i/4].texture);
+        gl.uniform1i(gl.getUniformLocation(program, "uSrc1"), 0);
+        gl.activeTexture(gl.TEXTURE1);
+        gl.bindTexture(gl.TEXTURE_2D, buffer2.imgs[i/4].texture);
+        gl.uniform1i(gl.getUniformLocation(program, "uSrc2"), 1);
+        gl.uniform1i(gl.getUniformLocation(program, "one_channel"), one_channel);
+        gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
+        var pixels = new Float32Array(4*tile*tile);
+        gl.readPixels(0, 0, tile, tile, gl.RGBA, gl.FLOAT, pixels);
+        var total = [0.0, 0.0, 0.0, 0.0];
+        for (var _ = 0; _ < 4*tile*tile; _++)
+            total[_%4] += pixels[_];
+        for (var _ = i; _ < i+4 && _ < n; _++)
+            mean[_] = total[_-i] / (tile*tile);
+    }
+    return mean;
+}
+
+
+Dnn.global_dot = function(gl, buffer1, buffer2, mean=true) {
+    let dot = Dnn._global_mean_f2(gl, "x1*x2", buffer1, buffer2);
+    if (!mean) for (var i = 0; i < dot.length; i++)
+        dot[i] *= buffer1.w*buffer1.h;
+    return dot;
+}
+
+
+Dnn.channel_sum = function(
+    gl, buffer_in, buffer_out, weights=1.0
+) {
+    if (buffer_in.w != buffer_out.w || buffer_in.h != buffer_out.h)
+        throw new Error("Input and output buffer dimensions don't match.");
+    if (buffer_out.n != 1)
+        throw new Error("Number of channels in output buffer is not 1.");
+    if (!Dnn.programChannelSum) {
+        Dnn.programChannelSum = createShaderProgram(gl, null,
+            `#version 300 es
+            precision mediump float;
+
+            uniform int nChannel;
+            // uniform sampler2D accumBuffer;
+            uniform sampler2D uSrc0;
+            uniform sampler2D uSrc1;
+            uniform sampler2D uSrc2;
+            uniform sampler2D uSrc3;
+            uniform sampler2D uSrc4;
+            uniform sampler2D uSrc5;
+            uniform sampler2D uSrc6;
+            uniform sampler2D uSrc7;
+            uniform vec4 w0, w1, w2, w3, w4, w5, w6, w7;
+            out vec4 fragColor;
+
+            void main() {
+                ivec2 xy = ivec2(gl_FragCoord.xy);
+                fragColor = vec4(0);
+
+                int ci = 0;
+                #define one_channel(w, uSrc) \
+                    fragColor += dot(w, texelFetch(uSrc, xy, 0)); \
+                    if ((ci += 4) >= nChannel) return;
+
+                one_channel(w0, uSrc0)
+                one_channel(w1, uSrc1)
+                one_channel(w2, uSrc2)
+                one_channel(w3, uSrc3)
+                one_channel(w4, uSrc4)
+                one_channel(w5, uSrc5)
+                one_channel(w6, uSrc6)
+                one_channel(w7, uSrc7)
+
+            }`);
+    }
+    if (typeof weights === "number")
+        weights = new Array(buffer_in.n).fill(weights);
+    let program = Dnn.programChannelSum;
+    gl.useProgram(program);
+    gl.viewport(0, 0, buffer_in.w, buffer_in.h);
+    gl.disable(gl.BLEND);
+    gl.bindFramebuffer(gl.FRAMEBUFFER, buffer_out.imgs[0].framebuffer);
+    gl.clearColor(0, 0, 0, 1);
+    gl.clear(gl.COLOR_BUFFER_BIT);
+    let maxChannel = 32;
+    let n = buffer_in.n;
+    for (var j = 0; j < n; j += 4*maxChannel) {
+        gl.bindTexture(gl.TEXTURE_2D, buffer_out.imgs[0].sampler);
+        gl.copyTexImage2D(gl.TEXTURE_2D, 0, gl.RGBA32F, 0, 0, buffer_out.w, buffer_out.h, 0);
+        // gl.activeTexture(gl.TEXTURE9);
+        // gl.bindTexture(gl.TEXTURE_2D, buffer_out.imgs[0].sampler);
+        // gl.uniform1i(gl.getUniformLocation(program, "accumBuffer"), 9);
+        gl.uniform1i(gl.getUniformLocation(program, "nChannel"),
+            Math.min(maxChannel, n-(j/4)));
+        for (var dj = 0; dj < 4*maxChannel; dj += 4) {
+            if (j+dj >= n) {
+                gl.uniform1i(gl.getUniformLocation(program, "uSrc"+(dj/4)), 15);
+                continue;
+            }
+            gl.activeTexture(gl['TEXTURE'+(dj/4)]);
+            gl.bindTexture(gl.TEXTURE_2D, buffer_in.imgs[(j+dj)/4].texture);
+            gl.uniform1i(gl.getUniformLocation(program, "uSrc"+(dj/4)), dj/4);
+            gl.uniform4f(gl.getUniformLocation(program, "w"+(dj/4)),
+                weights[j+dj], weights[j+dj+1], weights[j+dj+2], weights[j+dj+3]);
+        }
+        setPositionBuffer(gl, program);
+        gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
+    }
+}
+
+
+Dnn.Conv2d3 = function(
+    stride, padding, n_in, n_out, weights, biases = []
 ) {
     if (weights.length != n_in*n_out*9)
         throw new Error("Incorrect weight size");
@@ -288,17 +472,21 @@ Dnn.Conv2d311 = function(
             throw new Error("Incorrect input buffer length ("+buffer_in.n+","+this.n_in+")");
         if (buffer_out.n != this.n_out)
             throw new Error("Incorrect output buffer length ("+buffer_out.n+","+this.n_out+")");
-        if (buffer_out.w != buffer_in.w || buffer_out.h != buffer_in.h)
+        if (Math.abs(buffer_out.w - buffer_in.w/stride) > 1.001 ||
+            Math.abs(buffer_out.h - buffer_in.h/stride) > 1.001)
             throw new Error("Input and output buffer dimensions don't match.");
         let maxChannel = Math.min(4, Math.floor(
             gl.getParameter(gl.MAX_FRAGMENT_UNIFORM_VECTORS) / 36 - 1));
-        if (!Dnn.programConv2d311 || !Dnn.programConv2d311wt) {
-            let src = getShaderSource('../shaders/dnn-conv2d311.glsl');
+        let programKey = `programConv2d3${stride}${padding}`;
+        let programKeyWt = programKey + "wt";
+        if (!Dnn[programKey] || !Dnn[programKeyWt]) {
+            let src = getShaderSource('../shaders/dnn-conv2d-3.glsl')
+                .replaceAll("{%STRIDE%}", stride).replaceAll("{%PADDING%}", padding);
            src = src.replaceAll("{%MAX_CHANNEL%}", maxChannel);
-            Dnn.programConv2d311 = createShaderProgram(gl, null,
+            Dnn[programKey] = createShaderProgram(gl, null,
                 src.replaceAll("{%USE_WEIGHT_TEXTURE%}", 0) );
-            Dnn.programConv2d311wt = createShaderProgram(gl, null,
+            Dnn[programKeyWt] = createShaderProgram(gl, null,
                 src.replaceAll("{%USE_WEIGHT_TEXTURE%}", 1) );
         }
         if (!this.weightTexture) {
@@ -312,9 +500,9 @@ Dnn.Conv2d311 = function(
 
         let useWeightTexture = (buffer_in.w*buffer_in.h < 1e+4);
         let program = useWeightTexture ?
-            Dnn.programConv2d311wt : Dnn.programConv2d311;
+            Dnn[programKeyWt] : Dnn[programKey];
         gl.useProgram(program);
-        gl.viewport(0, 0, buffer_in.w, buffer_in.h);
+        gl.viewport(0, 0, buffer_out.w, buffer_out.h);
         gl.disable(gl.BLEND);
         for (var i = 0; i < this.n_out; i += 4) {
             gl.bindFramebuffer(gl.FRAMEBUFFER, buffer_out.imgs[i/4].framebuffer);
@@ -380,6 +568,15 @@ Dnn.Conv2d311 = function(
 }
 
 
+Dnn.Conv2d311 = function(n_in, n_out, weights, biases = []) {
+    return new Dnn.Conv2d3(1, 1, n_in, n_out, weights, biases);
+}
+
+Dnn.Conv2d321 = function(n_in, n_out, weights, biases = []) {
+    return new Dnn.Conv2d3(2, 1, n_in, n_out, weights, biases);
+}
+
+
 Dnn.Conv2d110 = function(
     n_in, n_out, weights, biases = []
 ) {
@@ -510,6 +707,7 @@ Dnn.Conv2d110 = function(
 }
 
 
+
 Dnn.ConvTranspose2D421 = function(
     n_in, n_out, weights, biases = []
 ) {
@@ -587,13 +785,14 @@ Dnn.ConvTranspose2D421 = function(
 }
 
 
-Dnn.relu = function(gl, buffer_in, buffer_out) {
+Dnn._activation_f = function(gl, f, buffer_in, buffer_out) {
     if (buffer_in.n != buffer_out.n)
         throw new Error("Input and output buffer sizes don't match.");
     if (buffer_out.w != buffer_in.w || buffer_out.h != buffer_in.h)
         throw new Error("Input and output buffer dimensions don't match.");
-    if (!Dnn.programReLU) {
-        Dnn.programReLU = createShaderProgram(gl, null,
+    let programKey = 'programActivation_'+f;
+    if (!Dnn[programKey]) {
+        Dnn[programKey] = createShaderProgram(gl, null,
             `#version 300 es
             precision mediump float;
 
@@ -601,11 +800,11 @@ Dnn.relu = function(gl, buffer_in, buffer_out) {
             out vec4 fragColor;
 
             void main() {
-                vec4 c = texelFetch(uSrc, ivec2(gl_FragCoord.xy), 0);
-                fragColor = max(c, 0.0);
+                vec4 x = texelFetch(uSrc, ivec2(gl_FragCoord.xy), 0);
+                fragColor = (${f});
             }`);
     }
-    let program = Dnn.programReLU;
+    let program = Dnn[programKey];
     gl.useProgram(program);
     gl.viewport(0, 0, buffer_in.w, buffer_in.h);
     gl.disable(gl.BLEND);
@@ -620,45 +819,83 @@ Dnn.relu = function(gl, buffer_in, buffer_out) {
 }
 
 
-Dnn.add = function(gl, buffer_in1, buffer_in2, buffer_out) {
-    if (buffer_in1.n != buffer_out.n || buffer_in2.n != buffer_out.n)
-        throw new Error("Input and output buffer sizes don't match.");
-    if (buffer_in1.w != buffer_out.w || buffer_in1.h != buffer_out.h ||
-        buffer_in2.w != buffer_out.w || buffer_in2.h != buffer_out.h)
+Dnn.relu = function(gl, buffer_in, buffer_out) {
+    Dnn._activation_f(gl, "max(x,0.0)", buffer_in, buffer_out);
+}
+
+
+Dnn.sigmoid = function(gl, buffer_in, buffer_out) {
+    Dnn._activation_f(gl, "1.0/(1.0+exp(-x))", buffer_in, buffer_out);
+}
+
+
+Dnn._activation_f2 = function(gl, f, buffer1, buffer2, buffer_out) {
+    if (buffer1.w != buffer_out.w || buffer1.h != buffer_out.h ||
+        buffer2.w != buffer_out.w || buffer2.h != buffer_out.h)
         throw new Error("Input and output buffer dimensions don't match.");
-    if (!Dnn.programAdd) {
-        Dnn.programAdd = createShaderProgram(gl, null,
+    if (buffer1.n != buffer2.n) {
+        if (buffer1.n == 1);
+        else if (buffer2.n == 1) {
+            let buffer = buffer1;
+            buffer1 = buffer2;
+            buffer2 = buffer;
+        }
+        else
+            throw new Error("Input and output buffer sizes don't match.");
+    }
+    if (buffer2.n != buffer_out.n)
+        throw new Error("Input and output buffer sizes don't match.");
+    let programKey = 'programActivation2_'+f;
+    if (!Dnn[programKey]) {
+        Dnn[programKey] = createShaderProgram(gl, null,
             `#version 300 es
             precision mediump float;
 
             uniform sampler2D uSrc1;
             uniform sampler2D uSrc2;
+            uniform bool one_channel;
             out vec4 fragColor;
 
             void main() {
-                vec4 c1 = texelFetch(uSrc1, ivec2(gl_FragCoord.xy), 0);
-                vec4 c2 = texelFetch(uSrc2, ivec2(gl_FragCoord.xy), 0);
-                fragColor = c1 + c2;
+                vec4 x1 = texelFetch(uSrc1, ivec2(gl_FragCoord.xy), 0);
+                if (one_channel) x1 = vec4(x1.x);
+                // x1 = vec4(x1.x);
+                vec4 x2 = texelFetch(uSrc2, ivec2(gl_FragCoord.xy), 0);
+                // x2 = vec4(x2.x);
+                fragColor = (${f});
             }`);
     }
-    let program = Dnn.programAdd;
+    let program = Dnn[programKey];
     gl.useProgram(program);
-    gl.viewport(0, 0, buffer_out.w, buffer_out.h);
+    gl.viewport(0, 0, buffer1.w, buffer1.h);
     gl.disable(gl.BLEND);
-    for (var i = 0; i < buffer_out.n; i += 4) {
-        gl.bindFramebuffer(gl.FRAMEBUFFER, buffer_out.imgs[Math.floor(i/4)].framebuffer);
+    let one_channel = (buffer1.n == 1);
+    let n = buffer2.n;
+    for (var i = 0; i < n; i += 4) {
+        gl.bindFramebuffer(gl.FRAMEBUFFER, buffer_out.imgs[i/4].framebuffer);
         setPositionBuffer(gl, program);
         gl.activeTexture(gl.TEXTURE0);
-        gl.bindTexture(gl.TEXTURE_2D, buffer_in1.imgs[Math.floor(i/4)].texture);
+        gl.bindTexture(gl.TEXTURE_2D, buffer1.imgs[one_channel?0:i/4].texture);
         gl.uniform1i(gl.getUniformLocation(program, "uSrc1"), 0);
         gl.activeTexture(gl.TEXTURE1);
-        gl.bindTexture(gl.TEXTURE_2D, buffer_in2.imgs[Math.floor(i/4)].texture);
+        gl.bindTexture(gl.TEXTURE_2D, buffer2.imgs[i/4].texture);
         gl.uniform1i(gl.getUniformLocation(program, "uSrc2"), 1);
+        gl.uniform1i(gl.getUniformLocation(program, "one_channel"), one_channel);
         gl.drawArrays(gl.TRIANGLE_STRIP, 0, 4);
     }
 }
 
 
+Dnn.add = function(gl, buffer_in1, buffer_in2, buffer_out) {
+    Dnn._activation_f2(gl, "x1+x2", buffer_in1, buffer_in2, buffer_out);
+}
+
+
+Dnn.mul = function(gl, buffer_in1, buffer_in2, buffer_out) {
+    Dnn._activation_f2(gl, "x1*x2", buffer_in1, buffer_in2, buffer_out);
+}
+
+
 Dnn.maxpool2d2 = function(gl, buffer_in, buffer_out) {
     if (buffer_in.n != buffer_out.n)
         throw new Error("Input and output buffer sizes don't match.");
diff --git a/shaders/dnn-conv2d311.glsl b/shaders/dnn-conv2d-3.glsl
similarity index 90%
rename from shaders/dnn-conv2d311.glsl
rename to shaders/dnn-conv2d-3.glsl
index be03fd0..2646681 100644
--- a/shaders/dnn-conv2d311.glsl
+++ b/shaders/dnn-conv2d-3.glsl
@@ -4,7 +4,7 @@ precision mediump int;
 
 uniform sampler2D accumBuffer;
 
-// 3x3 convolution, stride 1, zero padding 1
+// 3x3 convolution, stride {%STRIDE%}, zero padding {%PADDING%}
 
 #if {%USE_WEIGHT_TEXTURE%}
 
@@ -25,12 +25,12 @@ out vec4 fragColor;
 
 void main() {
     ivec2 iRes = textureSize(uSrc0, 0);
-    ivec2 xy0 = ivec2(gl_FragCoord.xy);
+    ivec2 xy0 = {%STRIDE%}*ivec2(gl_FragCoord.xy);
     vec4 r = texelFetch(accumBuffer, xy0, 0);
 
     for (int i = 0; i < 3; i++) {
         for (int j = 0; j < 3; j++) {
-            ivec2 xy = xy0-1+ivec2(i,j);
+            ivec2 xy = xy0-{%PADDING%}+ivec2(i,j);
             if (xy.x<0 || xy.x>=iRes.x || xy.y<0 || xy.y>=iRes.y)
                 continue;
             mat4 R;
@@ -75,13 +75,13 @@ out vec4 fragColor;
 
 void main() {
     ivec2 iRes = textureSize(uSrc0, 0);
-    ivec2 xy0 = ivec2(gl_FragCoord.xy);
+    ivec2 xy0 = {%STRIDE%}*ivec2(gl_FragCoord.xy);
     vec4 r = texelFetch(accumBuffer, xy0, 0);
     // r *= 0.0;
 
     for (int i = 0; i < 3; i++) {
         for (int j = 0; j < 3; j++) {
-            ivec2 xy = xy0-1+ivec2(i,j);
+            ivec2 xy = xy0-{%PADDING%}+ivec2(i,j);
             if (xy.x<0 || xy.x>=iRes.x || xy.y<0 || xy.y>=iRes.y)
                 continue;
             mat4 R;
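
Editor's note (not part of the patch): for readers tracing the new attention path in AttUNet1, this is a plain-array restatement of what channelAttention computes per channel. It starts from the two 1x1-convolution outputs rather than re-implementing them; the helper and its variable names are illustrative assumptions, not code from the repository.

// a:  output of att1conv1, one channel, length P (pixels)
// v:  output of att1conv2, kc/2 channels, each an array of length P
// fc: att1conv3 weights (kc x kc/2, row-major) and biases (length kc)
// x:  the concatenated e1/d02r features, kc channels, each of length P
function channelAttentionRef(x, a, v, fc) {
    const half = v.length;
    const e = a.map(Math.exp), Z = e.reduce((s, t) => s + t, 0);
    const A = e.map(t => t / Z);                          // softmax2d over pixels
    // global_dot: attention-weighted pooling of each value channel
    // (softmax2d scales by P and global_dot averages over P, so the factors cancel)
    const x3 = v.map(ch => ch.reduce((s, t, p) => s + A[p] * t, 0));
    const gate = fc.b.map((b, i) => {                     // tiny FC + sigmoid, one gate per channel
        let s = b;
        for (let j = 0; j < half; j++) s += fc.w[i * half + j] * x3[j];
        return 1 / (1 + Math.exp(-s));
    });
    return x.map((ch, c) => ch.map(t => gate[c] * t));    // batch_norm_2d with gamma = gate
}

spacialAttention is the mirror image: a softmax over the per-channel global means of att2conv1's output, a channel_sum of att2conv2's output weighted by those probabilities collapsing to a single map, a sigmoid, and a per-pixel multiply back into the input.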