Skip to content

Commit 5dd5cba

Browse files
committed
Add tools to generate transcripts
1 parent 4fc4158 commit 5dd5cba

File tree

3 files changed

+287
-0
lines changed

3 files changed

+287
-0
lines changed

_data/talks.json

+168
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
{
2+
"ay_tensorflow": {
3+
"title": "Fast client-side ML with TensorFlow.js",
4+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdobzb8u76t007727v6c7xr0/thumbs/thumb-003.jpeg",
5+
"video": "https://app.streamfizz.live/embed/ckdobzb8u76t007727v6c7xr0",
6+
"author": "Ann Yuan",
7+
"affiliation": "Google"
8+
},
9+
"cc_directml": {
10+
"title": "Accelerated graphics and compute API for Machine Learning - DirectML",
11+
"author": "Chai Chaoweeraprasit",
12+
"affiliation": "Microsoft",
13+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdobv3o075l707723b397arm/thumbs/thumb-004.jpeg",
14+
"video": "https://app.streamfizz.live/embed/ckdobv3o075l707723b397arm"
15+
},
16+
"dhm-web-platform-overview": {
17+
"title": "Web Platform: a 30,000 feet view / Web Platform and JS environment constraints",
18+
"author": "Dominique Hazaël-Massieux",
19+
"affiliation": "W3C",
20+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdo77h2f4st7077212hxr6v7/thumbs/thumb-001.jpeg",
21+
"video": "https://app.streamfizz.live/embed/ckdo77h2f4st7077212hxr6v7",
22+
"format": "shower"
23+
},
24+
"en_onnxjs": {
25+
"title": "ONNX.js - A Javascript library to run ONNX models in browsers and Node.js",
26+
"author": "Emma Ning",
27+
"affiliation": "Microsoft",
28+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdobtrbx74rf0772xh98cjs1/thumbs/thumb-004.jpeg",
29+
"video": "https://app.streamfizz.live/embed/ckdobtrbx74rf0772xh98cjs1"
30+
},
31+
"fd-media-hooks": {
32+
"title": "Media processing hooks for the Web",
33+
"author": "François Daoust",
34+
"affiliation": "W3C",
35+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdo86r7h5725077210nfqpeg/thumbs/thumb-007.jpeg",
36+
"video": "https://app.streamfizz.live/embed/ckdo86r7h5725077210nfqpeg",
37+
"format": "shower",
38+
"url": "https://www.w3.org/2020/Talks/fd-media-processing/#"
39+
},
40+
"jm_lr_mobile": {
41+
"title": "Mobile-first web-based Machine Learning",
42+
"author": "Josh Meyer & Lindy Rauchenstein",
43+
"affiliation": "Artie",
44+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdor39sqc98m0772093khiu4/thumbs/thumb-002.jpeg",
45+
"video": "https://app.streamfizz.live/embed/ckdor39sqc98m0772093khiu4"
46+
},
47+
"jm_tensorflowjs": {
48+
"title": "Opportunities and Challenges for TensorFlow.js and beyond",
49+
"author": "Jason Mayes",
50+
"affiliation": "Google",
51+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdobxb1t766z0772f2e19nqo/thumbs/thumb-001.jpeg",
52+
"video": "https://app.streamfizz.live/embed/ckdobxb1t766z0772f2e19nqo"
53+
},
54+
"jmv_rnnoise": {
55+
"title": "RNNoise, Neural Speech Enhancement, and the Browser",
56+
"author": "Jean-Marc Valin",
57+
"affiliation": "",
58+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdocse0q7mt70772two73led/thumbs/thumb-001.jpeg",
59+
"video": "https://app.streamfizz.live/embed/ckdocse0q7mt70772two73led"
60+
},
61+
"jt_ai-bias": {
62+
"title": "We Count: Fair Treatment, Disability and Machine Learning",
63+
"author": "Jutta Treviranus",
64+
"affiliation": "OCAD University",
65+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdobyfxs76fm0772ymgk0qfv/thumbs/thumb-001.jpeg",
66+
"video": "https://app.streamfizz.live/embed/ckdobyfxs76fm0772ymgk0qfv"
67+
},
68+
"kd_speech": {
69+
"title": "Wreck a Nice Beach in the Browser: Getting the Browser to Recognize Speech",
70+
"author": "Kelly Davis",
71+
"affiliation": "Mozilla",
72+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdone3vbbufj0772u1a3hudu/thumbs/thumb-001.jpeg",
73+
"video": "https://app.streamfizz.live/embed/ckdone3vbbufj0772u1a3hudu"
74+
},
75+
"lmc_musicians": {
76+
"title": "Empowering Musicians and Artists using Machine Learning to Build Their Own Tools in the Browser",
77+
"author": "Louis McCallum",
78+
"affiliation": "University of London",
79+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdocqdzk7ljl07724aih3whc/thumbs/thumb-005.jpeg",
80+
"video": "https://app.streamfizz.live/embed/ckdocqdzk7ljl07724aih3whc"
81+
},
82+
"ms_ab_wasinn": {
83+
"title": "Introducing WASI-NN",
84+
"author": "Mingqiu Sun & Andrew Brown",
85+
"affiliation": "Intel",
86+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdobysfk76ns0772a6u4dbq5/thumbs/thumb-001.jpeg",
87+
"video": "https://app.streamfizz.live/embed/ckdobysfk76ns0772a6u4dbq5"
88+
},
89+
"mw-androidnn": {
90+
"title": "Accelerate ML inference on mobile devices with Android NNAPI",
91+
"author": "Miao Wang",
92+
"affiliation": "Google",
93+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdo78esr4t0j0772i5r7h1td/thumbs/thumb-001.jpeg",
94+
"video": "https://app.streamfizz.live/embed/ckdo78esr4t0j0772i5r7h1td"
95+
},
96+
"nh_webnn": {
97+
"title": "Access purpose-built ML hardware with Web Neural Network API",
98+
"author": "Ningxin Hu",
99+
"affiliation": "Intel",
100+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdo75ck44sip0772u0gj0kog/thumbs/thumb-006.jpeg",
101+
"video": "https://app.streamfizz.live/embed/ckdo75ck44sip0772u0gj0kog"
102+
},
103+
"op_content-filtering": {
104+
"title": "Machine Learning on the Web for content filtering applications",
105+
"author": "Oleksandr Paraska",
106+
"affiliation": "eyeo",
107+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdo73x214sg70772y38lkixw/thumbs/thumb-003.jpeg",
108+
"video": "https://app.streamfizz.live/embed/ckdo73x214sg70772y38lkixw"
109+
},
110+
"pl_privacy": {
111+
"title": "Privacy-first approach to machine learning",
112+
"author": "Philip Laszkowicz",
113+
"affiliation": "",
114+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdor50iyc9lm07729dsrtpqu/thumbs/thumb-001.jpeg",
115+
"video": "https://app.streamfizz.live/embed/ckdor50iyc9lm07729dsrtpqu"
116+
},
117+
"pm_visualization": {
118+
"title": "Exploring unsupervised image segmentation results",
119+
"author": "Piotr Migdal & Bartłomiej Olechno",
120+
"affiliation": "",
121+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdor4gvwc9bh0772h8h13fs2/thumbs/thumb-001.jpeg",
122+
"video": "https://app.streamfizz.live/embed/ckdor4gvwc9bh0772h8h13fs2",
123+
"format": "shiny",
124+
"url": "https://www.w3.org/2020/Talks/mlws/piotr_migdal/#"
125+
},
126+
"pw_paddlejs": {
127+
"title": "Paddle.js - Machine Learning for the Web",
128+
"author": "Ping Wu",
129+
"affiliation": "Baidu",
130+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdor4rqzc9ix0772t2z9zo80/thumbs/thumb-001.jpeg",
131+
"video": "https://app.streamfizz.live/embed/ckdor4rqzc9ix0772t2z9zo80"
132+
},
133+
"sm_architecture": {
134+
"title": "Machine Learning in Web Architecture",
135+
"author": "Sangwhan Moon",
136+
"affiliation": "",
137+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdo77c2k4srf07726abf6aps/thumbs/thumb-001.jpeg",
138+
"video": "https://app.streamfizz.live/embed/ckdo77c2k4srf07726abf6aps"
139+
},
140+
"tp_interactive_music": {
141+
"title": "Interactive ML - Powered Music Applications on the Web",
142+
"author": "Tero Parviainen",
143+
"affiliation": "Counterpoint",
144+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdo76mfh4sne0772endmrepx/thumbs/thumb-002.jpeg",
145+
"video": "https://app.streamfizz.live/embed/ckdo76mfh4sne0772endmrepx"
146+
},
147+
"wm_collaborative-learning": {
148+
"title": "Collaborative Learning",
149+
"author": "Wolfgang Maß",
150+
"affiliation": "DFKI",
151+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdobw6x0764l07729krgy57d/thumbs/thumb-001.jpeg",
152+
"video": "https://app.streamfizz.live/embed/ckdobw6x0764l07729krgy57d"
153+
},
154+
"yh-xq-dnn": {
155+
"title": "Enabling Distributed DNNs for the Mobile Web Over Cloud, Edge and End Devices",
156+
"author": "Yakun Huang & Xiuquan Qiao",
157+
"affiliation": "BUPT",
158+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdobyv0p76q107720jc58j6h/thumbs/thumb-002.jpeg",
159+
"video": "https://app.streamfizz.live/embed/ckdobyv0p76q107720jc58j6h"
160+
},
161+
"zc_expression": {
162+
"title": "A virtual character web meeting with expression enhance power by machine learning",
163+
"author": "Zelun Chen",
164+
"affiliation": "Netease",
165+
"thumbnail": "https://cjx1uopmt0m4q0667xmnrqpk.blob.core.windows.net/ckdocpfld7kwy0772xhma7lrs/thumbs/thumb-002.jpeg",
166+
"video": "https://app.streamfizz.live/embed/ckdocpfld7kwy0772xhma7lrs"
167+
}
168+
}

_tools/build-transcript.js

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
const fs = require("fs");
2+
const WebVTTParser = require("webvtt-parser").WebVTTParser;
3+
const parser = new WebVTTParser();
4+
const splitter = require("sentence-splitter");
5+
const talks = require("../_data/talks.json");
6+
7+
// Technical terms that annotateSentence wraps in <a class=dfn> anchors.
// Matching is case-sensitive and the terms are applied in array order, so a
// longer term must be listed before any term it contains (for example
// "TensorFlow.js" before "TensorFlow"); otherwise the shorter term is
// annotated first and the longer one no longer matches.
const linkableTerms = [
  "GAN",
  "GRU",
  "SoC",
  "multiply-add",
  "matrix",
  "tensor",
  "convolution",
  "correlation",
  "SNR",
  "DNN",
  // Was "FTT": a typo for the fast Fourier transform (its inverse, "IFFT",
  // is the next entry).
  "FFT",
  "IFFT",
  // NOTE(review): the project spells its name "Emscripten"; this casing only
  // matches if the captions use it verbatim. Confirm against the .vtt files.
  "EMScripten",
  "WebAssembly",
  "WASM",
  "WebGL",
  "WebGPU",
  "SIMD",
  "vocoder",
  "TTS",
  "DSP",
  "WebBLE",
  "WASI",
  "CG",
  "OpenVINO",
  "PyTorch",
  "Keras",
  "TensorFlow.js",
  "TensorFlow",
  "TFJS",
  "python",
  "model DAG",
  "YOLO",
  "differential privacy",
  "NLP",
  "ASR",
  "NPM",
  "Node.js",
  "WebRTC",
  "WebXR"
];
49+
50+
/**
 * Cleans up one transcript sentence and wraps known technical terms in
 * `<a class=dfn>` anchors.
 *
 * Clean-up steps: a leading "slide <id>." or "next slide." marker is removed
 * (case-insensitively), and every WebVTT voice tag (`<v ...>` / `</v>`) is
 * stripped.
 *
 * A term is annotated only when it stands alone: the character before it must
 * not be a letter, a digit or ">", and the character after it must not be a
 * letter, a digit or "<". The ">" / "<" guards keep text that already sits
 * directly inside a tag from being wrapped a second time.
 *
 * @param {string} sentence - Raw sentence text taken from the captions.
 * @param {string[]} [terms=linkableTerms] - Terms to annotate, applied in
 *   order and matched literally and case-sensitively.
 * @returns {string} The cleaned sentence with matching terms wrapped.
 */
function annotateSentence(sentence, terms = linkableTerms) {
  let annotated = sentence
    .replace(/^slide [a-z0-9]*\.?/i, '')
    .replace(/^next slide\.?/i, '')
    // A sentence can span several cues, each carrying its own voice tag, so
    // strip all of them rather than only the first.
    .replace(/<v [^>]*>/g, '')
    .replace(/<\/v>/g, '');

  for (const term of terms) {
    if (!term) {
      // An empty pattern would match at every position.
      continue;
    }
    // Terms are plain text: escape regex metacharacters so that the "." in
    // "TensorFlow.js" or "Node.js" only matches a literal dot.
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    annotated = annotated.replace(new RegExp(escaped, "g"), (match, offset, whole) => {
      // Inspect the neighbours without consuming them, so that adjacent
      // occurrences are each checked against their real surroundings.
      // charAt() yields "" at the string edges, which counts as a boundary.
      const before = whole.charAt(offset - 1);
      const after = whole.charAt(offset + match.length);
      if (/[a-zA-Z0-9>]/.test(before) || /[<a-zA-Z0-9]/.test(after)) {
        return match;
      }
      return "<a class=dfn>" + match + "</a>";
    });
  }
  return annotated;
}
64+
65+
// For every talk listed in talks.json, read its WebVTT captions, group the
// caption sentences by slide and write an HTML transcript fragment to
// _includes/transcripts/<shortname>.html. Input and output paths are relative
// to the current working directory.
for (const shortname of Object.keys(talks)) {
  const talk = talks[shortname];
  const {cues} = parser.parse(fs.readFileSync("talks/captions/" + shortname + ".vtt", 'utf-8'));

  // Re-split the concatenated cue text into sentences, dropping empty ones.
  const sentences = splitter.split(cues.map(c => c.text).join(' '))
    .map(s => s.raw.trim()).filter(s => s);

  // A sentence that is nothing but a spoken slide marker ("Slide 4.").
  const slideMarker = /^slide [a-z0-9]*\.?$/i;

  // divs[0] is always the initial empty array (a placeholder); divs[n] holds
  // the annotated sentences for slide n.
  const divs = [];
  let div = [];
  let sentencesCursor = 0;
  // Slide 1 has no starting cue of its own: the first cue looked for is the
  // one whose id starts with "slide-2".
  let slideNum = 2;
  cues.forEach(c => {
    if (!c.id.startsWith("slide-" + slideNum)) {
      return;
    }
    divs.push(div);
    div = [];
    // Collect the sentences of the previous slide: everything up to the
    // sentence that begins with this cue's text.
    // NOTE(review): if no remaining sentence starts with the cue text (e.g.
    // the cue begins mid-sentence), this consumes all remaining sentences.
    while (sentencesCursor < sentences.length) {
      const sentence = sentences[sentencesCursor];
      if (sentence.startsWith(c.text)) {
        break;
      }
      if (!slideMarker.test(sentence)) {
        div.push(annotateSentence(sentence));
      }
      sentencesCursor++;
    }
    slideNum++;
  });
  // dealing with last slide
  divs.push(div);
  div = [];
  while (sentencesCursor < sentences.length) {
    const sentence = sentences[sentencesCursor];
    if (!slideMarker.test(sentence)) {
      div.push(annotateSentence(sentence));
    }
    sentencesCursor++;
  }
  divs.push(div);

  let content = "";
  // `let i`: the loop counter was previously an implicit global, which throws
  // a ReferenceError in strict mode / ES modules.
  for (let i = 1; i < divs.length; i++) {
    // Talks with an explicit url use it as-is; the others point at page i of
    // the talk's PDF.
    const slideSrc = talk.url || 'https://www.w3.org/2020/Talks/mlws/' + shortname + '.pdf#page=' + i;
    content += `<div class="slide" role='region' aria-label="Slide ${i} of ${divs.length - 1}" id="slide-${i}" data-fmt="${talk.format || 'pdf'}" data-src="${slideSrc}"><noscript><a href="${slideSrc}">Slide ${i}</a></noscript></div>`;
    content += `<div role='region'>`;
    content += "<p>" + divs[i].join("</p>\n<p>") + "</p>";
    content += "</div>";
  }
  fs.writeFileSync("_includes/transcripts/" + shortname + ".html", content, {encoding: "utf-8"});
}

package.json

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"dependencies": {
3+
"sentence-splitter": "^3.2.0",
4+
"webvtt-parser": "^2.1.2"
5+
}
6+
}

0 commit comments

Comments
 (0)