-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathbuild.js
84 lines (65 loc) · 1.59 KB
/
build.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
var fs = require('fs')
var path = require('path')
var https = require('https')
var concat = require('concat-stream')
var yauzl = require('yauzl')
var dsv = require('d3-dsv')
var bail = require('bail')
// Where the archive is downloaded from.
// See: http://crr.ugent.be/programs-data/subtitle-frequencies
var endpoint =
  'https://www.ugent.be/pp/experimentele-psychologie/en/research/documents/subtlexus/subtlexus2.zip/at_download/file'

// Base name of the wanted file inside the archive.
var name = 'SUBTLEXus74286wordstextversion.txt'

// Set to `true` by `onopen` once an entry called `name` is seen; checked by
// `onend` after the whole archive has been read.
var found = false

// Start the download; the response is handed to `onrequest`.
https.get(endpoint, onrequest)
// Handle the HTTP response of the archive download.
//
// A response outside the 2xx range is rejected through `bail`; otherwise the
// body is streamed to `archive.zip` and `onclose` runs once that file has
// been closed.
//
// `res` is the response object handed to the request callback.
function onrequest(res) {
  var status = res.statusCode

  // Redirects are not followed by the core `https` client, so anything that
  // is not a success status carries an error/redirect page instead of the
  // archive. Fail here rather than writing it to disk and failing later
  // while parsing the zip.
  if (status < 200 || status > 299) {
    // Discard the body so the connection can be released.
    res.resume()
    bail(
      new Error('Could not download archive: unexpected status `' + status + '`')
    )
    return
  }

  // `pipe` does not forward errors of the source stream to the destination,
  // so the response needs its own handler.
  res.on('error', bail)

  res
    .pipe(fs.createWriteStream('archive.zip'))
    .on('close', onclose)
    .on('error', bail)
}
// Registered as the `close` handler of the `archive.zip` write stream: open
// the downloaded file with yauzl and pass the result to `onopen`.
function onclose() {
  var file = 'archive.zip'
  // NOTE(review): `lazyEntries` presumably makes yauzl wait for
  // `readEntry()` before emitting each entry — confirm against yauzl docs.
  var options = {lazyEntries: true}

  yauzl.open(file, options, onopen)
}
// Callback for `yauzl.open`: step through the archive one entry at a time
// and feed the entry whose base name equals `name` into `onconcat`.
//
// `err` is handed to `bail` first; `archive` is the opened zip file.
function onopen(err, archive) {
  bail(err)

  // Request the first entry, then hook up the handlers.
  next()
  archive.on('entry', inspect)
  archive.on('end', onend)

  // Ask the archive for its next entry.
  function next() {
    archive.readEntry()
  }

  // Open the wanted entry for reading; move past every other entry.
  function inspect(entry) {
    var wanted = path.basename(entry.fileName) === name

    if (wanted) {
      found = true
      archive.openReadStream(entry, collect)
    } else {
      next()
    }
  }

  // Collect the entry's contents for `onconcat`, and keep reading the
  // archive once the entry's stream has ended.
  function collect(err, rs) {
    var sink

    bail(err)
    sink = concat(onconcat)
    rs.pipe(sink).on('error', bail)
    rs.on('end', next)
  }
}
// Registered as the archive's `end` handler: fail when no entry named `name`
// was seen (that is, `found` was never set).
function onend() {
  if (found) {
    return
  }

  throw new Error('File not found')
}
// Receives the complete contents of the wanted archive entry: parse them as
// tab-separated values, turn every row into a `{word, count}` record (`map`),
// order the records with `sort`, and write the result to `index.json` as
// indented JSON followed by a newline.
function onconcat(buf) {
  var rows = dsv.tsvParse(String(buf))
  var entries = rows.map(map)
  var json

  entries.sort(sort)
  json = JSON.stringify(entries, null, 2) + '\n'

  fs.writeFile('index.json', json, bail)
}
// Turn one parsed row into a record: `Word` becomes `word`, and `FREQcount`
// is converted to a number and becomes `count`.
function map(d) {
  var word = d.Word
  var count = Number(d.FREQcount)

  return {word: word, count: count}
}
// Comparator for `Array#sort`: orders records by the value `pick` returns
// for them, highest first.
function sort(a, b) {
  var right = pick(b)
  var left = pick(a)

  return right - left
}
// Return the `count` field of a record; used as the sort key.
function pick(d) {
  var count = d.count

  return count
}