Skip to content

Commit 1a0bbac

Browse files
committed
Problem: the index is not updated periodically.
Solution: update the index periodically and cache the downloaded file, which remains valid for 24 hours.
1 parent d317085 commit 1a0bbac

File tree

4 files changed

+117
-36
lines changed

4 files changed

+117
-36
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
node_modules/
22
.env
3+
index.cache.json

index.js

Lines changed: 103 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,44 +2,109 @@ require('dotenv').config();
22
const fs = require('fs');
33
const { App, ExpressReceiver } = require('@slack/bolt');
44
const FlexSearch = require("flexsearch");
5+
const axios = require('axios');
56

6-
let paperData = JSON.parse(fs.readFileSync('index.json'));
7-
8-
// 1. We need to convert the JSON object into an array so that FlexSearch can
9-
// swallow it.
10-
// 2. Since field search in FlexSearch is sensitive to field reordering
11-
// (https://github.com/nextapps-de/flexsearch/issues/70), we will merge all the
12-
// data we intend to search on (paperId, title, author, date) into one string
13-
// and put it at the top in the 'index' definition.
14-
// 3. We still want the 'type' field because we can filter results with it.
15-
// We still want the 'date' field because we sort the results with it.
16-
// 'paperId' is necessary because FlexSearch index needs an 'id'.
17-
const adjustedPaperData = Object.keys(paperData).map(paperId => {
18-
const paper = paperData[paperId];
19-
return {
20-
data: [paperId, paper.title, paper.author, paper.date].join(' '),
21-
paperId: paperId,
22-
type: paper.type,
23-
date: paper.date
24-
};
25-
});
7+
// Paper metadata object whose keys are used as paper ids; it stays undefined
// until initializeIndex() or the periodic refresh assigns a loaded index.
let paperData = undefined;
8+
// FlexSearch index built from paperData by updateSearchIndex() and queried
// by search(); it stays undefined until the first successful build.
let searchIndex = undefined;
9+
10+
/**
 * Reports whether the on-disk index cache is too old to be used.
 *
 * The age is measured from the file's last-modification time, so rewriting
 * the cache file is what resets the clock.
 *
 * @param {string} [cachePath='index.cache.json'] - Path of the cache file.
 * @param {number} [maxAgeHours=24] - Age, in hours, at which the cache counts
 *   as stale (inclusive).
 * @returns {boolean} `true` when the file is at least `maxAgeHours` old.
 * @throws {Error} Whatever `fs.statSync` throws, e.g. when the file is missing.
 */
const isCacheStale = (cachePath = 'index.cache.json', maxAgeHours = 24) => {
  const MS_PER_HOUR = 1000 * 60 * 60;
  const { mtimeMs } = fs.statSync(cachePath);
  const ageHours = (Date.now() - mtimeMs) / MS_PER_HOUR;
  return ageHours >= maxAgeHours;
};
2616

27-
const index = new FlexSearch({
28-
tokenize: "strict",
29-
depth: 1,
30-
doc: {
31-
id: "paperId",
32-
field: {
33-
data: {},
34-
paperId: {
35-
tokenize: "forward"
36-
},
37-
type: {},
38-
date: {}
17+
// Reads the cached index from 'index.cache.json'.
// Returns whatever fs.readFileSync yields (no encoding is passed, so the raw
// file contents) when isCacheStale() reports the cache as fresh, and
// undefined when the cache is stale or anything throws along the way.
// NOTE(review): the lines tagged '41-' and '42-' below are removed-side diff
// lines from the previous version of the file, not part of this function.
const loadCache = () => {
18+
try {
19+
if (isCacheStale()) {
20+
return undefined;
21+
} else {
22+
return fs.readFileSync('index.cache.json');
3923
}
24+
// Any failure is treated as "no usable cache" - presumably this mainly
// covers the cache file not existing yet (isCacheStale stats the file).
} catch (e) {
25+
return undefined;
4026
}
41-
});
42-
index.add(adjustedPaperData);
27+
};
28+
29+
/**
 * Downloads the paper index as parsed JSON.
 *
 * Every failure mode is reported as `undefined` so callers only have one
 * outcome to check: a network/HTTP error, a body that is not an object
 * (e.g. an HTML error page delivered as a string), or a `null` body
 * (`typeof null` is 'object', so it needs an explicit check).
 *
 * @param {string} [url='http://wg21.link/index.json'] - Location of the index.
 * @returns {Promise<object|undefined>} The parsed index, or `undefined` when
 *   it could not be obtained. The promise never rejects.
 */
const downloadIndex = async (url = 'http://wg21.link/index.json') => {
  try {
    const { data } = await axios.get(url);
    const isIndexObject = typeof data === 'object' && data !== null;
    return isIndexObject ? data : undefined;
  } catch (err) {
    const reason = err instanceof Error ? err.message : err;
    console.error(`Failed to download the index from ${url}:`, reason);
    return undefined;
  }
};
37+
38+
/**
 * Rebuilds the module-level `searchIndex` from the current `paperData`.
 *
 * FlexSearch consumes an array of documents, so the keyed `paperData` object
 * is flattened first. Field search in FlexSearch is sensitive to field
 * ordering (https://github.com/nextapps-de/flexsearch/issues/70), so every
 * value we search on (paperId, title, author, date) is merged into a single
 * 'data' string that is listed first in the field definition.
 *
 * The remaining fields are kept for other reasons: 'type' is used to filter
 * results, 'date' is used to sort them, and 'paperId' is the document id
 * FlexSearch requires.
 */
const updateSearchIndex = () => {
  const documents = Object.entries(paperData).map(([paperId, paper]) => {
    const searchableText = [paperId, paper.title, paper.author, paper.date].join(' ');
    return {
      data: searchableText,
      paperId,
      type: paper.type,
      date: paper.date
    };
  });

  // 'data' must stay first: see the field-ordering note above.
  const fieldConfig = {
    data: {},
    paperId: { tokenize: "forward" },
    type: {},
    date: {}
  };

  searchIndex = new FlexSearch({
    tokenize: "strict",
    depth: 1,
    doc: { id: "paperId", field: fieldConfig }
  });
  searchIndex.add(documents);
};
75+
76+
/**
 * Populates `paperData` and the search index at startup.
 *
 * Order of preference:
 *   1. a fresh on-disk cache (see loadCache);
 *   2. a download of the index, which is then written back to the cache.
 *
 * A cache that cannot be parsed or indexed is treated like a missing cache
 * and falls through to the download instead of aborting startup. A failed
 * download is logged, never thrown.
 *
 * @returns {Promise<void>} Resolves once initialization has been attempted;
 *   it does not reject when the index could not be obtained.
 */
const initializeIndex = async () => {
  const cache = loadCache();
  if (cache !== undefined) {
    try {
      paperData = JSON.parse(cache);
      updateSearchIndex();
      console.log('Loaded index from cache successfully!');
      return;
    } catch (err) {
      // Corrupt or truncated cache file: recover by downloading a new copy.
      console.error('Failed to load the cached index, downloading instead:', err);
    }
  }

  let index;
  try {
    index = await downloadIndex();
  } catch (err) {
    console.error('Failed to download the index:', err);
  }

  if (index !== undefined && index !== null) {
    paperData = index;
    updateSearchIndex();
    // Best-effort cache write: the in-memory index is already usable, so a
    // write failure is reported but does not fail initialization.
    fs.writeFile('index.cache.json', JSON.stringify(index), (err) => {
      if (err) {
        console.error('Failed to write the index cache:', err);
      }
    });
    console.log('Downloaded index successfully!');
    return;
  }

  console.log('Failed to initialize the index!');
};
96+
97+
/**
 * Downloads a fresh index and, on success, swaps it in, rebuilds the search
 * index and rewrites the on-disk cache.
 *
 * This runs from a timer, where nobody awaits the result, so it must never
 * reject: every failure is logged and the previously loaded index stays in
 * use.
 *
 * @returns {Promise<void>}
 */
const refreshIndex = async () => {
  try {
    const index = await downloadIndex();
    if (index !== undefined && index !== null) {
      paperData = index;
      updateSearchIndex();
      // Best-effort cache write; the in-memory index is already updated.
      fs.writeFile('index.cache.json', JSON.stringify(index), (err) => {
        if (err) {
          console.error('Failed to write the index cache:', err);
        }
      });
      console.log('Downloaded and updated the index successfully!');
      // Without this return the failure message below was logged on every
      // run, including successful ones.
      return;
    }
  } catch (err) {
    console.error('Index refresh failed:', err);
  }

  console.log('Failed to download and update the index!');
};

// Refresh once every 24 hours. The handle is kept so the timer can be
// cleared (e.g. on shutdown).
const indexRefreshTimer = setInterval(refreshIndex, 24*60*60*1000);
43108

44109
const latestFirst = (x, y) => {
45110
if (x.date === undefined) return 1;
@@ -102,12 +167,12 @@ const makePaperMessage = (paperId) => {
102167
const search = ({ query, type }) => {
103168
let searchResults = [];
104169
if (type === undefined) {
105-
searchResults = index.search({
170+
searchResults = searchIndex.search({
106171
query: query,
107172
sort: latestFirst
108173
});
109174
} else {
110-
searchResults = index.search({
175+
searchResults = searchIndex.search({
111176
query: query,
112177
where: { type: type },
113178
sort: latestFirst
@@ -232,6 +297,8 @@ app.message(/.*/, async ({ context, event, say }) => {
232297
});
233298

234299
// Entry point: load the paper index first, then start the Slack app on
// process.env.PORT (falling back to 3000).
(async () => {
  await initializeIndex();
  await app.start(process.env.PORT || 3000);
  console.log('⚡️ Paperbot is running!');
})().catch((err) => {
  // Do not leave the startup promise floating: report the failure and exit,
  // because the periodic refresh timer would otherwise keep a process alive
  // that never started serving.
  console.error('Paperbot failed to start:', err);
  process.exit(1);
});
304+

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
},
99
"dependencies": {
1010
"@slack/bolt": "^2.4.1",
11+
"axios": "^0.20.0",
1112
"dotenv": "^8.2.0",
1213
"flexsearch": "^0.6.32"
1314
}

yarn.lock

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,13 @@ axios@^0.19.0:
188188
dependencies:
189189
follow-redirects "1.5.10"
190190

191+
axios@^0.20.0:
192+
version "0.20.0"
193+
resolved "https://registry.yarnpkg.com/axios/-/axios-0.20.0.tgz#057ba30f04884694993a8cd07fa394cff11c50bd"
194+
integrity sha512-ANA4rr2BDcmmAQLOKft2fufrtuvlqR+cXNNinUmvfeSNCOF98PZL+7M/v1zIdGo7OLjEA9J2gXJL+j4zGsl0bA==
195+
dependencies:
196+
follow-redirects "^1.10.0"
197+
191198
body-parser@1.19.0:
192199
version "1.19.0"
193200
resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.19.0.tgz#96b2709e57c9c4e09a6fd66a8fd979844f69f08a"
@@ -439,6 +446,11 @@ follow-redirects@1.5.10:
439446
dependencies:
440447
debug "=3.1.0"
441448

449+
follow-redirects@^1.10.0:
450+
version "1.13.0"
451+
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.13.0.tgz#b42e8d93a2a7eea5ed88633676d6597bc8e384db"
452+
integrity sha512-aq6gF1BEKje4a9i9+5jimNFIpq4Q1WiwBToeRK5NvZBd/TRsmW8BsJfOEGkr76TbOyPVD3OVDN910EcUNtRYEA==
453+
442454
form-data@^2.5.0:
443455
version "2.5.1"
444456
resolved "https://registry.yarnpkg.com/form-data/-/form-data-2.5.1.tgz#f2cbec57b5e59e23716e128fe44d4e5dd23895f4"

0 commit comments

Comments
 (0)