Skip to content

Commit 364090f

Browse files
committed
Add training script and classifier
1 parent f63545b commit 364090f

File tree

10 files changed

+659
-11
lines changed

10 files changed

+659
-11
lines changed

bun.lock

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@
313313
},
314314
"integrations/intercom-conversations": {
315315
"name": "@gitbook/integration-intercom-conversations",
316-
"version": "0.1.0",
316+
"version": "0.2.0",
317317
"dependencies": {
318318
"@gitbook/api": "*",
319319
"@gitbook/runtime": "*",
@@ -599,10 +599,13 @@
599599
"itty-router": "^4.0.26",
600600
"js-sha256": "^0.9.0",
601601
"remove-markdown": "^0.5.0",
602+
"toygrad": "^2.6.0",
602603
},
603604
"devDependencies": {
604605
"@gitbook/cli": "workspace:*",
605606
"@gitbook/tsconfig": "workspace:*",
607+
"@vanillaes/csv": "^3.0.4",
608+
"commander": "^14.0.2",
606609
},
607610
},
608611
"integrations/toucantoco": {
@@ -726,7 +729,7 @@
726729
},
727730
"packages/api": {
728731
"name": "@gitbook/api",
729-
"version": "0.145.0",
732+
"version": "0.146.0",
730733
"dependencies": {
731734
"event-iterator": "^2.0.0",
732735
"eventsource-parser": "^3.0.0",
@@ -741,7 +744,7 @@
741744
},
742745
"packages/cli": {
743746
"name": "@gitbook/cli",
744-
"version": "0.26.0",
747+
"version": "0.26.1",
745748
"bin": {
746749
"gitbook": "./cli.js",
747750
},
@@ -1435,6 +1438,8 @@
14351438

14361439
"@types/ws": ["@types/[email protected]", "", { "dependencies": { "@types/node": "*" } }, "sha512-bd/YFLW+URhBzMXurx7lWByOu+xzU9+kb3RboOteXYDfW+tr+JZa99OyNmPINEGB/ahzKrEuc8rcv4gnpJmxTw=="],
14371440

1441+
"@vanillaes/csv": ["@vanillaes/[email protected]", "", {}, "sha512-cMJ/pAljVGpsHvqgd5N4EpNJOvMjFubg7x+9ehjVgQUFi2h+u4Nc4O4C0ErNLV53rO3rI0W1JnKX3wQz0pWgIA=="],
1442+
14381443
"@whatwg-node/disposablestack": ["@whatwg-node/[email protected]", "", { "dependencies": { "@whatwg-node/promise-helpers": "^1.0.0", "tslib": "^2.6.3" } }, "sha512-LOtTn+JgJvX8WfBVJtF08TGrdjuFzGJc4mkP8EdDI8ADbvO7kiexYep1o8dwnt0okb0jYclCDXF13xU7Ge4zSw=="],
14391444

14401445
"@whatwg-node/fetch": ["@whatwg-node/[email protected]", "", { "dependencies": { "@whatwg-node/node-fetch": "^0.7.11", "urlpattern-polyfill": "^10.0.0" } }, "sha512-+yFJU3hmXPAHJULwx0VzCIsvr/H0lvbPvbOH3areOH3NAuCxCwaJsQ8w6/MwwMcvEWIynSsmAxoyaH04KeosPg=="],
@@ -1683,6 +1688,8 @@
16831688

16841689
"emoji-regex": ["[email protected]", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="],
16851690

1691+
"encoding": ["[email protected]", "", { "dependencies": { "iconv-lite": "^0.6.2" } }, "sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A=="],
1692+
16861693
"enquirer": ["[email protected]", "", { "dependencies": { "ansi-colors": "^4.1.1", "strip-ansi": "^6.0.1" } }, "sha512-rRqJg/6gd538VHvR3PSrdRBb/1Vy2YfzHqzvbhGIQpDRKIa4FgV/54b5Q1xYSxOOwKvjXweS26E0Q+nAMwp2pQ=="],
16871694

16881695
"env-paths": ["[email protected]", "", {}, "sha512-dtJUTepzMW3Lm/NPxRf3wP4642UWhjL2sQxc+ym2YMj1m/H2zDNQOlezafzkHwn6sMstjHTwG6iQQsctDW/b1A=="],
@@ -2467,6 +2474,8 @@
24672474

24682475
"toidentifier": ["[email protected]", "", {}, "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA=="],
24692476

2477+
"toygrad": ["[email protected]", "", {}, "sha512-g4zBmlSbvzOE5FOILxYkAybTSxijKLkj1WoNqVGnbMcWDyj4wWQ+eYSr3ik7XOpIgMq/7eBcPRTJX3DM2E0YMg=="],
2478+
24702479
"tr46": ["[email protected]", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="],
24712480

24722481
"ts-log": ["[email protected]", "", {}, "sha512-320x5Ggei84AxzlXp91QkIGSw5wgaLT6GeAH0KsqDmRZdVWW2OiSeVvElVoatk3f7nicwXlElXsoFkARiGE2yg=="],
@@ -2671,6 +2680,8 @@
26712680

26722681
"@gitbook/integration-runkit/itty-router": ["[email protected]", "", {}, "sha512-hIPHtXGymCX7Lzb2I4G6JgZFE4QEEQwst9GORK7sMYUpJvLfy4yZJr95r04e8DzoAnj6HcxM2m4TbK+juu+18g=="],
26732682

2683+
"@gitbook/integration-slack/commander": ["[email protected]", "", {}, "sha512-TywoWNNRbhoD0BXs1P3ZEScW8W5iKrnbithIl0YH+uCmBd0QpPOA8yc82DS3BIE5Ma6FnBVUsJ7wVUDz4dvOWQ=="],
2684+
26742685
"@gitbook/integration-slack/itty-router": ["[email protected]", "", {}, "sha512-KegPW0l9SNPadProoFT07AB84uOqLUwzlXQ7HsqkS31WUrxkjdhcemRpTDUuetbMJ89uBtWeQSVoiEmUAu31uw=="],
26752686

26762687
"@gitbook/integration-va-auth0/itty-router": ["[email protected]", "", {}, "sha512-KegPW0l9SNPadProoFT07AB84uOqLUwzlXQ7HsqkS31WUrxkjdhcemRpTDUuetbMJ89uBtWeQSVoiEmUAu31uw=="],
@@ -2799,6 +2810,8 @@
27992810

28002811
"dot-case/tslib": ["[email protected]", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
28012812

2813+
"encoding/iconv-lite": ["[email protected]", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="],
2814+
28022815
"enquirer/strip-ansi": ["[email protected]", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="],
28032816

28042817
"execa/onetime": ["[email protected]", "", { "dependencies": { "mimic-fn": "^4.0.0" } }, "sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ=="],

integrations/slack/package.json

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,25 @@
33
"version": "2.5.3",
44
"private": true,
55
"dependencies": {
6-
"@gitbook/runtime": "*",
76
"@gitbook/api": "*",
7+
"@gitbook/runtime": "*",
88
"itty-router": "^4.0.26",
99
"js-sha256": "^0.9.0",
10-
"remove-markdown": "^0.5.0"
10+
"remove-markdown": "^0.5.0",
11+
"toygrad": "^2.6.0"
1112
},
1213
"devDependencies": {
1314
"@gitbook/cli": "workspace:*",
14-
"@gitbook/tsconfig": "workspace:*"
15+
"@gitbook/tsconfig": "workspace:*",
16+
"@vanillaes/csv": "^3.0.4",
17+
"commander": "^14.0.2"
1518
},
1619
"scripts": {
1720
"typecheck": "tsc --noEmit",
1821
"check": "gitbook check",
22+
"test": "bun test",
1923
"publish-integrations": "gitbook publish .",
20-
"publish-integrations-staging": "gitbook publish ."
24+
"publish-integrations-staging": "gitbook publish .",
25+
"train-classifier": "bun run scripts/train-classifier.ts --"
2126
}
2227
}
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

import { Logger } from '@gitbook/runtime';
import { parse as csvParse } from '@vanillaes/csv';
import { Command } from 'commander';
import { NeuralNetwork, Trainers, Tensor } from 'toygrad';
7+
8+
const logger = Logger('slack:scripts:train-classifier');
9+
10+
const __dirname = path.dirname(new URL(import.meta.url).pathname);
11+
12+
/**
13+
* Clean text: remove mentions, punctuation, lowercase.
14+
*/
15+
function cleanText(text: string): string {
16+
return text
17+
.replace(/@\w+/g, '') // remove mentions
18+
.replace(/[^\w\s]/g, '') // remove punctuation
19+
.toLowerCase()
20+
.trim();
21+
}
22+
23+
/**
24+
* Load CSV training data.
25+
*/
26+
function loadTrainingData(filePath: string): { text: string; intent: string }[] {
27+
if (!fs.existsSync(filePath)) {
28+
throw new Error(`CSV file not found: ${filePath}`);
29+
}
30+
31+
const csvContent = fs.readFileSync(filePath, 'utf-8');
32+
const rows = csvParse(csvContent) as string[][];
33+
const [, ...data] = rows;
34+
35+
return data.map(([text, intent]) => ({
36+
text: text.trim(),
37+
intent: intent.trim(),
38+
}));
39+
}
40+
41+
/**
42+
* Build vocabulary from training data.
43+
*/
44+
function buildVocabulary(records: { text: string; intent: string }[]): string[] {
45+
const vocabSet = new Set<string>();
46+
for (const r of records) {
47+
const words = cleanText(r.text).match(/\b\w+\b/g) || [];
48+
for (const word of words) {
49+
vocabSet.add(word);
50+
}
51+
}
52+
return Array.from(vocabSet);
53+
}
54+
55+
/**
56+
* Convert text to weighted bag-of-words vector.
57+
*/
58+
function textToWordVector(text: string, vocabulary: string[]): Float32Array {
59+
const vector = new Float32Array(vocabulary.length);
60+
const words = cleanText(text).match(/\b\w+\b/g) || [];
61+
for (const word of words) {
62+
const idx = vocabulary.indexOf(word);
63+
if (idx !== -1) {
64+
vector[idx] += 1;
65+
}
66+
}
67+
return vector;
68+
}
69+
70+
/**
71+
* Build or load the neural network model.
72+
*/
73+
function buildModel(inputSize: number, outputSize: number): NeuralNetwork {
74+
const options: NeuralNetwork['options'] = {
75+
layers: [
76+
{ type: 'input', sx: 1, sy: 1, sz: inputSize },
77+
{ type: 'dense', filters: 32 },
78+
{ type: 'relu' },
79+
{ type: 'dense', filters: 16 },
80+
{ type: 'relu' },
81+
{ type: 'dense', filters: outputSize },
82+
{ type: 'softmax' },
83+
],
84+
};
85+
86+
const nn = new NeuralNetwork(options);
87+
return nn;
88+
}
89+
90+
async function trainModel(
91+
nn: NeuralNetwork,
92+
records: { text: string; intent: string }[],
93+
vocabulary: string[],
94+
outputLabels: string[],
95+
epochs = 50,
96+
batchSize = 4,
97+
) {
98+
const trainingInputs: Tensor[] = [];
99+
const trainingTargets: number[] = []; // target label indices
100+
101+
for (const r of records) {
102+
const vec = textToWordVector(r.text, vocabulary);
103+
const inputTensor = new Tensor(1, 1, vec.length, vec);
104+
trainingInputs.push(inputTensor);
105+
106+
const targetIdx = outputLabels.indexOf(r.intent);
107+
if (targetIdx === -1) {
108+
throw new Error(`Unknown intent label: ${r.intent}`);
109+
}
110+
trainingTargets.push(targetIdx);
111+
}
112+
113+
const trainer = new Trainers.Adadelta(nn, {
114+
batchSize: batchSize,
115+
});
116+
117+
logger.info(`🚀 Training model on ${records.length} examples for ${epochs} epochs...`);
118+
119+
for (let epoch = 0; epoch < epochs; epoch++) {
120+
for (let i = 0; i < trainingInputs.length; i++) {
121+
trainer.train(trainingInputs[i], trainingTargets[i]);
122+
}
123+
if ((epoch + 1) % 10 === 0) {
124+
logger.info(`Epoch ${epoch + 1}/${epochs} done`);
125+
}
126+
}
127+
128+
logger.info('✅ Training complete');
129+
}
130+
131+
/**
132+
* Save model, vocabulary, and output labels into JSON file for classifier to use.
133+
*/
134+
function saveModel(
135+
nn: NeuralNetwork,
136+
vocabulary: string[],
137+
outputLabels: string[],
138+
filePath: string,
139+
) {
140+
const options = nn.getAsOptions('f32');
141+
const serialized = {
142+
model: options,
143+
vocabulary,
144+
outputLabels,
145+
};
146+
fs.writeFileSync(filePath, JSON.stringify(serialized, null, 2));
147+
logger.info(`💾 Saved updated classifier to ${filePath}`);
148+
}
149+
150+
async function main() {
151+
const program = new Command();
152+
153+
program
154+
.name('train-classifier')
155+
.description('Train or update the action intent classifier from a CSV file')
156+
.requiredOption('-c, --csv <path>', 'Path to the training CSV file')
157+
.option(
158+
'-m, --model <path>',
159+
'Path to serialized model JSON',
160+
'../src/actions/intent/classifier-model.json',
161+
)
162+
.parse(process.argv);
163+
164+
const opts = program.opts();
165+
const csvPath = path.resolve(opts.csv);
166+
const modelPath = path.resolve(__dirname, opts.model);
167+
168+
try {
169+
const records = loadTrainingData(csvPath);
170+
const vocabulary = buildVocabulary(records);
171+
const outputLabels = Array.from(new Set(records.map((r) => r.intent)));
172+
173+
const inputSize = vocabulary.length;
174+
const outputSize = outputLabels.length;
175+
176+
logger.info(`Vocabulary size: ${inputSize}`);
177+
logger.info(`Output labels: ${outputLabels.join(', ')}`);
178+
179+
const nn = buildModel(inputSize, outputSize);
180+
181+
await trainModel(nn, records, vocabulary, outputLabels, 50, 4);
182+
183+
saveModel(nn, vocabulary, outputLabels, modelPath);
184+
} catch (err) {
185+
logger.error('❌ Error:', (err as Error).message);
186+
process.exit(1);
187+
}
188+
}
189+
190+
main();

integrations/slack/src/actions/ingestConversation.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,6 @@ export async function handleIngestSlackConversationTask(
150150
const {
151151
payload: {
152152
channelId,
153-
userId,
154-
responseUrl,
155153
threadId,
156154
organizationId,
157155
installationId,

0 commit comments

Comments
 (0)