Skip to content

Commit c07e87f

Browse files
stduhpf and ngxson authored
server : (webui) put DeepSeek R1 CoT in a collapsible <details> element (#11364)
* webui : put DeepSeek R1 CoT in a collapsible <details> element * webui: refactor split * webui: don't use regex to split cot and response * webui: format+qol * webui: no loading icon if the model isn't generating * ui fix, add configs * add jsdoc types * only filter </think> for assistant msg * build * update build --------- Co-authored-by: Xuan Son Nguyen <[email protected]>
1 parent 564804b commit c07e87f

File tree

3 files changed

+159
-33
lines changed

3 files changed

+159
-33
lines changed

examples/server/public/index.html.gz

658 Bytes
Binary file not shown.

examples/server/webui/index.html

+26-1
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ <h2 class="font-bold ml-4">Conversations</h2>
141141
:msg="pendingMsg"
142142
:key="pendingMsg.id"
143143
:is-generating="isGenerating"
144+
:show-thought-in-progress="config.showThoughtInProgress"
144145
:edit-user-msg-and-regenerate="() => {}"
145146
:regenerate-msg="() => {}"></message-bubble>
146147
</div>
@@ -202,6 +203,20 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
202203
</template>
203204
</div>
204205
</details>
206+
<!-- Section: Reasoning models -->
207+
<details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
208+
<summary class="collapse-title font-bold">Reasoning models</summary>
209+
<div class="collapse-content">
210+
<div class="flex flex-row items-center mb-2">
211+
<input type="checkbox" class="checkbox" v-model="config.showThoughtInProgress" />
212+
<span class="ml-4">Expand thought process by default for generating message</span>
213+
</div>
214+
<div class="flex flex-row items-center mb-2">
215+
<input type="checkbox" class="checkbox" v-model="config.excludeThoughtOnReq" />
216+
<span class="ml-4">Exclude thought process when sending request to API (Recommended for DeepSeek-R1)</span>
217+
</div>
218+
</div>
219+
</details>
205220
<!-- Section: Advanced config -->
206221
<details class="collapse collapse-arrow bg-base-200 mb-2 overflow-visible">
207222
<summary class="collapse-title font-bold">Advanced config</summary>
@@ -261,7 +276,17 @@ <h3 class="text-lg font-bold mb-6">Settings</h3>
261276
<span v-if="msg.content === null" class="loading loading-dots loading-md"></span>
262277
<!-- render message as markdown -->
263278
<div v-else dir="auto">
264-
<vue-markdown :source="msg.content"></vue-markdown>
279+
<details v-if="msg.role === 'assistant' && splitMsgContent.cot" class="collapse bg-base-200 collapse-arrow mb-4" :open="splitMsgContent.isThinking && showThoughtInProgress">
280+
<summary class="collapse-title">
281+
<span v-if="splitMsgContent.isThinking">
282+
<span v-if="isGenerating" class="loading loading-spinner loading-md mr-2" style="vertical-align: middle;"></span>
283+
<b>Thinking</b>
284+
</span>
285+
<b v-else>Thought Process</b>
286+
</summary>
287+
<vue-markdown :source="splitMsgContent.cot" dir="auto" class="collapse-content"></vue-markdown>
288+
</details>
289+
<vue-markdown :source="splitMsgContent.content"></vue-markdown>
265290
</div>
266291
<!-- render timings if enabled -->
267292
<div class="dropdown dropdown-hover dropdown-top mt-2" v-if="timings && config.showTokensPerSecond">

examples/server/webui/src/main.js

+133-32
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
1717

1818
const isDev = import.meta.env.MODE === 'development';
1919

20+
// types
21+
/** @typedef {{ id: number, role: 'user' | 'assistant', content: string, timings: any }} Message */
22+
/** @typedef {{ role: 'user' | 'assistant', content: string }} APIMessage */
23+
/** @typedef {{ id: string, lastModified: number, messages: Array<Message> }} Conversation */
24+
2025
// utility functions
2126
// True for string primitives and String objects. Uses a type check rather than
// probing `x.toLowerCase`, so null/undefined yield false instead of throwing.
const isString = (x) => typeof x === 'string' || x instanceof String;
2227
// True only for the primitive values `true` and `false`.
const isBoolean = (x) => typeof x === 'boolean';
@@ -50,6 +55,8 @@ const CONFIG_DEFAULT = {
5055
apiKey: '',
5156
systemMessage: 'You are a helpful assistant.',
5257
showTokensPerSecond: false,
58+
showThoughtInProgress: false,
59+
excludeThoughtOnReq: true,
5360
// make sure these default values are in sync with `common.h`
5461
samplers: 'edkypmxt',
5562
temperature: 0.8,
@@ -172,6 +179,7 @@ const MessageBubble = defineComponent({
172179
config: Object,
173180
msg: Object,
174181
isGenerating: Boolean,
182+
showThoughtInProgress: Boolean,
175183
editUserMsgAndRegenerate: Function,
176184
regenerateMsg: Function,
177185
},
@@ -188,7 +196,31 @@ const MessageBubble = defineComponent({
188196
prompt_per_second: this.msg.timings.prompt_n / (this.msg.timings.prompt_ms / 1000),
189197
predicted_per_second: this.msg.timings.predicted_n / (this.msg.timings.predicted_ms / 1000),
190198
};
191-
}
199+
},
200+
splitMsgContent() {
201+
const content = this.msg.content;
202+
if (this.msg.role !== 'assistant') {
203+
return { content };
204+
}
205+
let actualContent = '';
206+
let cot = '';
207+
let isThinking = false;
208+
let thinkSplit = content.split('<think>', 2);
209+
actualContent += thinkSplit[0];
210+
while (thinkSplit[1] !== undefined) {
211+
// <think> tag found
212+
thinkSplit = thinkSplit[1].split('</think>', 2);
213+
cot += thinkSplit[0];
214+
isThinking = true;
215+
if (thinkSplit[1] !== undefined) {
216+
// </think> closing tag found
217+
isThinking = false;
218+
thinkSplit = thinkSplit[1].split('<think>', 2);
219+
actualContent += thinkSplit[0];
220+
}
221+
}
222+
return { content: actualContent, cot, isThinking };
223+
},
192224
},
193225
methods: {
194226
copyMsg() {
@@ -208,7 +240,10 @@ const MessageBubble = defineComponent({
208240
// format: { [convId]: { id: string, lastModified: number, messages: [...] } }
209241
// convId is a string prefixed with 'conv-'
210242
const StorageUtils = {
211-
// manage conversations
243+
/**
244+
* manage conversations
245+
* @returns {Array<Conversation>}
246+
*/
212247
getAllConversations() {
213248
const res = [];
214249
for (const key in localStorage) {
@@ -219,11 +254,19 @@ const StorageUtils = {
219254
res.sort((a, b) => b.lastModified - a.lastModified);
220255
return res;
221256
},
222-
// can return null if convId does not exist
257+
/**
258+
* can return null if convId does not exist
259+
* @param {string} convId
260+
* @returns {Conversation | null}
261+
*/
223262
getOneConversation(convId) {
224263
return JSON.parse(localStorage.getItem(convId) || 'null');
225264
},
226-
// if convId does not exist, create one
265+
/**
266+
* if convId does not exist, create one
267+
* @param {string} convId
268+
* @param {Message} msg
269+
*/
227270
appendMsg(convId, msg) {
228271
if (msg.content === null) return;
229272
const conv = StorageUtils.getOneConversation(convId) || {
@@ -235,19 +278,36 @@ const StorageUtils = {
235278
conv.lastModified = Date.now();
236279
localStorage.setItem(convId, JSON.stringify(conv));
237280
},
281+
/**
282+
* Get new conversation id
283+
* @returns {string}
284+
*/
238285
getNewConvId() {
239286
return `conv-${Date.now()}`;
240287
},
288+
/**
289+
* remove conversation by id
290+
* @param {string} convId
291+
*/
241292
remove(convId) {
242293
localStorage.removeItem(convId);
243294
},
295+
/**
296+
* keep only the messages of a conversation that satisfy the predicate
297+
* @param {string} convId
298+
*/
244299
filterAndKeepMsgs(convId, predicate) {
245300
const conv = StorageUtils.getOneConversation(convId);
246301
if (!conv) return;
247302
conv.messages = conv.messages.filter(predicate);
248303
conv.lastModified = Date.now();
249304
localStorage.setItem(convId, JSON.stringify(conv));
250305
},
306+
/**
307+
* remove last message from conversation
308+
* @param {string} convId
309+
* @returns {Message | undefined}
310+
*/
251311
popMsg(convId) {
252312
const conv = StorageUtils.getOneConversation(convId);
253313
if (!conv) return;
@@ -322,17 +382,20 @@ const mainApp = createApp({
322382
data() {
323383
return {
324384
conversations: StorageUtils.getAllConversations(),
325-
messages: [], // { id: number, role: 'user' | 'assistant', content: string }
385+
/** @type {Array<Message>} */
386+
messages: [],
326387
viewingConvId: StorageUtils.getNewConvId(),
327388
inputMsg: '',
328389
isGenerating: false,
390+
/** @type {Message | null} */
329391
pendingMsg: null, // the on-going message from assistant
330392
stopGeneration: () => {},
331393
selectedTheme: StorageUtils.getTheme(),
332394
config: StorageUtils.getConfig(),
333395
showConfigDialog: false,
334396
// const
335397
themes: THEMES,
398+
/** @type {typeof CONFIG_DEFAULT} */
336399
configDefault: {...CONFIG_DEFAULT},
337400
configInfo: {...CONFIG_INFO},
338401
isDev,
@@ -425,42 +488,50 @@ const mainApp = createApp({
425488
this.isGenerating = true;
426489

427490
try {
491+
/** @type {typeof CONFIG_DEFAULT} */
492+
const config = this.config;
428493
const abortController = new AbortController();
429494
this.stopGeneration = () => abortController.abort();
495+
/** @type {Array<APIMessage>} */
496+
let messages = [
497+
{ role: 'system', content: config.systemMessage },
498+
...normalizeMsgsForAPI(this.messages),
499+
];
500+
if (config.excludeThoughtOnReq) {
501+
messages = filterThoughtFromMsgs(messages);
502+
}
503+
if (isDev) console.log({messages});
430504
const params = {
431-
messages: [
432-
{ role: 'system', content: this.config.systemMessage },
433-
...this.messages,
434-
],
505+
messages,
435506
stream: true,
436507
cache_prompt: true,
437-
samplers: this.config.samplers,
438-
temperature: this.config.temperature,
439-
dynatemp_range: this.config.dynatemp_range,
440-
dynatemp_exponent: this.config.dynatemp_exponent,
441-
top_k: this.config.top_k,
442-
top_p: this.config.top_p,
443-
min_p: this.config.min_p,
444-
typical_p: this.config.typical_p,
445-
xtc_probability: this.config.xtc_probability,
446-
xtc_threshold: this.config.xtc_threshold,
447-
repeat_last_n: this.config.repeat_last_n,
448-
repeat_penalty: this.config.repeat_penalty,
449-
presence_penalty: this.config.presence_penalty,
450-
frequency_penalty: this.config.frequency_penalty,
451-
dry_multiplier: this.config.dry_multiplier,
452-
dry_base: this.config.dry_base,
453-
dry_allowed_length: this.config.dry_allowed_length,
454-
dry_penalty_last_n: this.config.dry_penalty_last_n,
455-
max_tokens: this.config.max_tokens,
456-
timings_per_token: !!this.config.showTokensPerSecond,
457-
...(this.config.custom.length ? JSON.parse(this.config.custom) : {}),
508+
samplers: config.samplers,
509+
temperature: config.temperature,
510+
dynatemp_range: config.dynatemp_range,
511+
dynatemp_exponent: config.dynatemp_exponent,
512+
top_k: config.top_k,
513+
top_p: config.top_p,
514+
min_p: config.min_p,
515+
typical_p: config.typical_p,
516+
xtc_probability: config.xtc_probability,
517+
xtc_threshold: config.xtc_threshold,
518+
repeat_last_n: config.repeat_last_n,
519+
repeat_penalty: config.repeat_penalty,
520+
presence_penalty: config.presence_penalty,
521+
frequency_penalty: config.frequency_penalty,
522+
dry_multiplier: config.dry_multiplier,
523+
dry_base: config.dry_base,
524+
dry_allowed_length: config.dry_allowed_length,
525+
dry_penalty_last_n: config.dry_penalty_last_n,
526+
max_tokens: config.max_tokens,
527+
timings_per_token: !!config.showTokensPerSecond,
528+
...(config.custom.length ? JSON.parse(config.custom) : {}),
458529
};
459530
const chunks = sendSSEPostRequest(`${BASE_URL}/v1/chat/completions`, {
460531
method: 'POST',
461532
headers: {
462533
'Content-Type': 'application/json',
463-
...(this.config.apiKey ? {'Authorization': `Bearer ${this.config.apiKey}`} : {})
534+
...(config.apiKey ? {'Authorization': `Bearer ${config.apiKey}`} : {})
464535
},
465536
body: JSON.stringify(params),
466537
signal: abortController.signal,
@@ -477,7 +548,7 @@ const mainApp = createApp({
477548
};
478549
}
479550
const timings = chunk.timings;
480-
if (timings && this.config.showTokensPerSecond) {
551+
if (timings && config.showTokensPerSecond) {
481552
// only extract what's really needed, to save some space
482553
this.pendingMsg.timings = {
483554
prompt_n: timings.prompt_n,
@@ -598,3 +669,33 @@ try {
598669
<button class="btn" onClick="localStorage.clear(); window.location.reload();">Clear localStorage</button>
599670
</div>`;
600671
}
672+
673+
/**
674+
* filter out redundant fields upon sending to API
675+
* @param {Array<Message>} messages
676+
* @returns {Array<APIMessage>}
677+
*/
678+
function normalizeMsgsForAPI(messages) {
  // Build fresh objects carrying only `role` and `content`; any other field
  // on the input messages is left out of the result.
  return messages.map(({ role, content }) => ({ role, content }));
}
686+
687+
/**
688+
* recommended for DeepSeek-R1, filter out content between <think> and </think> tags
689+
* @param {Array<APIMessage>} messages
690+
* @returns {Array<APIMessage>}
691+
*/
692+
function filterThoughtFromMsgs(messages) {
  const OPEN_TAG = '<think>';
  const CLOSE_TAG = '</think>';
  return messages.map((msg) => {
    // Only assistant text is filtered; other roles and non-string content
    // are passed through unchanged.
    if (msg.role !== 'assistant' || typeof msg.content !== 'string') {
      return { role: msg.role, content: msg.content };
    }
    // Keep what follows the last closing tag (the whole text if there is none).
    const lastClose = msg.content.lastIndexOf(CLOSE_TAG);
    let answer = lastClose === -1
      ? msg.content
      : msg.content.slice(lastClose + CLOSE_TAG.length);
    // A thought that was opened but never closed would otherwise be sent
    // along with the answer; cut it off.
    const danglingOpen = answer.indexOf(OPEN_TAG);
    if (danglingOpen !== -1) {
      answer = answer.slice(0, danglingOpen);
    }
    return { role: msg.role, content: answer.trim() };
  });
}

0 commit comments

Comments
 (0)