Skip to content

Commit 4560c3e

Browse files
committed
update markdown entities for o1 completions
1 parent e3f8fb2 commit 4560c3e

File tree

1 file changed

+47
-47
lines changed

1 file changed

+47
-47
lines changed

src/modules/llms/utils/helpers.ts

Lines changed: 47 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -309,60 +309,18 @@ export const hasCodeSnippet = (ctx: OnMessageContext | OnCallBackQueryData): boo
309309
return entities.length > 0
310310
}
311311

312-
/**
 * Splits `text` into chunks of at most 4096 characters (the `maxLength`
 * used for a single Telegram message) without cutting through a Markdown
 * entity matched by `markdownRegex` (bold/italic delimiters, inline links,
 * fenced code blocks, inline code).
 *
 * Cut selection for every chunk that does not reach the end of the text:
 *  1. Start from the hard limit `startIndex + maxLength`.
 *  2. If that position falls inside an entity that begins after the chunk
 *     start, move the cut back to the beginning of that entity so the entity
 *     goes to the next chunk as a whole.
 *  3. Prefer the last space before the cut that is not inside an entity.
 *  4. If the chunk starts inside (or at) an entity longer than `maxLength`,
 *     the entity cannot be kept whole; fall back to the last plain space, or
 *     to the hard limit when there is none.
 *
 * Each chunk is trimmed, whitespace between chunks is skipped, and chunks
 * that are empty after trimming are dropped.
 *
 * @param text - The full message text.
 * @returns The non-empty chunks in their original order; `[]` for empty or
 * whitespace-only input.
 */
export const splitTelegramMessage2 = (text: string): string[] => {
  const maxLength = 4096
  const result: string[] = []

  // Regular expression to match Markdown entities
  const markdownRegex = /(\*\*|__|\[.*?\]\(.*?\)|```[\s\S]*?```|`[^`\n]+`)/g

  // [start, end) offsets of every entity. Matching the whole text once (rather
  // than each 4096-character window) is what makes an entity that crosses a
  // window boundary visible at all. matchAll yields non-overlapping matches in
  // ascending order, which the binary search below relies on.
  const entities = [...text.matchAll(markdownRegex)].map((match) => {
    const start = match.index ?? 0
    return { start, end: start + match[0].length }
  })

  // Returns the entity that strictly contains `position`
  // (start < position < end), or undefined when a cut there splits nothing.
  const entityAround = (position: number): { start: number, end: number } | undefined => {
    let low = 0
    let high = entities.length - 1
    while (low <= high) {
      const mid = (low + high) >> 1
      const entity = entities[mid]
      if (entity.end <= position) {
        low = mid + 1
      } else if (entity.start >= position) {
        high = mid - 1
      } else {
        return entity
      }
    }
    return undefined
  }

  // Index of the last space in (from, before) that is outside every entity,
  // or -1. A space inside an entity is skipped by jumping to the text that
  // precedes the entity.
  const lastSafeSpace = (from: number, before: number): number => {
    let candidate = text.lastIndexOf(' ', before - 1)
    while (candidate > from) {
      const entity = entityAround(candidate)
      if (entity === undefined) return candidate
      candidate = text.lastIndexOf(' ', entity.start - 1)
    }
    return -1
  }

  let startIndex = 0
  while (startIndex < text.length) {
    let endIndex = Math.min(startIndex + maxLength, text.length)

    // Only a chunk that stops short of the end of the text needs a break point
    if (endIndex < text.length) {
      const straddling = entityAround(endIndex)
      if (straddling !== undefined && straddling.start <= startIndex) {
        // Entity longer than maxLength: splitting it is unavoidable, so at
        // least break on a space when one exists.
        const anySpace = text.lastIndexOf(' ', endIndex - 1)
        if (anySpace > startIndex) endIndex = anySpace
      } else {
        if (straddling !== undefined) endIndex = straddling.start
        const spaceIndex = lastSafeSpace(startIndex, endIndex)
        if (spaceIndex > startIndex) endIndex = spaceIndex
      }
    }

    // endIndex > startIndex holds on every path above, so the loop always advances
    const chunk = text.slice(startIndex, endIndex).trim()
    if (chunk.length > 0) result.push(chunk)
    startIndex = endIndex

    // Move past the whitespace that separated this chunk from the next one
    while (startIndex < text.length && /\s/.test(text.charAt(startIndex))) {
      startIndex++
    }
  }

  return result
}
353-
354312
// Find all Markdown entities and their positions
355313
export const splitTelegramMessage = (text: string): string[] => {
356314
const maxLength = 4096
357315
const result: string[] = []
358316

359317
// Regex to match start of Markdown entities
360318
const entityStartPatterns = [
361-
/\*\*/g, // bold
362-
/__/g, // italic
363-
/```/g, // code block
364-
/`/g, // inline code
365-
/\[/g // link start
319+
/\*/g, // bold text (single asterisk)
320+
/_/g, // italic text (single underscore)
321+
/```/g, // pre-formatted code block (triple backtick)
322+
/`/g, // inline fixed-width code (single backtick)
323+
/\[/g // inline URL or user mention
366324
]
367325

368326
// Function to find the last safe split position
@@ -405,3 +363,45 @@ export const splitTelegramMessage = (text: string): string[] => {
405363
}
406364
return result
407365
}
366+
367+
// export const splitTelegramMessage = (text: string): string[] => {
368+
// const maxLength = 4096
369+
// const result: string[] = []
370+
371+
// // Regular expression to match Markdown entities
372+
// const markdownRegex = /(\*\*|__|\[.*?\]\(.*?\)|```[\s\S]*?```|`[^`\n]+`)/g
373+
374+
// // Function to find the end index that avoids splitting Markdown entities
375+
// const findEndIndex = (startIndex: number, chunk: string): number => {
376+
// const matches = [...chunk.matchAll(markdownRegex)]
377+
// if (matches.length === 0) return startIndex + maxLength
378+
379+
// const lastMatch = matches[matches.length - 1]
380+
// const lastMatchEnd = lastMatch.index + lastMatch[0].length
381+
// return lastMatchEnd > chunk.length ? startIndex + lastMatch.index : startIndex + maxLength
382+
// }
383+
384+
// let startIndex = 0
385+
// while (startIndex < text.length) {
386+
// let endIndex = findEndIndex(startIndex, text.slice(startIndex, startIndex + maxLength))
387+
// endIndex = Math.min(endIndex, text.length) // Ensure endIndex is within bounds
388+
389+
// // Find a natural break point if necessary
390+
// if (endIndex < text.length) {
391+
// const lastSpaceIndex = text.slice(startIndex, endIndex).lastIndexOf(' ')
392+
// if (lastSpaceIndex > 0) {
393+
// endIndex = startIndex + lastSpaceIndex
394+
// }
395+
// }
396+
397+
// result.push(text.slice(startIndex, endIndex).trim())
398+
// startIndex = endIndex
399+
400+
// // Move past any spaces or special characters that might cause issues
401+
// while (startIndex < text.length && /\s/.test(text[startIndex])) {
402+
// startIndex++
403+
// }
404+
// }
405+
406+
// return result
407+
// }

0 commit comments

Comments
 (0)