Skip to content

Commit e3ccc3e

Browse files
chore: refactor translator core to make it easier to add more formats
1 parent 51a01d5 commit e3ccc3e

File tree

7 files changed: +146 additions, -125 deletions

src/commands/markdown.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import {Args, Command, Flags} from '@oclif/core'
22

3-
import {MarkdownTranslator} from '../core/translators/markdown.js'
3+
import {MARKDOWN_SYSTEM_PROMPT} from '../core/prompts/markdown.js'
4+
import {MarkdownSplitter} from '../core/splitters/markdown.js'
5+
import {Translator} from '../core/translators/translator.js'
46
import {createProviderFromProfile} from '../lib/profile/factory.js'
57
import {loadProfile} from '../lib/profile/storage.js'
68

@@ -55,7 +57,7 @@ export default class Markdown extends Command {
5557

5658
const profile = loadProfile(flags.profile)
5759
const llm = createProviderFromProfile(profile)
58-
const translator = new MarkdownTranslator(llm)
60+
const translator = new Translator(llm, new MarkdownSplitter(), MARKDOWN_SYSTEM_PROMPT)
5961

6062
if (flags.stream) {
6163
for await (const chunk of translator.translateStream({

src/core/prompts/markdown.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
2+
* System prompt template for markdown translation.
3+
* Uses {{sourceLanguage}} and {{targetLanguage}} as placeholders.
* Both placeholders occur more than once in the text, so a consumer has to
* substitute every occurrence, not only the first one.
* The template describes what must stay untranslated (inline code, custom
* directives, colon-delimited keywords, URLs, HTML tags and a few named
* sections) and asks for idiomatic, comment-free output.
* Backticks that belong to the prompt text are escaped because the prompt
* itself is written as a template literal.
4+
*/
5+
export const MARKDOWN_SYSTEM_PROMPT = `
6+
You are a helpful assistant that accurately translates markdown document snippets from {{sourceLanguage}} to {{targetLanguage}} while preserving markdown syntax, formatting, and custom directives.
7+
You always preserve the structure and formatting exactly as it is.
8+
You do not add, alter or modify the text you receive in any way.
9+
10+
Reminder:
11+
- Translate only the text, preserving the structure and formatting.
12+
- NEVER under any circumstances translate any words found inside backticks Eg. \`Text\`.
13+
- NEVER translate custom directive like ::startApplication{...} or ::openFile{...}.
14+
- DO translate titles inside the ::page{title=""} custom directive.
15+
- NEVER translate keywords that appear after colons, such as \`:fa-lightbulb-o:\`.
16+
- NEVER translate the sections "Author", "Other Contributors", and "Change Logs".
17+
- NEVER translate any URLs.
18+
- NEVER translate HTML tags like \`<details>\` and \`<summary>\`.
19+
- Translate idiomatically, adapting expressions to sound natural in {{targetLanguage}}.
20+
- Avoid overly literal translations; prioritize clarity and fluency in {{targetLanguage}} over word-for-word accuracy.
21+
- Use concise and clear language that would sound natural in everyday speech or written {{targetLanguage}}.
22+
- When technical {{sourceLanguage}} terms lack a common {{targetLanguage}} equivalent, use well-known {{targetLanguage}} alternatives or rephrase for clarity.
23+
- Be consistent with technical terms. If an equivalent technical term is not available in {{targetLanguage}}, always use the original term.
24+
25+
*IMPORTANT*
26+
Translate without any additional information or comments.
27+
`

src/core/splitters/markdown.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import {MarkdownTextSplitter} from '@langchain/textsplitters'
22

3-
import type {Chunk} from '../types.js'
3+
import type {BaseSplitter, Chunk} from '../types.js'
44

55
/**
66
* Hybrid two-pass markdown splitter for translation purposes.
@@ -13,7 +13,7 @@ import type {Chunk} from '../types.js'
1313
* - Prevents massive chunks from overwhelming translation APIs
1414
* - Uses LangChain's intelligent splitting for size management (tries headers > paragraphs > lines)
1515
*/
16-
export class MarkdownSplitter {
16+
export class MarkdownSplitter implements BaseSplitter {
1717
private readonly chunkSize: number
1818
private recursiveSplitter: MarkdownTextSplitter
1919

@@ -30,7 +30,7 @@ export class MarkdownSplitter {
3030
/**
3131
* Appends a chunk to an accumulator string, preserving whitespace
3232
*/
33-
reconstructChunk(accumulator: string, chunk: Chunk): string {
33+
reconstruct(accumulator: string, chunk: Chunk): string {
3434
return accumulator + (chunk.leadingWhitespace || '') + chunk.content + (chunk.trailingWhitespace || '')
3535
}
3636

src/core/translators/markdown.ts

Lines changed: 0 additions & 116 deletions
This file was deleted.

src/core/translators/translator.ts

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import type {BaseChatModel} from '@langchain/core/language_models/chat_models'
2+
3+
import {BaseMessage, HumanMessage, SystemMessage} from '@langchain/core/messages'
4+
5+
import type {BaseSplitter, TranslationOptions} from '../types.js'
6+
7+
/**
 * Generic translator that orchestrates content splitting, translation, and reconstruction.
 * Works with any splitter and prompt via dependency injection.
 *
 * The system prompt template may contain the placeholders `{{sourceLanguage}}` and
 * `{{targetLanguage}}`; every occurrence is substituted before a chunk is sent to the model.
 */
export class Translator {
  /**
   * Matches the two placeholders understood by {@link Translator.interpolatePrompt}.
   * Sharing one global regex is safe because `String.prototype.replace` resets `lastIndex`.
   */
  private static readonly placeholderPattern = /\{\{(sourceLanguage|targetLanguage)\}\}/g

  /**
   * @param chatModel - Chat model used to translate individual chunks
   * @param splitter - Splits the input into chunks and reassembles the translated output
   * @param systemPromptTemplate - System prompt containing the language placeholders
   */
  constructor(
    private readonly chatModel: BaseChatModel,
    private readonly splitter: BaseSplitter,
    private readonly systemPromptTemplate: string,
  ) {}

  /**
   * Translates content from source language to target language
   *
   * Chunks are processed sequentially so the output keeps the order of the input.
   * Chunks whose `shouldTranslate` flag is falsy are passed to the splitter unchanged.
   *
   * @param options - Content plus source and target language
   * @returns The reassembled document
   */
  async translate(options: TranslationOptions): Promise<string> {
    const chunks = await this.splitter.split(options.content)
    let response = ''

    for (const chunk of chunks) {
      if (chunk.shouldTranslate) {
        // eslint-disable-next-line no-await-in-loop
        const translatedChunk = await this.chatModel.invoke(this.buildMessages({...options, content: chunk.content}))

        // Model content is not guaranteed to be a plain string, so normalise it instead of casting.
        response = this.splitter.reconstruct(response, {
          ...chunk,
          content: this.extractText(translatedChunk.content),
        })
      } else {
        response = this.splitter.reconstruct(response, chunk)
      }
    }

    return response
  }

  /**
   * Streams translated content from source language to target language
   *
   * For every chunk the leading whitespace, the (translated) content and the trailing
   * whitespace are yielded in that order.
   *
   * @param options - Content plus source and target language
   * @yields {string} Chunks of translated content
   */
  async *translateStream(options: TranslationOptions): AsyncGenerator<string> {
    const chunks = await this.splitter.split(options.content)

    for (const chunk of chunks) {
      yield chunk.leadingWhitespace ?? ''

      if (chunk.shouldTranslate) {
        yield* this.streamChunk({...options, content: chunk.content})
      } else {
        yield chunk.content
      }

      yield chunk.trailingWhitespace ?? ''
    }
  }

  /**
   * Builds the message list for one model call: the interpolated system prompt
   * followed by the content to translate as the human message.
   */
  private buildMessages({content, sourceLanguage, targetLanguage}: TranslationOptions): BaseMessage[] {
    const systemPrompt = this.interpolatePrompt(this.systemPromptTemplate, sourceLanguage, targetLanguage)

    return [new SystemMessage(systemPrompt), new HumanMessage(content)]
  }

  /**
   * Reduces model message content to plain text.
   *
   * A string is returned as is. For an array, string entries and the string `text`
   * property of object entries are concatenated in order; other entries are ignored.
   * Anything else yields an empty string.
   */
  private extractText(content: unknown): string {
    if (typeof content === 'string') {
      return content
    }

    if (!Array.isArray(content)) {
      return ''
    }

    let text = ''
    for (const part of content) {
      if (typeof part === 'string') {
        text += part
      } else if (typeof part === 'object' && part !== null && typeof part.text === 'string') {
        text += part.text
      }
    }

    return text
  }

  /**
   * Substitutes every `{{sourceLanguage}}` and `{{targetLanguage}}` placeholder in the template.
   *
   * The substitution is a single pass with a replacer function, so the language values are
   * inserted literally: `$`-patterns are not expanded and inserted text is never re-scanned.
   */
  private interpolatePrompt(template: string, sourceLanguage: string, targetLanguage: string): string {
    return template.replace(Translator.placeholderPattern, (_match: string, placeholder: string) =>
      placeholder === 'sourceLanguage' ? sourceLanguage : targetLanguage,
    )
  }

  /**
   * Streams a single chunk through the chat model
   *
   * String content is forwarded unchanged (including empty strings); non-string content
   * is forwarded only when it carries text.
   *
   * @yields {string} Chunks of translated content from the model
   */
  private async *streamChunk(options: TranslationOptions): AsyncGenerator<string> {
    const stream = await this.chatModel.stream(this.buildMessages(options))

    for await (const chunk of stream) {
      const {content} = chunk
      const text = this.extractText(content)

      if (typeof content === 'string' || text !== '') {
        yield text
      }
    }
  }
}

src/core/types.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,22 @@ export interface Chunk {
2424
trailingWhitespace?: string
2525
}
2626

27+
/**
28+
* Base interface for content splitters.
29+
* All splitter implementations must provide these methods.
* A splitter both cuts content into chunks and knows how to append a chunk
* back onto already assembled output.
30+
*/
31+
export interface BaseSplitter {
32+
/**
33+
* Reconstructs content by appending a chunk to an accumulator with proper whitespace
*
* @param accumulator - Output assembled so far
* @param chunk - Chunk to append
* @returns The accumulator extended by the chunk
34+
*/
35+
reconstruct(accumulator: string, chunk: Chunk): string
36+
37+
/**
38+
* Splits content into chunks
*
* @param content - Full content to split
* @returns A promise resolving to the chunks
39+
*/
40+
split(content: string): Promise<Chunk[]>
41+
}
42+
2743
/**
2844
* Translation configuration options
2945
*/

test/core/translators/markdown.test.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
import {describe, expect, it} from '@jest/globals'
22
import {FakeListChatModel} from '@langchain/core/utils/testing'
33

4-
import {MarkdownTranslator} from '../../../src/core/translators/markdown.js'
4+
import {MARKDOWN_SYSTEM_PROMPT} from '../../../src/core/prompts/markdown.js'
5+
import {MarkdownSplitter} from '../../../src/core/splitters/markdown.js'
6+
import {Translator} from '../../../src/core/translators/translator.js'
57

68
describe('MarkdownTranslator', () => {
79
let fakeChatModel: FakeListChatModel
8-
let translator: MarkdownTranslator
10+
let translator: Translator
911

1012
describe('translate', () => {
1113
it('translates markdown content and returns string', async () => {
1214
fakeChatModel = new FakeListChatModel({
1315
responses: ['Traducido'],
1416
})
15-
translator = new MarkdownTranslator(fakeChatModel)
17+
translator = new Translator(fakeChatModel, new MarkdownSplitter(), MARKDOWN_SYSTEM_PROMPT)
1618

1719
const result = await translator.translate({
1820
content: '# Hello World',
@@ -29,7 +31,7 @@ describe('MarkdownTranslator', () => {
2931
fakeChatModel = new FakeListChatModel({
3032
responses: ['Hello World'],
3133
})
32-
translator = new MarkdownTranslator(fakeChatModel)
34+
translator = new Translator(fakeChatModel, new MarkdownSplitter(), MARKDOWN_SYSTEM_PROMPT)
3335

3436
const chunks: string[] = []
3537
for await (const chunk of translator.translateStream({

0 commit comments

Comments
 (0)