Skip to content

Commit ecab79a

Browse files
chore: refactor translator core to make it easier to add more formats (#11)
* chore: refactor translator core to make it easier to add more formats
* Refactor commands
1 parent 51a01d5 commit ecab79a

File tree

11 files changed

+271
-212
lines changed

11 files changed

+271
-212
lines changed

package.json

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,12 @@
5757
"@oclif/plugin-help",
5858
"@oclif/plugin-plugins"
5959
],
60-
"topicSeparator": " ",
6160
"topics": {
62-
"hello": {
63-
"description": "Say hello to the world and others"
61+
"profiles": {
62+
"description": "Manage LLM provider profiles"
63+
},
64+
"translate": {
65+
"description": "Translate content between languages"
6466
}
6567
}
6668
},
@@ -77,4 +79,4 @@
7779
},
7880
"types": "dist/index.d.ts",
7981
"packageManager": "[email protected]"
80-
}
82+
}

src/commands/markdown.ts

Lines changed: 0 additions & 78 deletions
This file was deleted.

src/commands/translate/base.ts

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import {BaseChatModel} from '@langchain/core/language_models/chat_models'
2+
import {Args, Command, Flags, Interfaces} from '@oclif/core'
3+
4+
import {Translator} from '../../core/translators/translator.js'
5+
import {createProviderFromProfile} from '../../lib/profile/factory.js'
6+
import {loadProfile} from '../../lib/profile/storage.js'
7+
8+
/**
 * Flags visible to a concrete translate command: the flags the command declares
 * itself combined with the shared `BaseTranslateCommand.baseFlags`.
 */
type TranslateFlags<T extends typeof Command> = Interfaces.InferredFlags<
  T['flags'] & typeof BaseTranslateCommand.baseFlags
>

/** Positional arguments declared by a concrete translate command. */
type TranslateArgs<T extends typeof Command> = Interfaces.InferredArgs<T['args']>

/**
 * Shared implementation for the `translate` commands.
 *
 * Subclasses only supply the format-specific translator through
 * {@link BaseTranslateCommand.createTranslator}; this class parses the common
 * flags and arguments, gathers the input, and writes the result to stdout.
 */
export abstract class BaseTranslateCommand<T extends typeof Command> extends Command {
  static args = {
    input: Args.string({
      description: 'The text you want to translate',
      required: false,
    }),
  }
  static baseFlags = {
    from: Flags.string({
      description: 'Source language',
      required: true,
    }),
    profile: Flags.string({
      description: 'Profile to use for translation',
      required: true,
    }),
    stream: Flags.boolean({
      default: false,
      description: 'Stream the translation output',
    }),
    to: Flags.string({
      description: 'Target language',
      required: true,
    }),
  }
  protected args!: TranslateArgs<T>
  protected flags!: TranslateFlags<T>

  /**
   * Builds the translator used by this command.
   *
   * @param llm - Chat model created from the profile named by `--profile`.
   * @returns The translator that `run` drives.
   */
  abstract createTranslator(llm: BaseChatModel): Translator

  /**
   * Parses the command line once and stores the result on `this.args` and
   * `this.flags` so that `run` can read them.
   */
  public async init(): Promise<void> {
    await super.init()
    const {args, flags} = await this.parse({
      args: this.ctor.args,
      baseFlags: (super.ctor as typeof BaseTranslateCommand).baseFlags,
      flags: this.ctor.flags,
      strict: this.ctor.strict,
    })
    this.flags = flags as TranslateFlags<T>
    this.args = args as TranslateArgs<T>
  }

  /**
   * Translates the `input` argument, or stdin when the argument is absent or
   * empty, and writes the translation to stdout.
   *
   * Empty or whitespace-only input is echoed back unchanged without loading
   * the profile or calling the provider, since there is nothing to translate.
   */
  async run(): Promise<void> {
    const input: string = this.args.input ? this.args.input : await this.readStdin()

    // Nothing to translate: skip the provider round-trip and pass the input through as-is.
    if (input.trim() === '') {
      process.stdout.write(input)
      return
    }

    const llm = createProviderFromProfile(loadProfile(this.flags.profile))
    const translator = this.createTranslator(llm)
    const request = {
      content: input,
      sourceLanguage: this.flags.from,
      targetLanguage: this.flags.to,
    }

    if (this.flags.stream) {
      // Forward each piece as soon as the translator yields it.
      for await (const chunk of translator.translateStream(request)) {
        process.stdout.write(chunk)
      }

      return
    }

    process.stdout.write(await translator.translate(request))
  }

  /**
   * Reads stdin until it ends and returns everything received as a UTF-8 string.
   */
  private async readStdin(): Promise<string> {
    const chunks: Buffer[] = []
    for await (const chunk of process.stdin) {
      // A readable stream yields strings rather than Buffers once an encoding has been set on it.
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : chunk)
    }

    return Buffer.concat(chunks).toString('utf8')
  }
}

src/commands/translate/markdown.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import {BaseChatModel} from '@langchain/core/language_models/chat_models'
2+
3+
import {MARKDOWN_SYSTEM_PROMPT} from '../../core/prompts/markdown.js'
4+
import {MarkdownSplitter} from '../../core/splitters/markdown.js'
5+
import {Translator} from '../../core/translators/translator.js'
6+
import {BaseTranslateCommand} from './base.js'
7+
8+
/** Invocation shared by every help example below (oclif expands the `<%= %>` tags). */
const EXAMPLE_INVOCATION = '<%= config.bin %> <%= command.id %> --profile default-openai --from EN --to ES'

/**
 * `translate markdown` command: wires the markdown splitter and the markdown
 * system prompt into the shared translate flow of `BaseTranslateCommand`.
 */
export default class TranslateMarkdown extends BaseTranslateCommand<typeof TranslateMarkdown> {
  static args = {
    ...BaseTranslateCommand.args,
  }
  static description = 'Translate markdown'
  static examples = [
    `${EXAMPLE_INVOCATION} "Hello"`,
    `${EXAMPLE_INVOCATION} --stream "Hello"`,
    `cat doc.md | ${EXAMPLE_INVOCATION}`,
    `echo "# Hello" | ${EXAMPLE_INVOCATION}`,
  ]
  static flags = {
    ...BaseTranslateCommand.baseFlags,
  }

  /**
   * Creates a translator configured for markdown content.
   *
   * @param llm - Chat model handed over by the base command.
   * @returns A `Translator` built from `llm`, a new `MarkdownSplitter`, and `MARKDOWN_SYSTEM_PROMPT`.
   */
  createTranslator(llm: BaseChatModel): Translator {
    const splitter = new MarkdownSplitter()
    return new Translator(llm, splitter, MARKDOWN_SYSTEM_PROMPT)
  }
}

src/core/prompts/markdown.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
 * System prompt template used when translating markdown.
 *
 * The text contains the literal placeholders {{sourceLanguage}} and
 * {{targetLanguage}}. The value starts and ends with a newline.
 */
export const MARKDOWN_SYSTEM_PROMPT = [
  '',
  'You are a helpful assistant that accurately translates markdown document snippets from {{sourceLanguage}} to {{targetLanguage}} while preserving markdown syntax, formatting, and custom directives.',
  'You always preserve the structure and formatting exactly as it is.',
  'You do not add, alter or modify the text you receive in any way.',
  '',
  'Reminder:',
  '- Translate only the text, preserving the structure and formatting.',
  '- NEVER under any circumstances translate any words found inside backticks Eg. `Text`.',
  '- NEVER translate custom directive like ::startApplication{...} or ::openFile{...}.',
  '- DO translate titles inside the ::page{title=""} custom directive.',
  '- NEVER translate keywords that appear after colons, such as `:fa-lightbulb-o:`.',
  '- NEVER translate the sections "Author", "Other Contributors", and "Change Logs".',
  '- NEVER translate any URLs.',
  '- NEVER translate HTML tags like `<details>` and `<summary>`.',
  '- Translate idiomatically, adapting expressions to sound natural in {{targetLanguage}}.',
  '- Avoid overly literal translations; prioritize clarity and fluency in {{targetLanguage}} over word-for-word accuracy.',
  '- Use concise and clear language that would sound natural in everyday speech or written {{targetLanguage}}.',
  '- When technical {{sourceLanguage}} terms lack a common {{targetLanguage}} equivalent, use well-known {{targetLanguage}} alternatives or rephrase for clarity.',
  '- Be consistent with technical terms. If an equivalent technical term is not available in {{targetLanguage}}, always use the original term.',
  '',
  '*IMPORTANT*',
  'Translate without any additional information or comments.',
  '',
].join('\n')

src/core/splitters/markdown.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import {MarkdownTextSplitter} from '@langchain/textsplitters'
22

3-
import type {Chunk} from '../types.js'
3+
import type {BaseSplitter, Chunk} from '../types.js'
44

55
/**
66
* Hybrid two-pass markdown splitter for translation purposes.
@@ -13,7 +13,7 @@ import type {Chunk} from '../types.js'
1313
* - Prevents massive chunks from overwhelming translation APIs
1414
* - Uses LangChain's intelligent splitting for size management (tries headers > paragraphs > lines)
1515
*/
16-
export class MarkdownSplitter {
16+
export class MarkdownSplitter implements BaseSplitter {
1717
private readonly chunkSize: number
1818
private recursiveSplitter: MarkdownTextSplitter
1919

@@ -30,7 +30,7 @@ export class MarkdownSplitter {
3030
/**
3131
* Appends a chunk to an accumulator string, preserving whitespace
3232
*/
33-
reconstructChunk(accumulator: string, chunk: Chunk): string {
33+
reconstruct(accumulator: string, chunk: Chunk): string {
3434
return accumulator + (chunk.leadingWhitespace || '') + chunk.content + (chunk.trailingWhitespace || '')
3535
}
3636

src/core/translators/markdown.ts

Lines changed: 0 additions & 116 deletions
This file was deleted.

0 commit comments

Comments
 (0)