Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,12 @@
"@oclif/plugin-help",
"@oclif/plugin-plugins"
],
"topicSeparator": " ",
"topics": {
"hello": {
"description": "Say hello to the world and others"
"profiles": {
"description": "Manage LLM provider profiles"
},
"translate": {
"description": "Translate content between languages"
}
}
},
Expand All @@ -77,4 +79,4 @@
},
"types": "dist/index.d.ts",
"packageManager": "[email protected]"
}
}
78 changes: 0 additions & 78 deletions src/commands/markdown.ts

This file was deleted.

90 changes: 90 additions & 0 deletions src/commands/translate/base.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import {BaseChatModel} from '@langchain/core/language_models/chat_models'
import {Args, Command, Flags, Interfaces} from '@oclif/core'

import {Translator} from '../../core/translators/translator.js'
import {createProviderFromProfile} from '../../lib/profile/factory.js'
import {loadProfile} from '../../lib/profile/storage.js'

/**
 * Parsed flag values for a concrete translate command `T`: the flags declared
 * on `T` itself combined with the shared `BaseTranslateCommand.baseFlags`.
 */
type TranslateFlags<T extends typeof Command> = Interfaces.InferredFlags<
T['flags'] & typeof BaseTranslateCommand.baseFlags
>
/** Parsed positional-argument values inferred from the `args` declared on command `T`. */
type TranslateArgs<T extends typeof Command> = Interfaces.InferredArgs<T['args']>

/**
 * Shared implementation for translate commands.
 *
 * Declares the common positional argument and flags, parses them in `init()`,
 * and drives a `Translator` in `run()`. Subclasses provide the concrete
 * translator by implementing `createTranslator()`.
 */
export abstract class BaseTranslateCommand<T extends typeof Command> extends Command {
  /** Optional positional text; when it is missing or empty, `run()` reads stdin instead. */
  static args = {
    input: Args.string({
      description: 'The text you want to translate',
      required: false,
    }),
  }

  /** Flags accepted by every command built on this base class. */
  static baseFlags = {
    from: Flags.string({description: 'Source language', required: true}),
    profile: Flags.string({description: 'Profile to use for translation', required: true}),
    stream: Flags.boolean({default: false, description: 'Stream the translation output'}),
    to: Flags.string({description: 'Target language', required: true}),
  }

  /** Parsed positional arguments, assigned in `init()`. */
  protected args!: TranslateArgs<T>
  /** Parsed flags (command-specific plus base flags), assigned in `init()`. */
  protected flags!: TranslateFlags<T>

  /**
   * Builds the translator used by `run()`.
   *
   * @param llm - Chat model produced from the profile selected with `--profile`.
   * @returns The translator that will process the input.
   */
  abstract createTranslator(llm: BaseChatModel): Translator

  /**
   * Parses the command line against the concrete command's args and flags plus
   * the shared base flags, then stores the parsed values on the instance.
   */
  public async init(): Promise<void> {
    await super.init()
    const parsed = await this.parse({
      args: this.ctor.args,
      baseFlags: (super.ctor as typeof BaseTranslateCommand).baseFlags,
      flags: this.ctor.flags,
      strict: this.ctor.strict,
    })
    this.flags = parsed.flags as TranslateFlags<T>
    this.args = parsed.args as TranslateArgs<T>
  }

  /**
   * Translates the input and writes the result to stdout.
   *
   * The input comes from the positional argument when it is truthy, otherwise
   * from stdin. With `--stream` each chunk is written as it is produced;
   * without it the full result is written once.
   */
  async run(): Promise<void> {
    const provided = this.args.input
    const content: string = provided ? provided : await this.collectStdin()

    // The input is resolved before the profile is loaded, as in the original flow.
    const profile = loadProfile(this.flags.profile)
    const translator = this.createTranslator(createProviderFromProfile(profile))

    const request = {
      content,
      sourceLanguage: this.flags.from,
      targetLanguage: this.flags.to,
    }

    if (!this.flags.stream) {
      const translated = await translator.translate(request)
      process.stdout.write(translated)
      return
    }

    for await (const piece of translator.translateStream(request)) {
      process.stdout.write(piece)
    }
  }

  /** Reads `process.stdin` until it ends and decodes the collected bytes as UTF-8. */
  private async collectStdin(): Promise<string> {
    const buffers: Buffer[] = []
    for await (const part of process.stdin) {
      buffers.push(part)
    }

    return Buffer.concat(buffers).toString('utf8')
  }
}
26 changes: 26 additions & 0 deletions src/commands/translate/markdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import {BaseChatModel} from '@langchain/core/language_models/chat_models'

import {MARKDOWN_SYSTEM_PROMPT} from '../../core/prompts/markdown.js'
import {MarkdownSplitter} from '../../core/splitters/markdown.js'
import {Translator} from '../../core/translators/translator.js'
import {BaseTranslateCommand} from './base.js'

/**
 * `translate markdown` command: wires the shared translate flow to a
 * markdown-aware splitter and the markdown system prompt.
 */
export default class TranslateMarkdown extends BaseTranslateCommand<typeof TranslateMarkdown> {
  // Re-declared as a shallow copy of the base definition so the command exposes it as its own static.
  static args = {...BaseTranslateCommand.args}
  static description = 'Translate markdown'
  static examples = [
    '<%= config.bin %> <%= command.id %> --profile default-openai --from EN --to ES "Hello"',
    '<%= config.bin %> <%= command.id %> --profile default-openai --from EN --to ES --stream "Hello"',
    'cat doc.md | <%= config.bin %> <%= command.id %> --profile default-openai --from EN --to ES',
    'echo "# Hello" | <%= config.bin %> <%= command.id %> --profile default-openai --from EN --to ES',
  ]
  static flags = {...BaseTranslateCommand.baseFlags}

  /**
   * Creates the translator for markdown content.
   *
   * @param llm - Chat model to translate with.
   * @returns A `Translator` configured with a `MarkdownSplitter` and `MARKDOWN_SYSTEM_PROMPT`.
   */
  createTranslator(llm: BaseChatModel): Translator {
    const splitter = new MarkdownSplitter()
    return new Translator(llm, splitter, MARKDOWN_SYSTEM_PROMPT)
  }
}
27 changes: 27 additions & 0 deletions src/core/prompts/markdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
 * System prompt template for markdown translation.
 *
 * Contains the literal placeholders {{sourceLanguage}} and {{targetLanguage}},
 * each of which appears several times; every occurrence has to be replaced
 * before the prompt is sent (presumably by the consuming translator — verify
 * against the caller).
 *
 * Backticks inside the text are escaped because the prompt is a template
 * literal. The string begins and ends with a newline.
 */
export const MARKDOWN_SYSTEM_PROMPT = `
You are a helpful assistant that accurately translates markdown document snippets from {{sourceLanguage}} to {{targetLanguage}} while preserving markdown syntax, formatting, and custom directives.
You always preserve the structure and formatting exactly as it is.
You do not add, alter or modify the text you receive in any way.

Reminder:
- Translate only the text, preserving the structure and formatting.
- NEVER under any circumstances translate any words found inside backticks Eg. \`Text\`.
- NEVER translate custom directive like ::startApplication{...} or ::openFile{...}.
- DO translate titles inside the ::page{title=""} custom directive.
- NEVER translate keywords that appear after colons, such as \`:fa-lightbulb-o:\`.
- NEVER translate the sections "Author", "Other Contributors", and "Change Logs".
- NEVER translate any URLs.
- NEVER translate HTML tags like \`<details>\` and \`<summary>\`.
- Translate idiomatically, adapting expressions to sound natural in {{targetLanguage}}.
- Avoid overly literal translations; prioritize clarity and fluency in {{targetLanguage}} over word-for-word accuracy.
- Use concise and clear language that would sound natural in everyday speech or written {{targetLanguage}}.
- When technical {{sourceLanguage}} terms lack a common {{targetLanguage}} equivalent, use well-known {{targetLanguage}} alternatives or rephrase for clarity.
- Be consistent with technical terms. If an equivalent technical term is not available in {{targetLanguage}}, always use the original term.

*IMPORTANT*
Translate without any additional information or comments.
`
6 changes: 3 additions & 3 deletions src/core/splitters/markdown.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {MarkdownTextSplitter} from '@langchain/textsplitters'

import type {Chunk} from '../types.js'
import type {BaseSplitter, Chunk} from '../types.js'

/**
* Hybrid two-pass markdown splitter for translation purposes.
Expand All @@ -13,7 +13,7 @@ import type {Chunk} from '../types.js'
* - Prevents massive chunks from overwhelming translation APIs
* - Uses LangChain's intelligent splitting for size management (tries headers > paragraphs > lines)
*/
export class MarkdownSplitter {
export class MarkdownSplitter implements BaseSplitter {
private readonly chunkSize: number
private recursiveSplitter: MarkdownTextSplitter

Expand All @@ -30,7 +30,7 @@ export class MarkdownSplitter {
/**
* Appends a chunk to an accumulator string, preserving whitespace
*/
reconstructChunk(accumulator: string, chunk: Chunk): string {
reconstruct(accumulator: string, chunk: Chunk): string {
  // Missing or empty whitespace fields contribute nothing to the result.
  const leading = chunk.leadingWhitespace || ''
  const trailing = chunk.trailingWhitespace || ''
  return accumulator + leading + chunk.content + trailing
}

Expand Down
116 changes: 0 additions & 116 deletions src/core/translators/markdown.ts

This file was deleted.

Loading