Skip to content

Commit f039655

Browse files
committed
fix(core): add support for reading and writing ISO-8859-1/latin1 files
1 parent 576fda1 commit f039655

File tree

3 files changed

+115
-1
lines changed

3 files changed

+115
-1
lines changed

packages/core/src/services/fileSystemService.ts

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import fs from 'node:fs/promises';
88
import * as path from 'node:path';
99
import { globSync } from 'glob';
10+
import { detectEncodingFromBuffer } from '../utils/systemEncoding.js';
1011

1112
/**
1213
* Interface for file system operations that may be delegated to different implementations
@@ -47,7 +48,27 @@ export class StandardFileSystemService implements FileSystemService {
4748
}
4849

4950
/**
 * Writes `content` to `filePath`, keeping a legacy single-byte encoding when
 * the file already exists and appears to use one.
 *
 * The existing file (if any) is read and passed to `detectEncodingFromBuffer`.
 * If the detected name starts with `iso-8859-` or `windows-125`, the file is
 * written with Node's `latin1` encoding; otherwise UTF-8 is used. A file that
 * does not exist yet is created as UTF-8.
 *
 * @param filePath - Path of the file to write.
 * @param content - Text to store in the file.
 * @throws Re-throws any error from reading the existing file other than
 *   `ENOENT`, and any error raised by the final write.
 */
async writeTextFile(filePath: string, content: string): Promise<void> {
  let encoding: BufferEncoding = 'utf-8';
  try {
    const existing = await fs.readFile(filePath);
    const detected = detectEncodingFromBuffer(existing);
    if (
      detected &&
      (detected.startsWith('iso-8859-') || detected.startsWith('windows-125'))
    ) {
      encoding = 'latin1';
    }
  } catch (error) {
    // If the file doesn't exist, we'll create it with the default UTF-8 encoding.
    // For any other error (e.g., permissions), we should not silently ignore it,
    // as that could lead to overwriting a file with the wrong encoding.
    if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
      throw error;
    }
  }

  // Node's 'latin1' encoder keeps only the low byte of each UTF-16 code unit,
  // so any character above U+00FF would be silently corrupted on disk. When the
  // new content cannot be represented in latin1, write UTF-8 instead so that no
  // text is lost.
  if (encoding === 'latin1' && /[\u0100-\uffff]/.test(content)) {
    encoding = 'utf-8';
  }

  await fs.writeFile(filePath, content, encoding);
}
5273

5374
findFiles(fileName: string, searchPaths: readonly string[]): string[] {

packages/core/src/utils/fileUtils.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import { ToolErrorType } from '../tools/tool-error.js';
1515
import { BINARY_EXTENSIONS } from './ignorePatterns.js';
1616
import { createRequire as createModuleRequire } from 'node:module';
1717
import { debugLogger } from './debugLogger.js';
18+
import { detectEncodingFromBuffer } from './systemEncoding.js';
1819

1920
const requireModule = createModuleRequire(import.meta.url);
2021

@@ -168,6 +169,21 @@ export async function readFileWithEncoding(filePath: string): Promise<string> {
168169

169170
const bom = detectBOM(full);
170171
if (!bom) {
172+
// No BOM. Try to detect encoding using chardet (via systemEncoding utility)
173+
const detected = detectEncodingFromBuffer(full);
174+
if (detected) {
175+
// Node.js 'latin1' covers ISO-8859-1.
176+
// We also map ISO-8859-2, windows-1252 etc. to latin1 as a best-effort fallback
177+
// for single-byte encodings, since Node doesn't natively support them all
178+
// and we want to preserve bytes as much as possible for simple text.
179+
if (
180+
detected.startsWith('iso-8859-') ||
181+
detected.startsWith('windows-125')
182+
) {
183+
return full.toString('latin1');
184+
}
185+
}
186+
171187
// No BOM and no legacy single-byte encoding detected → treat as UTF‑8
172188
return full.toString('utf8');
173189
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/**
2+
* @license
3+
* Copyright 2025 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
8+
import { readFileWithEncoding } from './fileUtils.js';
9+
import { StandardFileSystemService } from '../services/fileSystemService.js';
10+
import fs from 'node:fs';
11+
import path from 'node:path';
12+
import os from 'node:os';
13+
14+
describe('ISO-8859-1 Encoding Support', () => {
  // "café na manhã" encoded as ISO-8859-1:
  //   c  a  f  é  _  n  a  _  m  a  n  h  ã
  //   63 61 66 e9 20 6e 61 20 6d 61 6e 68 e3
  const latin1Sample = Buffer.from([
    0x63, 0x61, 0x66, 0xe9, 0x20, 0x6e, 0x61, 0x20, 0x6d, 0x61, 0x6e, 0x68,
    0xe3,
  ]);
  const fileService = new StandardFileSystemService();

  let workDir: string;
  let samplePath: string;

  // Each test gets its own scratch directory, removed again afterwards.
  beforeEach(() => {
    workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gemini-iso-test-'));
    samplePath = path.join(workDir, 'test.txt');
  });

  afterEach(() => {
    fs.rmSync(workDir, { recursive: true, force: true });
  });

  it('should read ISO-8859-1 file correctly', async () => {
    fs.writeFileSync(samplePath, latin1Sample);

    const decoded = await readFileWithEncoding(samplePath);

    expect(decoded).toBe('café na manhã');
  });

  it('should preserve ISO-8859-1 encoding when writing', async () => {
    fs.writeFileSync(samplePath, latin1Sample);

    // Sanity check: the fixture decodes as expected before it is rewritten.
    const before = await readFileWithEncoding(samplePath);
    expect(before).toBe('café na manhã');

    await fileService.writeTextFile(samplePath, 'café na manhã updated');

    // Inspect the raw bytes so the on-disk encoding itself is verified.
    const rawAfter = fs.readFileSync(samplePath);

    // 13 original bytes + 8 bytes for " updated" = 21 single-byte characters.
    expect(rawAfter.length).toBe(21);

    const after = rawAfter.toString('latin1');
    expect(after).toBe('café na manhã updated');
  });

  it('should fallback to UTF-8 for new files', async () => {
    const freshPath = path.join(workDir, 'new.txt');
    await fileService.writeTextFile(freshPath, 'café');

    const rawBytes = fs.readFileSync(freshPath);

    // In UTF-8, é is the two-byte sequence C3 A9, so "café" is 5 bytes long.
    expect(rawBytes.length).toBe(5);
    expect(rawBytes.includes(0xc3)).toBe(true);
  });
});

0 commit comments

Comments
 (0)