Skip to content

Commit 9a436b1

Browse files
authored
Merge pull request #2143 from microsoft/connor4312/vs-2266626
fix: content verification of files in Node.js failing with UTF-8 BOM
2 parents c7f5a76 + eda229e commit 9a436b1

File tree

4 files changed

+71
-17
lines changed

4 files changed

+71
-17
lines changed

package-lock.json

+7-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+1-1
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@
5050
"test:lint": "gulp lint"
5151
},
5252
"dependencies": {
53-
"@c4312/chromehash": "^0.3.0",
53+
"@c4312/chromehash": "^0.3.1",
5454
"@jridgewell/gen-mapping": "^0.3.3",
5555
"@jridgewell/trace-mapping": "^0.3.22",
5656
"@vscode/js-debug-browsers": "^1.1.2",

src/common/hash/hash.ts

+54-9
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,12 @@
11
/*---------------------------------------------------------
22
* Copyright (C) Microsoft Corporation. All rights reserved.
33
*--------------------------------------------------------*/
4-
import { hash, shaHash } from '@c4312/chromehash';
4+
import {
5+
hash as chromeHash,
6+
normalizeShaBuffer,
7+
shaHash as chromeShaHash,
8+
} from '@c4312/chromehash';
9+
import { createHash } from 'crypto';
510
import { promises as fs } from 'fs';
611
import { MessagePort, parentPort } from 'worker_threads';
712

@@ -13,17 +18,29 @@ export const enum MessageType {
1318
}
1419

1520
/**
 * Hashing strategy carried in the `mode` field of hash requests sent to the
 * hash worker.
 */
export const enum HashMode {
21+
// Legacy hash mode, pre-https://chromium-review.googlesource.com/c/v8/v8/+/3229957
1622
Chromehash,
23+
// BOM-aware hash mode, used by Chrome/Browsers. (Hashes the contents normalized to UTF-8)
1724
SHA256,
25+
// Naive hash mode, used by Node.js. (Hashes the raw file bytes)
26+
SHA256Naive,
1827
}
1928

29+
/**
 * Computes the SHA-256 digest of the given bytes exactly as provided (no BOM
 * handling or re-encoding is applied) and returns it as a hex string.
 */
export const shaHashNaive = (input: Buffer) => {
  const hasher = createHash('sha256');
  hasher.update(input);
  return hasher.digest('hex');
};
30+
2031
/**
2132
* Message sent to the hash worker.
2233
*/
2334
export type HashRequest =
2435
| { type: MessageType.HashFile; id: number; file: string; mode: HashMode }
2536
| { type: MessageType.HashBytes; id: number; data: string | Buffer; mode: HashMode }
26-
| { type: MessageType.VerifyFile; id: number; file: string; expected: string; checkNode: boolean }
37+
| {
38+
type: MessageType.VerifyFile;
39+
id: number;
40+
file: string;
41+
expected: string;
42+
checkNode: boolean;
43+
}
2744
| {
2845
type: MessageType.VerifyBytes;
2946
id: number;
@@ -74,9 +91,33 @@ const LF = Buffer.from('\n')[0];
7491

7592
/**
 * Returns whether `buf` begins with the bytes in `prefix`.
 *
 * An empty `prefix` always matches; a `prefix` longer than `buf` never does.
 */
const hasPrefix = (buf: Buffer, prefix: Buffer): boolean =>
  // `subarray` yields a zero-copy view; `Buffer#slice` is deprecated in Node.js.
  buf.subarray(0, prefix.length).equals(prefix);
7693

94+
/**
 * Checks whether the given buffers, taken together in order, hash to `expected`.
 *
 * - If `expected` is not 64 characters long, the concatenated data is hashed
 *   with `chromeHash` and compared directly.
 * - Otherwise the SHA-256 of the raw bytes is compared first; if that does
 *   not match, the data is passed through `normalizeShaBuffer` and the
 *   SHA-256 of the result is compared.
 *
 * @param expected Hash string to compare against.
 * @param data One or more buffers, hashed as if they were concatenated.
 * @returns `true` if any of the applicable hashes equals `expected`.
 */
const verifyHash = (expected: string, ...data: Buffer[]) => {
95+
// A hex-encoded SHA-256 digest is 64 characters long; any other length is
// compared using `chromeHash` instead.
if (expected.length !== 64) {
96+
return chromeHash(data.length === 1 ? data[0] : Buffer.concat(data)) === expected;
97+
}
98+
99+
// Check if the non-normalized hash matches first. We check both because we
100+
// want to check with both BOM-normalization (used in Chrome) and
101+
// without it (used in Node.js).
102+
const nonNormalizedHash = createHash('sha256');
103+
// Feeding the pieces incrementally avoids concatenating them for this check.
for (const d of data) {
104+
nonNormalizedHash.update(d);
105+
}
106+
if (nonNormalizedHash.digest('hex') === expected) {
107+
return true;
108+
}
109+
110+
// Normalization operates on a single buffer, so the pieces are joined here.
const normalizedInput = data.length === 1 ? data[0] : Buffer.concat(data);
111+
const normalizedOutput = normalizeShaBuffer(normalizedInput);
112+
// Getting the very same buffer instance back means there is nothing new to
// hash: the raw-bytes digest was already rejected above.
// NOTE(review): presumably normalizeShaBuffer returns its input unchanged
// when no normalization is needed -- confirm against @c4312/chromehash.
if (normalizedInput === normalizedOutput) {
113+
return false;
114+
}
115+
116+
return createHash('sha256').update(normalizedOutput).digest('hex') === expected;
117+
};
118+
77119
const verifyBytes = (bytes: Buffer, expected: string, checkNode: boolean) => {
78-
const hashFn = expected.length === 64 ? shaHash : hash;
79-
if (hashFn(bytes) === expected) {
120+
if (verifyHash(expected, bytes)) {
80121
return true;
81122
}
82123

@@ -88,16 +129,16 @@ const verifyBytes = (bytes: Buffer, expected: string, checkNode: boolean) => {
88129
end--;
89130
}
90131

91-
return hashFn(bytes.slice(end)) === expected;
132+
return verifyHash(expected, bytes.subarray(end));
92133
}
93134

94-
if (hashFn(Buffer.concat([nodePrefix, bytes, nodeSuffix])) === expected) {
135+
if (verifyHash(expected, nodePrefix, bytes, nodeSuffix)) {
95136
return true;
96137
}
97138
}
98139

99140
// todo -- doing a lot of concats, make chromehash able to hash an iterable of buffers?
100-
if (hashFn(Buffer.concat([electronPrefix, bytes, electronSuffix])) === expected) {
141+
if (verifyHash(expected, electronPrefix, bytes, electronSuffix)) {
101142
return true;
102143
}
103144

@@ -114,14 +155,18 @@ async function handle(message: HashRequest): Promise<HashResponse<HashRequest>>
114155
const data = await fs.readFile(message.file);
115156
return {
116157
id: message.id,
117-
hash: message.mode === HashMode.Chromehash ? hash(data) : shaHash(data),
158+
hash: message.mode === HashMode.Chromehash
159+
? chromeHash(data)
160+
: message.mode === HashMode.SHA256Naive
161+
? shaHashNaive(data)
162+
: chromeShaHash(data),
118163
};
119164
} catch (e) {
120165
return { id: message.id };
121166
}
122167
case MessageType.HashBytes:
123168
try {
124-
return { id: message.id, hash: hash(toBuffer(message.data)) };
169+
return { id: message.id, hash: chromeHash(toBuffer(message.data)) };
125170
} catch (e) {
126171
return { id: message.id };
127172
}

src/common/hash/index.test.ts

+9
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,15 @@ describe('hash process', function() {
163163
);
164164
expect(b).to.be.false;
165165
});
166+
167+
// Regression test: the `utf8-bom.js` fixture must verify against the given
// 64-character hash.
// NOTE(review): presumably this is the SHA-256 of the file's raw bytes with
// the BOM included, and the third argument is `checkNode` -- confirm.
it('verifies raw content for BOM-less Node', async () => {
168+
const a = await hasher.verifyFile(
169+
join(hashTestCaseDir, 'utf8-bom.js'),
170+
'027cd940c96749db7c716eabe8e09ad578a3c0b5dfc821dc2bb67f627be70ff1',
171+
false,
172+
);
173+
expect(a).to.be.true;
174+
});
166175
});
167176

168177
it('verifies if wrapped in node module', async () => {

0 commit comments

Comments
 (0)