Skip to content

Commit f372dcb

Browse files
committed
Add multiValueHeader(name, value) helper for creating a multi-value header; it also validates that the header name is allowed.
Fix utils exports, add a test for multiValueHeader() with WARC-Protocol, and bump the version to 2.4.1.
1 parent ddc5507 commit f372dcb

File tree

4 files changed

+37
-24
lines changed

4 files changed

+37
-24
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "warcio",
3-
"version": "2.4.0",
3+
"version": "2.4.1",
44
"keywords": [
55
"WARC",
66
"web archiving"

src/lib/statusandheaders.ts

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { concatChunks, HeadersMultiMap, splitChunk } from "./utils";
1+
import { concatChunks, HeadersMultiMap, latin1ToUTF, splitChunk, UTFToLatin1 } from "./utils";
22
import { type AsyncIterReader } from "./readers";
33

44
export const CRLF = new Uint8Array([13, 10]);
@@ -220,24 +220,6 @@ function splitRemainder(str: string, sep: string, limit: number) {
220220
return newParts;
221221
}
222222

223-
// ===========================================================================
224-
function UTFToLatin1(value: string) {
225-
const buf = new TextEncoder().encode(value);
226-
227-
let str = "";
228-
buf.forEach((x) => (str += String.fromCharCode(x)));
229-
return str;
230-
}
231-
232-
// ===========================================================================
233-
function latin1ToUTF(str: string) {
234-
const buf = new Uint8Array(str.length);
235-
for (let i = 0; i < str.length; i++) {
236-
buf[i] = str.charCodeAt(i) & 0xff;
237-
}
238-
return new TextDecoder().decode(buf);
239-
}
240-
241223
// ===========================================================================
242224
export async function indexOfDoubleCRLF(
243225
buffer: Uint8Array,

src/lib/utils.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,24 @@ export function splitChunk(
289289
return [chunk.slice(0, inx), chunk.slice(inx)];
290290
}
291291

292+
// ===========================================================================
293+
export function UTFToLatin1(value: string) {
294+
const buf = new TextEncoder().encode(value);
295+
296+
let str = "";
297+
buf.forEach((x) => (str += String.fromCharCode(x)));
298+
return str;
299+
}
300+
301+
// ===========================================================================
302+
export function latin1ToUTF(str: string) {
303+
const buf = new Uint8Array(str.length);
304+
for (let i = 0; i < str.length; i++) {
305+
buf[i] = str.charCodeAt(i) & 0xff;
306+
}
307+
return new TextDecoder().decode(buf);
308+
}
309+
292310
// ===========================================================================
293311
// headers multi map
294312
const MULTI_VALUE_ALLOWED = ["set-cookie", "warc-concurrent-to", "warc-protocol"];
@@ -297,6 +315,13 @@ const MULTI_VALUE_ALLOWED = ["set-cookie", "warc-concurrent-to", "warc-protocol"
297315
// in theory, collision still possible with arbitrary cookie value
298316
const JOIN_MARKER = ",,,";
299317

318+
export function multiValueHeader(name: string, value: string[]) {
319+
if (!MULTI_VALUE_ALLOWED.includes(name.toLowerCase())) {
320+
throw new Error("not a valid multi value header");
321+
}
322+
return value.join(JOIN_MARKER);
323+
}
324+
300325
export class HeadersMultiMap extends Map<string, string> {
301326
constructor(headersInit?: HeadersInit) {
302327
// if an array of array, parse that and add individually here

test/testSerializer.test.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
WARCSerializer as BaseWARCSerializer,
66
} from "../src/lib";
77
import { WARCSerializer } from "../src/node/warcserializer";
8+
import { multiValueHeader } from "../src/lib/utils";
89

910
const decoder = new TextDecoder("utf-8");
1011
const encoder = new TextEncoder();
@@ -429,12 +430,13 @@ set-cookie: greeting=hello, name=world\r\n\
429430
}
430431
});
431432

432-
test("create request record with cookie array, keep headers case", async () => {
433-
const url = "http://example.com/";
433+
test("create request record with protocol + cookie array, keep headers case", async () => {
434+
const url = "https://example.com/";
434435
const date = "2000-01-01T00:00:00Z";
435436
const type = "request";
436437
const warcHeaders = {
437438
"WARC-Record-ID": "<urn:uuid:12345678-feb0-11e6-8f83-68a86d1772ce>",
439+
"WARC-Protocol": multiValueHeader("WARC-Protocol", ["h2", "tls/1.0"])
438440
};
439441
const httpHeaders: [string, string][] = [
440442
["Set-Cookie", "greeting=hello"],
@@ -463,7 +465,9 @@ set-cookie: greeting=hello, name=world\r\n\
463465
"\
464466
WARC/1.0\r\n\
465467
WARC-Record-ID: <urn:uuid:12345678-feb0-11e6-8f83-68a86d1772ce>\r\n\
466-
WARC-Target-URI: http://example.com/\r\n\
468+
WARC-Protocol: h2\r\n\
469+
WARC-Protocol: tls/1.0\r\n\
470+
WARC-Target-URI: https://example.com/\r\n\
467471
WARC-Date: 2000-01-01T00:00:00Z\r\n\
468472
WARC-Type: request\r\n\
469473
Content-Type: application/http; msgtype=request\r\n\
@@ -484,7 +488,9 @@ Set-Cookie: name=world\r\n\
484488
"\
485489
WARC/1.0\r\n\
486490
WARC-Record-ID: <urn:uuid:12345678-feb0-11e6-8f83-68a86d1772ce>\r\n\
487-
WARC-Target-URI: http://example.com/\r\n\
491+
WARC-Protocol: h2\r\n\
492+
WARC-Protocol: tls/1.0\r\n\
493+
WARC-Target-URI: https://example.com/\r\n\
488494
WARC-Date: 2000-01-01T00:00:00Z\r\n\
489495
WARC-Type: request\r\n\
490496
Content-Type: application/http; msgtype=request\r\n\

0 commit comments

Comments
 (0)