diff --git a/src/Config.zig b/src/Config.zig index 629df32b1..43ca75661 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -156,6 +156,13 @@ pub fn userAgentSuffix(self: *const Config) ?[]const u8 { }; } +pub fn cacheDir(self: *const Config) ?[]const u8 { + return switch (self.mode) { + inline .serve, .fetch, .mcp => |opts| opts.common.cache_dir, + else => null, + }; +} + pub fn cdpTimeout(self: *const Config) usize { return switch (self.mode) { .serve => |opts| if (opts.timeout > 604_800) 604_800_000 else @as(usize, opts.timeout) * 1000, @@ -240,6 +247,7 @@ pub const Common = struct { log_format: ?log.Format = null, log_filter_scopes: ?[]log.Scope = null, user_agent_suffix: ?[]const u8 = null, + cache_dir: ?[]const u8 = null, web_bot_auth_key_file: ?[]const u8 = null, web_bot_auth_keyid: ?[]const u8 = null, @@ -907,5 +915,14 @@ fn parseCommonArg( return true; } + if (std.mem.eql(u8, "--cache_dir", opt)) { + const str = args.next() orelse { + log.fatal(.app, "missing argument value", .{ .arg = "--cache_dir" }); + return error.InvalidArgument; + }; + common.cache_dir = try allocator.dupe(u8, str); + return true; + } + return false; } diff --git a/src/Notification.zig b/src/Notification.zig index e025820a0..e8d0d7d83 100644 --- a/src/Notification.zig +++ b/src/Notification.zig @@ -21,7 +21,7 @@ const lp = @import("lightpanda"); const log = @import("log.zig"); const Page = @import("browser/Page.zig"); -const Transfer = @import("browser/HttpClient.zig").Transfer; +const LiveTransfer = @import("browser/HttpClient.zig").LiveTransfer; const Allocator = std.mem.Allocator; @@ -138,34 +138,34 @@ pub const PageFrameCreated = struct { }; pub const RequestStart = struct { - transfer: *Transfer, + transfer: *LiveTransfer, }; pub const RequestIntercept = struct { - transfer: *Transfer, + transfer: *LiveTransfer, wait_for_interception: *bool, }; pub const RequestAuthRequired = struct { - transfer: *Transfer, + transfer: *LiveTransfer, wait_for_interception: *bool, }; pub const 
ResponseData = struct { data: []const u8, - transfer: *Transfer, + transfer: *LiveTransfer, }; pub const ResponseHeaderDone = struct { - transfer: *Transfer, + transfer: *LiveTransfer, }; pub const RequestDone = struct { - transfer: *Transfer, + transfer: *LiveTransfer, }; pub const RequestFail = struct { - transfer: *Transfer, + transfer: *LiveTransfer, err: anyerror, }; diff --git a/src/browser/HttpClient.zig b/src/browser/HttpClient.zig index 136b578b8..52dd925dc 100644 --- a/src/browser/HttpClient.zig +++ b/src/browser/HttpClient.zig @@ -32,6 +32,10 @@ const Robots = @import("../network/Robots.zig"); const RobotStore = Robots.RobotStore; const WebBotAuth = @import("../network/WebBotAuth.zig"); +const Cache = @import("../network/cache/Cache.zig"); +const CacheMetadata = Cache.CachedMetadata; +const CachedResponse = Cache.CachedResponse; + const Allocator = std.mem.Allocator; const ArenaAllocator = std.heap.ArenaAllocator; @@ -96,7 +100,7 @@ pending_robots_queue: std.StringHashMapUnmanaged(std.ArrayList(Request)) = .empt // Once we have a handle/easy to process a request with, we create a Transfer // which contains the Request as well as any state we need to process the // request. These wil come and go with each request. -transfer_pool: std.heap.MemoryPool(Transfer), +transfer_pool: std.heap.MemoryPool(LiveTransfer), // The current proxy. CDP can change it, restoreOriginalProxy restores // from config. 
@@ -135,7 +139,7 @@ pub const CDPClient = struct { const TransferQueue = std.DoublyLinkedList; pub fn init(allocator: Allocator, network: *Network) !*Client { - var transfer_pool = std.heap.MemoryPool(Transfer).init(allocator); + var transfer_pool = std.heap.MemoryPool(LiveTransfer).init(allocator); errdefer transfer_pool.deinit(); const client = try allocator.create(Client); @@ -199,7 +203,7 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void { while (n) |node| { n = node.next; const conn: *Net.Connection = @fieldParentPtr("node", node); - var transfer = Transfer.fromConnection(conn) catch |err| { + var transfer = LiveTransfer.fromConnection(conn) catch |err| { // Let's cleanup what we can self.removeConn(conn); log.err(.http, "get private info", .{ .err = err, .source = "abort" }); @@ -223,7 +227,7 @@ fn _abort(self: *Client, comptime abort_all: bool, frame_id: u32) void { var n = q.first; while (n) |node| { n = node.next; - const transfer: *Transfer = @fieldParentPtr("_node", node); + const transfer: *LiveTransfer = @fieldParentPtr("_node", node); if (comptime abort_all) { transfer.kill(); } else if (transfer.req.frame_id == frame_id) { @@ -253,7 +257,7 @@ pub fn tick(self: *Client, timeout_ms: u32) !PerformStatus { self.queue.prepend(queue_node); break; }; - const transfer: *Transfer = @fieldParentPtr("_node", queue_node); + const transfer: *LiveTransfer = @fieldParentPtr("_node", queue_node); try self.makeRequest(conn, transfer); } return self.perform(@intCast(timeout_ms)); @@ -289,7 +293,62 @@ pub fn request(self: *Client, req: Request) !void { return self.fetchRobotsThenProcessRequest(robots_url, req); } +fn serveFromCache(allocator: std.mem.Allocator, req: Request, cached: *const CachedResponse) !void { + const response = Response.fromCached(req.ctx, cached); + defer cached.metadata.deinit(allocator); + + if (req.start_callback) |cb| { + try cb(response); + } + + const proceed = try req.header_callback(response); + if (!proceed) { + 
req.error_callback(req.ctx, error.Abort); + return; + } + + switch (cached.data) { + .buffer => |data| { + if (data.len > 0) { + try req.data_callback(response, data); + } + }, + .file => |file| { + var buf: [1024]u8 = undefined; + var file_reader = file.reader(&buf); + + const reader = &file_reader.interface; + var read_buf: [1024]u8 = undefined; + + while (true) { + const curr = try reader.readSliceShort(&read_buf); + if (curr == 0) break; + try req.data_callback(response, read_buf[0..curr]); + } + }, + } + + try req.done_callback(req.ctx); +} + fn processRequest(self: *Client, req: Request) !void { + if (self.network.cache) |*cache| { + if (req.method == .GET) { + if (cache.get(self.allocator, req.url)) |cached| { + log.debug(.browser, "http.cache.get", .{ + .url = req.url, + .found = true, + .metadata = cached.metadata, + }); + + defer req.headers.deinit(); + return serveFromCache(self.allocator, req, &cached); + } else { + log.debug(.browser, "http.cache.get", .{ .url = req.url, .found = false }); + } + } + } + const transfer = try self.makeTransfer(req); transfer.req.notification.dispatch(.http_request_start, &.{ .transfer = transfer }); @@ -376,8 +435,10 @@ fn fetchRobotsThenProcessRequest(self: *Client, robots_url: [:0]const u8, req: R try entry.value_ptr.append(self.allocator, req); } -fn robotsHeaderCallback(transfer: *Transfer) !bool { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx)); +fn robotsHeaderCallback(response: Response) !bool { + const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); + // Robots callbacks only happen on real live requests. 
+ const transfer = response.inner.live; if (transfer.response_header) |hdr| { log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = ctx.robots_url }); @@ -391,8 +452,8 @@ fn robotsHeaderCallback(transfer: *Transfer) !bool { return true; } -fn robotsDataCallback(transfer: *Transfer, data: []const u8) !void { - const ctx: *RobotsRequestContext = @ptrCast(@alignCast(transfer.ctx)); +fn robotsDataCallback(response: Response, data: []const u8) !void { + const ctx: *RobotsRequestContext = @ptrCast(@alignCast(response.ctx)); try ctx.buffer.appendSlice(ctx.client.allocator, data); } @@ -488,7 +549,7 @@ fn robotsShutdownCallback(ctx_ptr: *anyopaque) void { } } -fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { +fn waitForInterceptedResponse(self: *Client, transfer: *LiveTransfer) !bool { // The request was intercepted and is blocking. This is messy, but our // callers, the ScriptManager -> Page, don't have a great way to stop the // parser and return control to the CDP server to wait for the interception @@ -533,7 +594,7 @@ fn waitForInterceptedResponse(self: *Client, transfer: *Transfer) !bool { // Above, request will not process if there's an interception request. In such // cases, the interecptor is expected to call resume to continue the transfer // or transfer.abort() to abort it. -fn process(self: *Client, transfer: *Transfer) !void { +fn process(self: *Client, transfer: *LiveTransfer) !void { // libcurl doesn't allow recursive calls, if we're in a `perform()` operation // then we _have_ to queue this. 
if (self.performing == false) { @@ -546,7 +607,7 @@ fn process(self: *Client, transfer: *Transfer) !void { } // For an intercepted request -pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { +pub fn continueTransfer(self: *Client, transfer: *LiveTransfer) !void { if (comptime IS_DEBUG) { std.debug.assert(transfer._intercept_state != .not_intercepted); log.debug(.http, "continue transfer", .{ .intercepted = self.intercepted }); @@ -560,7 +621,7 @@ pub fn continueTransfer(self: *Client, transfer: *Transfer) !void { } // For an intercepted request -pub fn abortTransfer(self: *Client, transfer: *Transfer) void { +pub fn abortTransfer(self: *Client, transfer: *LiveTransfer) void { if (comptime IS_DEBUG) { std.debug.assert(transfer._intercept_state != .not_intercepted); log.debug(.http, "abort transfer", .{ .intercepted = self.intercepted }); @@ -574,7 +635,7 @@ pub fn abortTransfer(self: *Client, transfer: *Transfer) void { } // For an intercepted request -pub fn fulfillTransfer(self: *Client, transfer: *Transfer, status: u16, headers: []const Net.Header, body: ?[]const u8) !void { +pub fn fulfillTransfer(self: *Client, transfer: *LiveTransfer, status: u16, headers: []const Net.Header, body: ?[]const u8) !void { if (comptime IS_DEBUG) { std.debug.assert(transfer._intercept_state != .not_intercepted); log.debug(.http, "filfull transfer", .{ .intercepted = self.intercepted }); @@ -599,7 +660,7 @@ pub fn incrReqId(self: *Client) u32 { return id; } -fn makeTransfer(self: *Client, req: Request) !*Transfer { +fn makeTransfer(self: *Client, req: Request) !*LiveTransfer { errdefer req.headers.deinit(); const transfer = try self.transfer_pool.create(); @@ -611,14 +672,13 @@ fn makeTransfer(self: *Client, req: Request) !*Transfer { .id = id, .url = req.url, .req = req, - .ctx = req.ctx, .client = self, .max_response_size = self.network.config.httpMaxResponseSize(), }; return transfer; } -fn requestFailed(transfer: *Transfer, err: anyerror, comptime 
execute_callback: bool) void { +fn requestFailed(transfer: *LiveTransfer, err: anyerror, comptime execute_callback: bool) void { if (transfer._notified_fail) { // we can force a failed request within a callback, which will eventually // result in this being called again in the more general loop. We do this @@ -634,9 +694,9 @@ fn requestFailed(transfer: *Transfer, err: anyerror, comptime execute_callback: }); if (execute_callback) { - transfer.req.error_callback(transfer.ctx, err); + transfer.req.error_callback(transfer.req.ctx, err); } else if (transfer.req.shutdown_callback) |cb| { - cb(transfer.ctx); + cb(transfer.req.ctx); } } @@ -677,7 +737,7 @@ pub fn setTlsVerify(self: *Client, verify: bool) !void { self.tls_verify = verify; } -fn makeRequest(self: *Client, conn: *Net.Connection, transfer: *Transfer) anyerror!void { +fn makeRequest(self: *Client, conn: *Net.Connection, transfer: *LiveTransfer) anyerror!void { const req = &transfer.req; { @@ -689,7 +749,7 @@ fn makeRequest(self: *Client, conn: *Net.Connection, transfer: *Transfer) anyerr } // Set callbacks and per-client settings on the pooled connection. 
- try conn.setCallbacks(Transfer.headerCallback, Transfer.dataCallback); + try conn.setCallbacks(LiveTransfer.headerCallback, LiveTransfer.dataCallback); try conn.setProxy(self.http_proxy); try conn.setTlsVerify(self.tls_verify, self.use_proxy); @@ -743,7 +803,7 @@ fn makeRequest(self: *Client, conn: *Net.Connection, transfer: *Transfer) anyerr }; if (req.start_callback) |cb| { - cb(transfer) catch |err| { + cb(Response.fromLive(transfer)) catch |err| { transfer.deinit(); return err; }; @@ -804,7 +864,7 @@ fn perform(self: *Client, timeout_ms: c_int) !PerformStatus { fn processMessages(self: *Client) !bool { var processed = false; while (self.handles.readMessage()) |msg| { - const transfer = try Transfer.fromConnection(&msg.conn); + const transfer = try LiveTransfer.fromConnection(&msg.conn); // In case of auth challenge // TODO give a way to configure the number of auth retries. @@ -848,6 +908,23 @@ fn processMessages(self: *Client) !bool { } } + const allocator = transfer.arena.allocator(); + var header_list: std.ArrayList(Net.Header) = .empty; + + var it = transfer.responseHeaderIterator(); + while (it.next()) |hdr| { + header_list.append( + allocator, + .{ + .name = try allocator.dupe(u8, hdr.name), + .value = try allocator.dupe(u8, hdr.value), + }, + ) catch |err| { + log.warn(.http, "cache header collect failed", .{ .err = err }); + break; + }; + } + // release it ASAP so that it's available; some done_callbacks // will load more resources. self.endTransfer(transfer); @@ -875,23 +952,48 @@ fn processMessages(self: *Client) !bool { break :blk; } } - transfer.req.done_callback(transfer.ctx) catch |err| { + transfer.req.done_callback(transfer.req.ctx) catch |err| { // transfer isn't valid at this point, don't use it. 
log.err(.http, "done_callback", .{ .err = err }); requestFailed(transfer, err, true); continue; }; - transfer.req.notification.dispatch(.http_request_done, &.{ - .transfer = transfer, - }); - processed = true; + cache: { + if (self.network.cache) |*cache| { + const headers = &transfer.response_header.?; + + const metadata = try CacheMetadata.fromHeaders( + transfer.req.url, + headers.status, + std.time.timestamp(), + header_list.items, + ) orelse break :cache; + + // TODO: Support Vary Keying + const cache_key = transfer.req.url; + + log.err(.browser, "http cache", .{ .key = cache_key, .metadata = metadata }); + + cache.put( + cache_key, + metadata, + transfer.body.items, + ) catch |err| log.warn(.http, "cache put failed", .{ .err = err }); + log.debug(.browser, "http.cache.put", .{ .url = transfer.req.url }); + } + } } + + transfer.req.notification.dispatch(.http_request_done, &.{ + .transfer = transfer, + }); + processed = true; } return processed; } -fn endTransfer(self: *Client, transfer: *Transfer) void { +fn endTransfer(self: *Client, transfer: *LiveTransfer) void { const conn = transfer._conn.?; self.removeConn(conn); transfer._conn = null; @@ -962,9 +1064,9 @@ pub const Request = struct { // arbitrary data that can be associated with this request ctx: *anyopaque = undefined, - start_callback: ?*const fn (transfer: *Transfer) anyerror!void = null, - header_callback: *const fn (transfer: *Transfer) anyerror!bool, - data_callback: *const fn (transfer: *Transfer, data: []const u8) anyerror!void, + start_callback: ?*const fn (response: Response) anyerror!void = null, + header_callback: *const fn (response: Response) anyerror!bool, + data_callback: *const fn (response: Response, data: []const u8) anyerror!void, done_callback: *const fn (ctx: *anyopaque) anyerror!void, error_callback: *const fn (ctx: *anyopaque, err: anyerror) void, shutdown_callback: ?*const fn (ctx: *anyopaque) void = null, @@ -992,16 +1094,91 @@ pub const Request = struct { const AuthChallenge 
= Net.AuthChallenge; -pub const Transfer = struct { +pub const Response = struct { + ctx: *anyopaque, + inner: union(enum) { + live: *LiveTransfer, + cached: *const CachedResponse, + }, + + pub fn fromLive(transfer: *LiveTransfer) Response { + return .{ .ctx = transfer.req.ctx, .inner = .{ .live = transfer } }; + } + + pub fn fromCached(ctx: *anyopaque, resp: *const CachedResponse) Response { + return .{ .ctx = ctx, .inner = .{ .cached = resp } }; + } + + pub fn status(self: Response) ?u16 { + return switch (self.inner) { + .live => |live| if (live.response_header) |rh| rh.status else null, + .cached => |c| c.metadata.status, + }; + } + + pub fn contentType(self: Response) ?[]const u8 { + return switch (self.inner) { + .live => |live| if (live.response_header) |*rh| rh.contentType() else null, + .cached => |c| c.metadata.content_type, + }; + } + + pub fn contentLength(self: Response) ?u32 { + return switch (self.inner) { + .live => |live| live.getContentLength(), + .cached => |c| switch (c.data) { + .buffer => |buf| @intCast(buf.len), + .file => |f| @intCast(f.getEndPos() catch 0), + }, + }; + } + + pub fn redirectCount(self: Response) ?u32 { + return switch (self.inner) { + .live => |live| if (live.response_header) |rh| rh.redirect_count else null, + .cached => 0, + }; + } + + pub fn url(self: Response) [:0]const u8 { + return switch (self.inner) { + .live => |live| live.url, + .cached => |c| c.metadata.url, + }; + } + + pub fn headerIterator(self: Response) HeaderIterator { + return switch (self.inner) { + .live => |live| live.responseHeaderIterator(), + .cached => |c| HeaderIterator{ .list = .{ .list = c.metadata.headers } }, + }; + } + + pub fn abort(self: Response, err: anyerror) void { + switch (self.inner) { + .live => |live| live.abort(err), + .cached => {}, + } + } + + pub fn terminate(self: Response) void { + switch (self.inner) { + .live => |live| live.terminate(), + .cached => {}, + } + } +}; + +pub const LiveTransfer = struct { arena: ArenaAllocator, 
id: u32 = 0, req: Request, url: [:0]const u8, - ctx: *anyopaque, // copied from req.ctx to make it easier for callback handlers client: *Client, // total bytes received in the response, including the response status line, // the headers, and the [encoded] body. bytes_received: usize = 0, + body: std.ArrayListUnmanaged(u8) = .empty, aborted: bool = false, @@ -1037,7 +1214,7 @@ pub const Transfer = struct { fulfilled, }; - pub fn reset(self: *Transfer) void { + pub fn reset(self: *LiveTransfer) void { // There's an assertion in ScriptManager that's failing. Seemingly because // the headerCallback is being called multiple times. This shouldn't be // possible (hence the assertion). Previously, this `reset` would set @@ -1057,7 +1234,7 @@ pub const Transfer = struct { self._tries += 1; } - fn deinit(self: *Transfer) void { + fn deinit(self: *LiveTransfer) void { self.req.headers.deinit(); if (self._conn) |conn| { self.client.removeConn(conn); @@ -1066,7 +1243,7 @@ pub const Transfer = struct { self.client.transfer_pool.destroy(self); } - fn buildResponseHeader(self: *Transfer, conn: *const Net.Connection) !void { + fn buildResponseHeader(self: *LiveTransfer, conn: *const Net.Connection) !void { if (comptime IS_DEBUG) { std.debug.assert(self.response_header == null); } @@ -1093,12 +1270,12 @@ pub const Transfer = struct { } } - pub fn format(self: *Transfer, writer: *std.Io.Writer) !void { + pub fn format(self: *LiveTransfer, writer: *std.Io.Writer) !void { const req = self.req; return writer.print("{s} {s}", .{ @tagName(req.method), req.url }); } - pub fn updateURL(self: *Transfer, url: [:0]const u8) !void { + pub fn updateURL(self: *LiveTransfer, url: [:0]const u8) !void { // for cookies self.url = url; @@ -1106,11 +1283,11 @@ pub const Transfer = struct { self.req.url = url; } - pub fn updateCredentials(self: *Transfer, userpwd: [:0]const u8) void { + pub fn updateCredentials(self: *LiveTransfer, userpwd: [:0]const u8) void { self.req.credentials = userpwd; } - pub fn 
replaceRequestHeaders(self: *Transfer, allocator: Allocator, headers: []const Net.Header) !void { + pub fn replaceRequestHeaders(self: *LiveTransfer, allocator: Allocator, headers: []const Net.Header) !void { self.req.headers.deinit(); var buf: std.ArrayList(u8) = .empty; @@ -1126,7 +1303,7 @@ pub const Transfer = struct { self.req.headers = new_headers; } - pub fn abort(self: *Transfer, err: anyerror) void { + pub fn abort(self: *LiveTransfer, err: anyerror) void { requestFailed(self, err, true); const client = self.client; @@ -1146,7 +1323,7 @@ pub const Transfer = struct { self.deinit(); } - pub fn terminate(self: *Transfer) void { + pub fn terminate(self: *LiveTransfer) void { requestFailed(self, error.Shutdown, false); if (self._conn != null) { self.client.endTransfer(self); @@ -1156,12 +1333,12 @@ pub const Transfer = struct { // internal, when the page is shutting down. Doesn't have the same ceremony // as abort (doesn't send a notification, doesn't invoke an error callback) - fn kill(self: *Transfer) void { + fn kill(self: *LiveTransfer) void { if (self._conn != null) { self.client.endTransfer(self); } if (self.req.shutdown_callback) |cb| { - cb(self.ctx); + cb(self.req.ctx); } self.deinit(); } @@ -1169,7 +1346,7 @@ pub const Transfer = struct { // abortAuthChallenge is called when an auth challenge interception is // abort. We don't call self.client.endTransfer here b/c it has been done // before interception process. - pub fn abortAuthChallenge(self: *Transfer) void { + pub fn abortAuthChallenge(self: *LiveTransfer) void { if (comptime IS_DEBUG) { std.debug.assert(self._intercept_state != .not_intercepted); log.debug(.http, "abort auth transfer", .{ .intercepted = self.client.intercepted }); @@ -1185,7 +1362,7 @@ pub const Transfer = struct { // redirectionCookies manages cookies during redirections handled by Curl. // It sets the cookies from the current response to the cookie jar. // It also immediately sets cookies for the following request. 
- fn redirectionCookies(transfer: *Transfer, conn: *const Net.Connection) !void { + fn redirectionCookies(transfer: *LiveTransfer, conn: *const Net.Connection) !void { const req = &transfer.req; const arena = transfer.arena.allocator(); @@ -1227,7 +1404,7 @@ pub const Transfer = struct { // headerDoneCallback is called once the headers have been read. // It can be called either on dataCallback or once the request for those // w/o body. - fn headerDoneCallback(transfer: *Transfer, conn: *const Net.Connection) !bool { + fn headerDoneCallback(transfer: *LiveTransfer, conn: *const Net.Connection) !bool { lp.assert(transfer._header_done_called == false, "Transfer.headerDoneCallback", .{}); defer transfer._header_done_called = true; @@ -1263,7 +1440,7 @@ pub const Transfer = struct { } } - const proceed = transfer.req.header_callback(transfer) catch |err| { + const proceed = transfer.req.header_callback(Response.fromLive(transfer)) catch |err| { log.err(.http, "header_callback", .{ .err = err, .req = transfer }); return err; }; @@ -1428,7 +1605,12 @@ pub const Transfer = struct { } const chunk = buffer[0..chunk_len]; - transfer.req.data_callback(transfer, chunk) catch |err| { + transfer.body.appendSlice(transfer.arena.allocator(), chunk) catch |err| { + log.err(.http, "cache body append", .{ .err = err, .req = transfer }); + return Net.writefunc_error; + }; + + transfer.req.data_callback(Response.fromLive(transfer), chunk) catch |err| { log.err(.http, "data_callback", .{ .err = err, .req = transfer }); return Net.writefunc_error; }; @@ -1445,7 +1627,7 @@ pub const Transfer = struct { return @intCast(chunk_len); } - pub fn responseHeaderIterator(self: *Transfer) HeaderIterator { + pub fn responseHeaderIterator(self: *LiveTransfer) HeaderIterator { if (self._conn) |conn| { // If we have a connection, than this is a real curl request and we // iterate through the header that curl maintains. 
@@ -1458,12 +1640,12 @@ pub const Transfer = struct { return .{ .list = .{ .list = self.response_header.?._injected_headers } }; } - pub fn fromConnection(conn: *const Net.Connection) !*Transfer { + pub fn fromConnection(conn: *const Net.Connection) !*LiveTransfer { const private = try conn.getPrivate(); return @ptrCast(@alignCast(private)); } - pub fn fulfill(transfer: *Transfer, status: u16, headers: []const Net.Header, body: ?[]const u8) !void { + pub fn fulfill(transfer: *LiveTransfer, status: u16, headers: []const Net.Header, body: ?[]const u8) !void { if (transfer._conn != null) { // should never happen, should have been intercepted/paused, and then // either continued, aborted or fulfilled once. @@ -1477,10 +1659,10 @@ pub const Transfer = struct { }; } - fn _fulfill(transfer: *Transfer, status: u16, headers: []const Net.Header, body: ?[]const u8) !void { + fn _fulfill(transfer: *LiveTransfer, status: u16, headers: []const Net.Header, body: ?[]const u8) !void { const req = &transfer.req; if (req.start_callback) |cb| { - try cb(transfer); + try cb(Response.fromLive(transfer)); } transfer.response_header = .{ @@ -1499,13 +1681,13 @@ pub const Transfer = struct { } lp.assert(transfer._header_done_called == false, "Transfer.fulfill header_done_called", .{}); - if (try req.header_callback(transfer) == false) { + if (try req.header_callback(Response.fromLive(transfer)) == false) { transfer.abort(error.Abort); return; } if (body) |b| { - try req.data_callback(transfer, b); + try req.data_callback(Response.fromLive(transfer), b); } try req.done_callback(req.ctx); @@ -1513,12 +1695,12 @@ pub const Transfer = struct { // This function should be called during the dataCallback. Calling it after // such as in the doneCallback is guaranteed to return null. 
- pub fn getContentLength(self: *const Transfer) ?u32 { + pub fn getContentLength(self: *const LiveTransfer) ?u32 { const cl = self.getContentLengthRawValue() orelse return null; return std.fmt.parseInt(u32, cl, 10) catch null; } - fn getContentLengthRawValue(self: *const Transfer) ?[]const u8 { + fn getContentLengthRawValue(self: *const LiveTransfer) ?[]const u8 { if (self._conn) |conn| { // If we have a connection, than this is a normal request. We can get the // header value from the connection. diff --git a/src/browser/Mime.zig b/src/browser/Mime.zig index e23d48a25..8bf12de22 100644 --- a/src/browser/Mime.zig +++ b/src/browser/Mime.zig @@ -27,6 +27,9 @@ charset: [41]u8 = default_charset, charset_len: usize = default_charset_len, is_default_charset: bool = true, +type_buf: [127]u8 = @splat(0), +sub_type_buf: [127]u8 = @splat(0), + /// String "UTF-8" continued by null characters. const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36; const default_charset_len = 5; @@ -61,7 +64,10 @@ pub const ContentType = union(ContentTypeEnum) { image_webp: void, application_json: void, unknown: void, - other: struct { type: []const u8, sub_type: []const u8 }, + other: struct { + type: []const u8, + sub_type: []const u8, + }, }; pub fn contentTypeString(mime: *const Mime) []const u8 { @@ -112,17 +118,18 @@ fn parseCharset(value: []const u8) error{ CharsetTooBig, Invalid }![]const u8 { return value; } -pub fn parse(input: []u8) !Mime { +pub fn parse(input: []const u8) !Mime { if (input.len > 255) { return error.TooBig; } - // Zig's trim API is broken. The return type is always `[]const u8`, - // even if the input type is `[]u8`. @constCast is safe here. 
- var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace)); + var buf: [255]u8 = undefined; + const normalized = std.ascii.lowerString(&buf, std.mem.trim(u8, input, &std.ascii.whitespace)); _ = std.ascii.lowerString(normalized, normalized); - const content_type, const type_len = try parseContentType(normalized); + var mime = Mime{ .content_type = undefined }; + + const content_type, const type_len = try parseContentType(normalized, &mime.type_buf, &mime.sub_type_buf); if (type_len >= normalized.len) { return .{ .content_type = content_type }; } @@ -163,13 +170,12 @@ pub fn parse(input: []u8) !Mime { } } - return .{ - .params = params, - .charset = charset, - .charset_len = charset_len, - .content_type = content_type, - .is_default_charset = !has_explicit_charset, - }; + mime.params = params; + mime.charset = charset; + mime.charset_len = charset_len; + mime.content_type = content_type; + mime.is_default_charset = !has_explicit_charset; + return mime; } /// Prescan the first 1024 bytes of an HTML document for a charset declaration. 
@@ -387,7 +393,7 @@ pub fn isHTML(self: *const Mime) bool {
 }
 
 // we expect value to be lowercase
-fn parseContentType(value: []const u8) !struct { ContentType, usize } {
+fn parseContentType(value: []const u8, type_buf: []u8, sub_type_buf: []u8) !struct { ContentType, usize } {
     const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
     const type_name = trimRight(value[0..end]);
     const attribute_start = end + 1;
@@ -436,10 +442,23 @@ fn parseContentType(value: []const u8) !struct { ContentType, usize } {
         return error.Invalid;
     }
 
-    return .{ .{ .other = .{
-        .type = main_type,
-        .sub_type = sub_type,
-    } }, attribute_start };
+    // The destination buffers have a fixed size; reject names that do not fit
+    // instead of slicing past their end.
+    if (main_type.len > type_buf.len or sub_type.len > sub_type_buf.len) {
+        return error.TooBig;
+    }
+    @memcpy(type_buf[0..main_type.len], main_type);
+    @memcpy(sub_type_buf[0..sub_type.len], sub_type);
+
+    return .{
+        .{
+            .other = .{
+                .type = type_buf[0..main_type.len],
+                .sub_type = sub_type_buf[0..sub_type.len],
+            },
+        },
+        attribute_start,
+    };
 }
 
 const VALID_CODEPOINTS = blk: {
@@ -453,6 +472,15 @@ const VALID_CODEPOINTS = blk: {
     break :blk v;
 };
 
+/// Returns the `type` slice of an `.other` content type, or an empty string
+/// for every other content type.
+pub fn typeString(self: *const Mime) []const u8 {
+    return switch (self.content_type) {
+        .other => |o| o.type,
+        else => "",
+    };
+}
+
 fn validType(value: []const u8) bool {
     for (value) |b| {
         if (VALID_CODEPOINTS[b] == false) {
diff --git a/src/browser/Page.zig b/src/browser/Page.zig
index c3a6b5a3d..9f05aee46 100644
--- a/src/browser/Page.zig
+++ b/src/browser/Page.zig
@@ -821,12 +821,10 @@ fn notifyParentLoadComplete(self: *Page) void {
     parent.iframeCompletedLoading(self.iframe.?);
 }
 
-fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
-    var self: *Page = @ptrCast(@alignCast(transfer.ctx));
+fn pageHeaderDoneCallback(response: HttpClient.Response) !bool {
+    var self: *Page = @ptrCast(@alignCast(response.ctx));
 
-    const header = &transfer.response_header.?;
-
-    const response_url = std.mem.span(header.url);
+    const response_url = response.url();
     if (std.mem.eql(u8, response_url, self.url) == false) {
         // would be different than self.url in the case of a
redirect self.url = try self.arena.dupeZ(u8, response_url); @@ -840,8 +838,8 @@ fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { if (comptime IS_DEBUG) { log.debug(.page, "navigate header", .{ .url = self.url, - .status = header.status, - .content_type = header.contentType(), + .status = response.status(), + .content_type = response.contentType(), .type = self._type, }); } @@ -849,14 +847,14 @@ fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { return true; } -fn pageDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { - var self: *Page = @ptrCast(@alignCast(transfer.ctx)); +fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void { + var self: *Page = @ptrCast(@alignCast(response.ctx)); if (self._parse_state == .pre) { // we lazily do this, because we might need the first chunk of data // to sniff the content type var mime: Mime = blk: { - if (transfer.response_header.?.contentType()) |ct| { + if (response.contentType()) |ct| { break :blk try Mime.parse(ct); } break :blk Mime.sniff(data); diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 751ba58b1..6a5b725f0 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -694,85 +694,85 @@ pub const Script = struct { self.manager.page.releaseArena(self.arena); } - fn startCallback(transfer: *HttpClient.Transfer) !void { - log.debug(.http, "script fetch start", .{ .req = transfer }); + fn startCallback(response: HttpClient.Response) !void { + log.debug(.http, "script fetch start", .{ .req = response }); } - fn headerCallback(transfer: *HttpClient.Transfer) !bool { - const self: *Script = @ptrCast(@alignCast(transfer.ctx)); - const header = &transfer.response_header.?; - self.status = header.status; - if (header.status != 200) { + fn headerCallback(response: HttpClient.Response) !bool { + const self: *Script = @ptrCast(@alignCast(response.ctx)); + + self.status = response.status().?; + if 
(response.status() != 200) { log.info(.http, "script header", .{ - .req = transfer, - .status = header.status, - .content_type = header.contentType(), + .req = response, + .status = response.status(), + .content_type = response.contentType(), }); return false; } if (comptime IS_DEBUG) { log.debug(.http, "script header", .{ - .req = transfer, - .status = header.status, - .content_type = header.contentType(), + .req = response, + .status = response.status(), + .content_type = response.contentType(), }); } - { - // temp debug, trying to figure out why the next assert sometimes - // fails. Is the buffer just corrupt or is headerCallback really - // being called twice? - lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{ - .m = @tagName(std.meta.activeTag(self.mode)), - .a1 = self.debug_transfer_id, - .a2 = self.debug_transfer_tries, - .a3 = self.debug_transfer_aborted, - .a4 = self.debug_transfer_bytes_received, - .a5 = self.debug_transfer_notified_fail, - .a6 = self.debug_transfer_redirecting, - .a7 = self.debug_transfer_intercept_state, - .a8 = self.debug_transfer_auth_challenge, - .a9 = self.debug_transfer_easy_id, - .b1 = transfer.id, - .b2 = transfer._tries, - .b3 = transfer.aborted, - .b4 = transfer.bytes_received, - .b5 = transfer._notified_fail, - .b6 = transfer._redirecting, - .b7 = @intFromEnum(transfer._intercept_state), - .b8 = transfer._auth_challenge != null, - .b9 = if (transfer._conn) |c| @intFromPtr(c.easy) else 0, - }); - self.header_callback_called = true; - self.debug_transfer_id = transfer.id; - self.debug_transfer_tries = transfer._tries; - self.debug_transfer_aborted = transfer.aborted; - self.debug_transfer_bytes_received = transfer.bytes_received; - self.debug_transfer_notified_fail = transfer._notified_fail; - self.debug_transfer_redirecting = transfer._redirecting; - self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); - self.debug_transfer_auth_challenge = transfer._auth_challenge != 
null; - self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c.easy) else 0; - } + // { + // // temp debug, trying to figure out why the next assert sometimes + // // fails. Is the buffer just corrupt or is headerCallback really + // // being called twice? + // lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{ + // .m = @tagName(std.meta.activeTag(self.mode)), + // .a1 = self.debug_transfer_id, + // .a2 = self.debug_transfer_tries, + // .a3 = self.debug_transfer_aborted, + // .a4 = self.debug_transfer_bytes_received, + // .a5 = self.debug_transfer_notified_fail, + // .a6 = self.debug_transfer_redirecting, + // .a7 = self.debug_transfer_intercept_state, + // .a8 = self.debug_transfer_auth_challenge, + // .a9 = self.debug_transfer_easy_id, + // .b1 = transfer.id, + // .b2 = transfer._tries, + // .b3 = transfer.aborted, + // .b4 = transfer.bytes_received, + // .b5 = transfer._notified_fail, + // .b6 = transfer._redirecting, + // .b7 = @intFromEnum(transfer._intercept_state), + // .b8 = transfer._auth_challenge != null, + // .b9 = if (transfer._conn) |c| @intFromPtr(c.easy) else 0, + // }); + // self.header_callback_called = true; + // self.debug_transfer_id = transfer.id; + // self.debug_transfer_tries = transfer._tries; + // self.debug_transfer_aborted = transfer.aborted; + // self.debug_transfer_bytes_received = transfer.bytes_received; + // self.debug_transfer_notified_fail = transfer._notified_fail; + // self.debug_transfer_redirecting = transfer._redirecting; + // self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); + // self.debug_transfer_auth_challenge = transfer._auth_challenge != null; + // self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c.easy) else 0; + // } lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity }); var buffer: std.ArrayList(u8) = .empty; - if (transfer.getContentLength()) |cl| { + if 
(response.contentLength()) |cl| { try buffer.ensureTotalCapacity(self.arena, cl); } self.source = .{ .remote = buffer }; return true; } - fn dataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { - const self: *Script = @ptrCast(@alignCast(transfer.ctx)); - self._dataCallback(transfer, data) catch |err| { - log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = transfer, .len = data.len }); + fn dataCallback(response: HttpClient.Response, data: []const u8) !void { + const self: *Script = @ptrCast(@alignCast(response.ctx)); + self._dataCallback(response, data) catch |err| { + log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = response, .len = data.len }); return err; }; } - fn _dataCallback(self: *Script, _: *HttpClient.Transfer, data: []const u8) !void { + fn _dataCallback(self: *Script, _: HttpClient.Response, data: []const u8) !void { try self.source.remote.appendSlice(self.arena, data); } diff --git a/src/browser/webapi/net/Fetch.zig b/src/browser/webapi/net/Fetch.zig index 0a44aae2d..ca238a481 100644 --- a/src/browser/webapi/net/Fetch.zig +++ b/src/browser/webapi/net/Fetch.zig @@ -126,16 +126,16 @@ fn handleBlobUrl(url: []const u8, resolver: js.PromiseResolver, page: *Page) !js return resolver.promise(); } -fn httpStartCallback(transfer: *HttpClient.Transfer) !void { - const self: *Fetch = @ptrCast(@alignCast(transfer.ctx)); +fn httpStartCallback(response: HttpClient.Response) !void { + const self: *Fetch = @ptrCast(@alignCast(response.ctx)); if (comptime IS_DEBUG) { log.debug(.http, "request start", .{ .url = self._url, .source = "fetch" }); } - self._response._transfer = transfer; + self._response._http_response = response; } -fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { - const self: *Fetch = @ptrCast(@alignCast(transfer.ctx)); +fn httpHeaderDoneCallback(response: HttpClient.Response) !bool { + const self: *Fetch = @ptrCast(@alignCast(response.ctx)); if (self._signal) |signal| { if (signal._aborted) { @@ 
-144,25 +144,24 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { } const arena = self._response._arena; - if (transfer.getContentLength()) |cl| { + if (response.contentLength()) |cl| { try self._buf.ensureTotalCapacity(arena, cl); } const res = self._response; - const header = transfer.response_header.?; if (comptime IS_DEBUG) { log.debug(.http, "request header", .{ .source = "fetch", .url = self._url, - .status = header.status, + .status = response.status(), }); } - res._status = header.status; - res._status_text = std.http.Status.phrase(@enumFromInt(header.status)) orelse ""; - res._url = try arena.dupeZ(u8, std.mem.span(header.url)); - res._is_redirected = header.redirect_count > 0; + res._status = response.status().?; + res._status_text = std.http.Status.phrase(@enumFromInt(response.status().?)) orelse ""; + res._url = try arena.dupeZ(u8, response.url()); + res._is_redirected = response.redirectCount().? > 0; // Determine response type based on origin comparison const page_origin = URL.getOrigin(arena, self._page.url) catch null; @@ -182,7 +181,7 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { res._type = .basic; } - var it = transfer.responseHeaderIterator(); + var it = response.headerIterator(); while (it.next()) |hdr| { try res._headers.append(hdr.name, hdr.value, self._page); } @@ -190,8 +189,8 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { return true; } -fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { - const self: *Fetch = @ptrCast(@alignCast(transfer.ctx)); +fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void { + const self: *Fetch = @ptrCast(@alignCast(response.ctx)); // Check if aborted if (self._signal) |signal| { @@ -206,7 +205,7 @@ fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void { fn httpDoneCallback(ctx: *anyopaque) !void { const self: *Fetch = @ptrCast(@alignCast(ctx)); var response = self._response; - 
response._transfer = null; + response._http_response = null; response._body = self._buf.items; log.info(.http, "request complete", .{ @@ -229,7 +228,7 @@ fn httpErrorCallback(ctx: *anyopaque, err: anyerror) void { const self: *Fetch = @ptrCast(@alignCast(ctx)); var response = self._response; - response._transfer = null; + response._http_response = null; // the response is only passed on v8 on success, if we're here, it's safe to // clear this. (defer since `self is in the response's arena). @@ -255,7 +254,7 @@ fn httpShutdownCallback(ctx: *anyopaque) void { if (self._owns_response) { var response = self._response; - response._transfer = null; + response._http_response = null; response.deinit(true, self._page._session); // Do not access `self` after this point: the Fetch struct was // allocated from response._arena which has been released. diff --git a/src/browser/webapi/net/Response.zig b/src/browser/webapi/net/Response.zig index 6a926369b..975729126 100644 --- a/src/browser/webapi/net/Response.zig +++ b/src/browser/webapi/net/Response.zig @@ -46,7 +46,7 @@ _type: Type, _status_text: []const u8, _url: [:0]const u8, _is_redirected: bool, -_transfer: ?*HttpClient.Transfer = null, +_http_response: ?HttpClient.Response = null, const InitOpts = struct { status: u16 = 200, @@ -79,13 +79,13 @@ pub fn init(body_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*Response { } pub fn deinit(self: *Response, shutdown: bool, session: *Session) void { - if (self._transfer) |transfer| { + if (self._http_response) |resp| { if (shutdown) { - transfer.terminate(); + resp.terminate(); } else { - transfer.abort(error.Abort); + resp.abort(error.Abort); } - self._transfer = null; + self._http_response = null; } session.releaseArena(self._arena); } @@ -185,7 +185,7 @@ pub fn clone(self: *const Response, page: *Page) !*Response { ._type = self._type, ._is_redirected = self._is_redirected, ._headers = try Headers.init(.{ .obj = self._headers }, page), - ._transfer = null, + ._http_response = 
null, }; return cloned; } diff --git a/src/browser/webapi/net/XMLHttpRequest.zig b/src/browser/webapi/net/XMLHttpRequest.zig index 399e42176..a62a63f7f 100644 --- a/src/browser/webapi/net/XMLHttpRequest.zig +++ b/src/browser/webapi/net/XMLHttpRequest.zig @@ -42,7 +42,7 @@ const XMLHttpRequest = @This(); _page: *Page, _proto: *XMLHttpRequestEventTarget, _arena: Allocator, -_transfer: ?*HttpClient.Transfer = null, +_http_response: ?HttpClient.Response = null, _url: [:0]const u8 = "", _method: net_http.Method = .GET, @@ -97,13 +97,13 @@ pub fn init(page: *Page) !*XMLHttpRequest { } pub fn deinit(self: *XMLHttpRequest, shutdown: bool, session: *Session) void { - if (self._transfer) |transfer| { + if (self._http_response) |transfer| { if (shutdown) { transfer.terminate(); } else { transfer.abort(error.Abort); } - self._transfer = null; + self._http_response = null; } if (self._on_ready_state_change) |func| { @@ -169,9 +169,9 @@ pub fn setWithCredentials(self: *XMLHttpRequest, value: bool) !void { // TODO: url should be a union, as it can be multiple things pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void { // Abort any in-progress request - if (self._transfer) |transfer| { + if (self._http_response) |transfer| { transfer.abort(error.Abort); - self._transfer = null; + self._http_response = null; } // Reset internal state @@ -382,34 +382,32 @@ pub fn getResponseXML(self: *XMLHttpRequest, page: *Page) !?*Node.Document { }; } -fn httpStartCallback(transfer: *HttpClient.Transfer) !void { - const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); +fn httpStartCallback(response: HttpClient.Response) !void { + const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx)); if (comptime IS_DEBUG) { log.debug(.http, "request start", .{ .method = self._method, .url = self._url, .source = "xhr" }); } - self._transfer = transfer; + self._http_response = response; } -fn httpHeaderCallback(transfer: *HttpClient.Transfer, header: 
net_http.Header) !void { - const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); +fn httpHeaderCallback(response: HttpClient.Response, header: net_http.Header) !void { + const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx)); const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ header.name, header.value }); try self._response_headers.append(self._arena, joined); } -fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { - const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); - - const header = &transfer.response_header.?; +fn httpHeaderDoneCallback(response: HttpClient.Response) !bool { + const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx)); if (comptime IS_DEBUG) { log.debug(.http, "request header", .{ .source = "xhr", .url = self._url, - .status = header.status, + .status = response.status(), }); } - if (header.contentType()) |ct| { + if (response.contentType()) |ct| { self._response_mime = Mime.parse(ct) catch |e| { log.info(.http, "invalid content type", .{ .content_Type = ct, @@ -420,18 +418,18 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { }; } - var it = transfer.responseHeaderIterator(); + var it = response.headerIterator(); while (it.next()) |hdr| { const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ hdr.name, hdr.value }); try self._response_headers.append(self._arena, joined); } - self._response_status = header.status; - if (transfer.getContentLength()) |cl| { + self._response_status = response.status().?; + if (response.contentLength()) |cl| { self._response_len = cl; try self._response_data.ensureTotalCapacity(self._arena, cl); } - self._response_url = try self._arena.dupeZ(u8, std.mem.span(header.url)); + self._response_url = try self._arena.dupeZ(u8, response.url()); const page = self._page; @@ -446,8 +444,8 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool { return true; } -fn httpDataCallback(transfer: *HttpClient.Transfer, 
data: []const u8) !void { - const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx)); +fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void { + const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx)); try self._response_data.appendSlice(self._arena, data); const page = self._page; @@ -470,7 +468,7 @@ fn httpDoneCallback(ctx: *anyopaque) !void { // Not that the request is done, the http/client will free the transfer // object. It isn't safe to keep it around. - self._transfer = null; + self._http_response = null; const page = self._page; @@ -492,21 +490,21 @@ fn httpDoneCallback(ctx: *anyopaque) !void { fn httpErrorCallback(ctx: *anyopaque, err: anyerror) void { const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx)); // http client will close it after an error, it isn't safe to keep around - self._transfer = null; + self._http_response = null; self.handleError(err); self._page.js.weakRef(self); } fn httpShutdownCallback(ctx: *anyopaque) void { const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx)); - self._transfer = null; + self._http_response = null; } pub fn abort(self: *XMLHttpRequest) void { self.handleError(error.Abort); - if (self._transfer) |transfer| { + if (self._http_response) |transfer| { transfer.abort(error.Abort); - self._transfer = null; + self._http_response = null; } self._page.js.weakRef(self); } diff --git a/src/cdp/domains/fetch.zig b/src/cdp/domains/fetch.zig index 310479b22..731cde666 100644 --- a/src/cdp/domains/fetch.zig +++ b/src/cdp/domains/fetch.zig @@ -50,7 +50,7 @@ pub fn processMessage(cmd: anytype) !void { // Stored in CDP pub const InterceptState = struct { allocator: Allocator, - waiting: std.AutoArrayHashMapUnmanaged(u32, *HttpClient.Transfer), + waiting: std.AutoArrayHashMapUnmanaged(u32, *HttpClient.LiveTransfer), pub fn init(allocator: Allocator) !InterceptState { return .{ @@ -63,11 +63,11 @@ pub const InterceptState = struct { return self.waiting.count() == 0; } - pub fn put(self: 
*InterceptState, transfer: *HttpClient.Transfer) !void { + pub fn put(self: *InterceptState, transfer: *HttpClient.LiveTransfer) !void { return self.waiting.put(self.allocator, transfer.id, transfer); } - pub fn remove(self: *InterceptState, request_id: u32) ?*HttpClient.Transfer { + pub fn remove(self: *InterceptState, request_id: u32) ?*HttpClient.LiveTransfer { const entry = self.waiting.fetchSwapRemove(request_id) orelse return null; return entry.value; } @@ -76,7 +76,7 @@ pub const InterceptState = struct { self.waiting.deinit(self.allocator); } - pub fn pendingTransfers(self: *const InterceptState) []*HttpClient.Transfer { + pub fn pendingTransfers(self: *const InterceptState) []*HttpClient.LiveTransfer { return self.waiting.values(); } }; diff --git a/src/cdp/domains/network.zig b/src/cdp/domains/network.zig index 5b9a49df6..0da6175c0 100644 --- a/src/cdp/domains/network.zig +++ b/src/cdp/domains/network.zig @@ -25,7 +25,7 @@ const CdpStorage = @import("storage.zig"); const id = @import("../id.zig"); const URL = @import("../../browser/URL.zig"); -const Transfer = @import("../../browser/HttpClient.zig").Transfer; +const LiveTransfer = @import("../../browser/HttpClient.zig").LiveTransfer; const Notification = @import("../../Notification.zig"); const Mime = @import("../../browser/Mime.zig"); @@ -294,9 +294,9 @@ pub fn httpRequestDone(bc: anytype, msg: *const Notification.RequestDone) !void } pub const TransferAsRequestWriter = struct { - transfer: *Transfer, + transfer: *LiveTransfer, - pub fn init(transfer: *Transfer) TransferAsRequestWriter { + pub fn init(transfer: *LiveTransfer) TransferAsRequestWriter { return .{ .transfer = transfer, }; @@ -348,9 +348,9 @@ pub const TransferAsRequestWriter = struct { const TransferAsResponseWriter = struct { arena: Allocator, - transfer: *Transfer, + transfer: *LiveTransfer, - fn init(arena: Allocator, transfer: *Transfer) TransferAsResponseWriter { + fn init(arena: Allocator, transfer: *LiveTransfer) 
TransferAsResponseWriter {
         return .{
             .arena = arena,
             .transfer = transfer,
diff --git a/src/network/Runtime.zig b/src/network/Runtime.zig
index 72aebe812..fd4cefdac 100644
--- a/src/network/Runtime.zig
+++ b/src/network/Runtime.zig
@@ -29,6 +29,7 @@ const libcurl = @import("../sys/libcurl.zig");
 const net_http = @import("http.zig");
 const RobotStore = @import("Robots.zig").RobotStore;
 const WebBotAuth = @import("WebBotAuth.zig");
+const Cache = @import("cache/Cache.zig");
 
 const Runtime = @This();
 
@@ -49,6 +50,7 @@ config: *const Config,
 ca_blob: ?net_http.Blob,
 robot_store: RobotStore,
 web_bot_auth: ?WebBotAuth,
+cache: ?Cache,
 
 connections: []net_http.Connection,
 available: std.DoublyLinkedList = .{},
@@ -233,6 +235,11 @@ pub fn init(allocator: Allocator, config: *const Config) !Runtime {
     else
         null;
 
+    const cache = if (config.cacheDir()) |cache_dir_path|
+        Cache{ .kind = .{ .fs = try .init(cache_dir_path) } }
+    else
+        null;
+
     return .{
         .allocator = allocator,
         .config = config,
@@ -246,6 +253,7 @@ pub fn init(allocator: Allocator, config: *const Config) !Runtime {
 
         .robot_store = RobotStore.init(allocator),
         .web_bot_auth = web_bot_auth,
+        .cache = cache,
     };
 }
 
diff --git a/src/network/cache/Cache.zig b/src/network/cache/Cache.zig
new file mode 100644
index 000000000..cd176b918
--- /dev/null
+++ b/src/network/cache/Cache.zig
@@ -0,0 +1,219 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see .
+
+const std = @import("std");
+const Http = @import("../http.zig");
+const FsCache = @import("FsCache.zig");
+
+/// A browser-wide cache for resources across the network.
+/// This mostly conforms to RFC9111 with regards to caching behavior.
+pub const Cache = @This();
+
+kind: union(enum) {
+    fs: FsCache,
+},
+
+pub fn get(self: *Cache, allocator: std.mem.Allocator, key: []const u8) ?CachedResponse {
+    return switch (self.kind) {
+        inline else => |*c| c.get(allocator, key),
+    };
+}
+
+pub fn put(self: *Cache, key: []const u8, metadata: CachedMetadata, body: []const u8) !void {
+    return switch (self.kind) {
+        inline else => |*c| c.put(key, metadata, body),
+    };
+}
+
+pub const CacheControl = struct {
+    max_age: ?u64 = null,
+    s_maxage: ?u64 = null,
+    is_public: bool = false,
+    must_revalidate: bool = false,
+    no_cache: bool = false,
+    no_store: bool = false,
+    immutable: bool = false,
+
+    /// Parses a `Cache-Control` header value into its recognized directives.
+    /// Directive names are matched case-insensitively and unrecognized ones
+    /// are ignored. A `max-age` / `s-maxage` argument that is not a valid
+    /// base-10 `u64` is stored as `null`. When `s-maxage` is present it
+    /// overrides `max_age`, wherever it appears in the header.
+    pub fn parse(value: []const u8) CacheControl {
+        var cc: CacheControl = .{};
+
+        var iter = std.mem.splitScalar(u8, value, ',');
+        while (iter.next()) |part| {
+            const directive = std.mem.trim(u8, part, &std.ascii.whitespace);
+            if (std.ascii.eqlIgnoreCase(directive, "no-store")) {
+                cc.no_store = true;
+            } else if (std.ascii.eqlIgnoreCase(directive, "no-cache")) {
+                cc.no_cache = true;
+            } else if (std.ascii.eqlIgnoreCase(directive, "must-revalidate")) {
+                cc.must_revalidate = true;
+            } else if (std.ascii.eqlIgnoreCase(directive, "immutable")) {
+                cc.immutable = true;
+            } else if (std.ascii.eqlIgnoreCase(directive, "public")) {
+                cc.is_public = true;
+            } else if (std.ascii.startsWithIgnoreCase(directive, "max-age=")) {
+                cc.max_age = std.fmt.parseInt(u64, directive[8..], 10) catch null;
+            } else if (std.ascii.startsWithIgnoreCase(directive, "s-maxage=")) {
+                cc.s_maxage = std.fmt.parseInt(u64, directive[9..], 10) catch null;
+            }
+        }
+        // s-maxage takes precedence over max-age. Resolve it once all directives
+        // have been seen so the outcome does not depend on their order.
+        if (cc.s_maxage) |shared_max_age| cc.max_age = shared_max_age;
+        return cc;
+    }
+};
+
+pub const Vary = union(enum) {
+    wildcard: void,
+    value: []const u8,
+
+    pub fn parse(value: []const u8) Vary {
+        if (std.mem.eql(u8, value, "*")) return .wildcard;
+        return .{ .value = value };
+    }
+
+    pub fn deinit(self: Vary, allocator: std.mem.Allocator) void {
+        switch (self) {
+            .wildcard => {},
+            .value => |v| allocator.free(v),
+        }
+    }
+
+    pub fn toString(self: Vary) []const u8 {
+        return switch (self) {
+            .wildcard => "*",
+            .value => |v| v,
+        };
+    }
+};
+
+pub const CachedMetadata = struct {
+    url: [:0]const u8,
+    content_type: []const u8,
+
+    status: u16,
+    stored_at: i64,
+    age_at_store: u64,
+
+    // for If-None-Match
+    etag: ?[]const u8,
+    // for If-Modified-Since
+    last_modified: ?[]const u8,
+
+    cache_control: CacheControl,
+    vary: ?Vary,
+    headers: []const Http.Header,
+
+    pub fn fromHeaders(
+        url: [:0]const u8,
+        status: u16,
+        timestamp: i64,
+        headers: []const Http.Header,
+    ) !?CachedMetadata {
+        var cc: CacheControl = .{};
+        var vary: ?Vary = null;
+        var etag: ?[]const u8 = null;
+        var last_modified: ?[]const u8 = null;
+        var age_at_store: u64 = 0;
+        var content_type: []const u8 = "application/octet-stream";
+        var has_set_cookie = false;
+        var has_authorization = false;
+
+        for (headers) |hdr| {
+            if (std.ascii.eqlIgnoreCase(hdr.name, "cache-control")) {
+                cc = CacheControl.parse(hdr.value);
+            } else if (std.ascii.eqlIgnoreCase(hdr.name, "etag")) {
+                etag = hdr.value;
+            } else if (std.ascii.eqlIgnoreCase(hdr.name, "last-modified")) {
+                last_modified = hdr.value;
+            } else if (std.ascii.eqlIgnoreCase(hdr.name, "vary")) {
+                vary = Vary.parse(hdr.value);
+            } else if (std.ascii.eqlIgnoreCase(hdr.name, "age")) {
+                age_at_store = std.fmt.parseInt(u64, hdr.value, 10) catch 0;
+            } else if (std.ascii.eqlIgnoreCase(hdr.name, "content-type")) {
+                content_type = hdr.value;
+            } else if (std.ascii.eqlIgnoreCase(hdr.name, "set-cookie")) {
+                has_set_cookie = true;
+            }
else if (std.ascii.eqlIgnoreCase(hdr.name, "authorization")) {
+                has_authorization = true;
+            }
+        }
+
+        // no-store: must not be stored
+        if (cc.no_store) return null;
+
+        // Vary: * means the response cannot be cached
+        if (vary) |v| if (v == .wildcard) return null;
+
+        // must have an explicit max-age to be cacheable
+        if (cc.max_age == null) return null;
+
+        // Set-Cookie without explicit public
+        if (has_set_cookie and !cc.is_public) return null;
+
+        // Authorization header without explicit public or s-maxage
+        if (has_authorization and !cc.is_public and cc.s_maxage == null) return null;
+
+        // Only cache 200 for now. Technically, we can cache others.
+        switch (status) {
+            200 => {},
+            else => return null,
+        }
+
+        return .{
+            .url = url,
+            .content_type = content_type,
+            .status = status,
+            .stored_at = timestamp,
+            .age_at_store = age_at_store,
+            .etag = etag,
+            .last_modified = last_modified,
+            .cache_control = cc,
+            .vary = vary,
+            .headers = headers,
+        };
+    }
+
+    pub fn deinit(self: CachedMetadata, allocator: std.mem.Allocator) void {
+        allocator.free(self.url);
+        allocator.free(self.content_type);
+        for (self.headers) |header| {
+            allocator.free(header.name);
+            allocator.free(header.value);
+        }
+        allocator.free(self.headers);
+        if (self.vary) |v| v.deinit(allocator);
+        if (self.etag) |e| allocator.free(e);
+        if (self.last_modified) |lm| allocator.free(lm);
+    }
+};
+
+pub const CachedData = union(enum) {
+    buffer: []const u8,
+    file: std.fs.File,
+};
+
+pub const CachedResponse = struct {
+    metadata: CachedMetadata,
+    data: CachedData,
+};
diff --git a/src/network/cache/FsCache.zig b/src/network/cache/FsCache.zig
new file mode 100644
index 000000000..de62cd306
--- /dev/null
+++ b/src/network/cache/FsCache.zig
@@ -0,0 +1,325 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see .
+
+const std = @import("std");
+const Cache = @import("Cache.zig");
+const Http = @import("../http.zig");
+const CachedMetadata = Cache.CachedMetadata;
+const CachedResponse = Cache.CachedResponse;
+
+pub const FsCache = @This();
+
+dir: std.fs.Dir,
+
+pub fn init(path: []const u8) !FsCache {
+    const cwd = std.fs.cwd();
+
+    cwd.makeDir(path) catch |err| switch (err) {
+        error.PathAlreadyExists => {},
+        else => return err,
+    };
+
+    const dir = try cwd.openDir(path, .{ .iterate = true });
+    return .{ .dir = dir };
+}
+
+pub fn deinit(self: *FsCache) void {
+    self.dir.close();
+}
+
+/// Wraps a copy of this filesystem cache in the generic `Cache` interface.
+pub fn cache(self: *FsCache) Cache {
+    return .{ .kind = .{ .fs = self.* } };
+}
+
+const HASHED_KEY_LEN = 16;
+const HASHED_PATH_LEN = HASHED_KEY_LEN + 5;
+const HASHED_TMP_PATH_LEN = HASHED_PATH_LEN + 4;
+
+fn hashKey(key: []const u8) [HASHED_KEY_LEN]u8 {
+    const h = std.hash.Wyhash.hash(0, key);
+    var hex: [HASHED_KEY_LEN]u8 = undefined;
+    _ = std.fmt.bufPrint(&hex, "{x:0>16}", .{h}) catch unreachable;
+    return hex;
+}
+
+fn serializeMeta(writer: *std.Io.Writer, meta: *const CachedMetadata) !void {
+    try writer.print("{s}\n{s}\n", .{ meta.url, meta.content_type });
+    try writer.print("{d}\n{d}\n{d}\n", .{
+        meta.status,
+        meta.stored_at,
+        meta.age_at_store,
+    });
+    try writer.print("{s}\n", .{meta.etag orelse "null"});
+    try writer.print("{s}\n", .{meta.last_modified orelse "null"});
+
+    // cache-control
+    try writer.print("{d}\n", .{meta.cache_control.max_age orelse 0});
+    try writer.print("{}\n{}\n{}\n{}\n", .{
+        meta.cache_control.max_age != null,
+        meta.cache_control.must_revalidate,
+        meta.cache_control.no_cache,
+        meta.cache_control.immutable,
+    });
+
+    // vary
+    if (meta.vary) |v| {
+        try writer.print("{s}\n", .{v.toString()});
+    } else {
+        try writer.print("null\n", .{});
+    }
+    try writer.flush();
+
+    try writer.print("{d}\n", .{meta.headers.len});
+    for (meta.headers) |hdr| {
+        try writer.print("{s}\n{s}\n", .{ hdr.name, hdr.value });
+        try writer.flush();
+    }
+    try writer.flush();
+}
+
+fn deserializeMetaOptionalString(bytes: []const u8) ?[]const u8 {
+    if (std.mem.eql(u8, bytes, "null")) return null else return bytes;
+}
+
+fn deserializeMetaBoolean(bytes: []const u8) !bool {
+    if (std.mem.eql(u8, bytes, "true")) return true;
+    if (std.mem.eql(u8, bytes, "false")) return false;
+    return error.Malformed;
+}
+
+/// Reads a metadata file written by `serializeMeta`.
+/// Every returned slice is allocated with `allocator`; the caller owns the
+/// result and releases it with `CachedMetadata.deinit`. On error, anything
+/// allocated so far is freed.
+fn deserializeMeta(allocator: std.mem.Allocator, file: std.fs.File) !CachedMetadata {
+    // NOTE(review): `takeDelimiter` hands back slices of this buffer, so a
+    // line longer than the buffer presumably cannot be read back - confirm.
+    var file_buf: [1024]u8 = undefined;
+    var file_reader = file.reader(&file_buf);
+    const reader = &file_reader.interface;
+
+    const url = blk: {
+        const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+        break :blk try allocator.dupeZ(u8, line);
+    };
+    errdefer allocator.free(url);
+
+    const content_type = blk: {
+        const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+        break :blk try allocator.dupe(u8, line);
+    };
+    errdefer allocator.free(content_type);
+
+    const status = blk: {
+        const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+        break :blk std.fmt.parseInt(u16, line, 10) catch return error.Malformed;
+    };
+    const stored_at = blk: {
+        const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+        break :blk std.fmt.parseInt(i64, line, 10) catch return error.Malformed;
+    };
+    const age_at_store = blk: {
+        const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+        break :blk std.fmt.parseInt(u64, line, 10) catch return error.Malformed;
+    };
+
+    const etag = blk: {
+        const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+        break :blk if (std.mem.eql(u8, line, "null")) null else try allocator.dupe(u8, line);
+    };
+    errdefer if (etag) |e| allocator.free(e);
+
+    const last_modified = blk: {
+        const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+        break :blk if (std.mem.eql(u8, line, "null")) null else try allocator.dupe(u8, line);
+    };
+    errdefer if (last_modified) |lm| allocator.free(lm);
+
+    // cache-control
+    const cc = cache_control: {
+        const max_age_val = blk: {
+            const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+            break :blk std.fmt.parseInt(u64, line, 10) catch return error.Malformed;
+        };
+        const max_age_present = blk: {
+            const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+            break :blk try deserializeMetaBoolean(line);
+        };
+        const must_revalidate = blk: {
+            const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+            break :blk try deserializeMetaBoolean(line);
+        };
+        const no_cache = blk: {
+            const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+            break :blk try deserializeMetaBoolean(line);
+        };
+        const immutable = blk: {
+            const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+            break :blk try deserializeMetaBoolean(line);
+        };
+        break :cache_control Cache.CacheControl{
+            .max_age = if (max_age_present) max_age_val else null,
+            .must_revalidate = must_revalidate,
+            .no_cache = no_cache,
+            .immutable = immutable,
+        };
+    };
+
+    // vary
+    const vary: ?Cache.Vary = blk: {
+        const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+        if (std.mem.eql(u8, line, "null")) break :blk null;
+        // Classify before duplicating so a wildcard does not leak the copy.
+        if (std.mem.eql(u8, line, "*")) break :blk Cache.Vary{ .wildcard = {} };
+        break :blk Cache.Vary{ .value = try allocator.dupe(u8, line) };
+    };
+    errdefer if (vary) |v| if (v == .value) allocator.free(v.value);
+
+    const headers = blk: {
+        const line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+        const count = std.fmt.parseInt(usize, line, 10) catch return error.Malformed;
+
+        const hdrs = try allocator.alloc(Http.Header, count);
+        // Number of leading entries in `hdrs` that own their name and value.
+        var filled: usize = 0;
+        errdefer {
+            for (hdrs[0..filled]) |hdr| {
+                allocator.free(hdr.name);
+                allocator.free(hdr.value);
+            }
+            allocator.free(hdrs);
+        }
+
+        for (hdrs) |*hdr| {
+            // Copy the name before reading the next line: the slice returned
+            // by the reader points into its buffer and is invalidated by the
+            // following read.
+            const name_line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+            const name = try allocator.dupe(u8, name_line);
+            errdefer allocator.free(name);
+
+            const value_line = try reader.takeDelimiter('\n') orelse return error.Malformed;
+            hdr.* = .{ .name = name, .value = try allocator.dupe(u8, value_line) };
+            filled += 1;
+        }
+
+        break :blk hdrs;
+    };
+
+    return .{
+        .url = url,
+        .content_type = content_type,
+        .status = status,
+        .stored_at = stored_at,
+        .age_at_store = age_at_store,
+        .cache_control = cc,
+        .etag = etag,
+        .last_modified = last_modified,
+        .vary = vary,
+        .headers = headers,
+    };
+}
+
+/// Looks up `key` and returns its stored metadata plus an open handle to the
+/// body file, or null when the entry is missing or cannot be read.
+/// The caller owns the result: free the metadata with `allocator` and close
+/// the file.
+pub fn get(self: *FsCache, allocator: std.mem.Allocator, key: []const u8) ?Cache.CachedResponse {
+    const hashed_key = hashKey(key);
+
+    var meta_path: [HASHED_PATH_LEN]u8 = undefined;
+    _ = std.fmt.bufPrint(&meta_path, "{s}.meta", .{hashed_key}) catch @panic("FsCache.get meta path overflowed");
+
+    var body_path: [HASHED_PATH_LEN]u8 = undefined;
+    _ = std.fmt.bufPrint(&body_path, "{s}.body", .{hashed_key}) catch @panic("FsCache.get body path overflowed");
+
+    const meta_file = self.dir.openFile(&meta_path, .{ .mode = .read_only }) catch return null;
+    defer meta_file.close();
+
+    const meta = deserializeMeta(allocator, meta_file) catch {
+        self.dir.deleteFile(&meta_path) catch {};
+        self.dir.deleteFile(&body_path) catch {};
+        return null;
+    };
+
+    const body_file = self.dir.openFile(&body_path, .{ .mode = .read_only }) catch {
+        // The metadata was allocated above; release it before giving up.
+        meta.deinit(allocator);
+        return null;
+    };
+
+    return .{
+        .metadata = meta,
+        .data = .{ .file = body_file },
+    };
+}
+
+/// Stores `meta` and `body` under `key`. Each file is written to a temporary
+/// name and then renamed into place.
+pub fn put(self: *FsCache, key: []const u8, meta: CachedMetadata, body: []const u8) !void {
+    const hashed_key = hashKey(key);
+
+    // Write meta to a temp file, then atomically rename into place
+    var meta_path: [HASHED_PATH_LEN]u8 = undefined;
+    _ = std.fmt.bufPrint(&meta_path, "{s}.meta", .{hashed_key}) catch
+        @panic("FsCache.put meta path overflowed");
+
+    var meta_tmp_path: [HASHED_TMP_PATH_LEN]u8 = undefined;
+    _ = std.fmt.bufPrint(&meta_tmp_path, "{s}.meta.tmp", .{hashed_key}) catch
+        @panic("FsCache.put meta tmp path overflowed");
+
+    {
+        const meta_file = try self.dir.createFile(&meta_tmp_path, .{});
+        errdefer {
+            meta_file.close();
+            self.dir.deleteFile(&meta_tmp_path) catch {};
+        }
+
+        var buf: [512]u8 = undefined;
+        var meta_file_writer = meta_file.writer(&buf);
+        try serializeMeta(&meta_file_writer.interface, &meta);
+        meta_file.close();
+    }
+    errdefer self.dir.deleteFile(&meta_tmp_path) catch {};
+    try self.dir.rename(&meta_tmp_path, &meta_path);
+    // From here on the new metadata is visible; drop it again if the body
+    // cannot be stored, so it is never paired with a stale or missing body.
+    errdefer self.dir.deleteFile(&meta_path) catch {};
+
+    // Write body to a temp file, then atomically rename into place
+    var body_path: [HASHED_PATH_LEN]u8 = undefined;
+    _ = std.fmt.bufPrint(&body_path, "{s}.body", .{hashed_key}) catch
+        @panic("FsCache.put body path overflowed");
+
+    var body_tmp_path: [HASHED_TMP_PATH_LEN]u8 = undefined;
+    _ = std.fmt.bufPrint(&body_tmp_path, "{s}.body.tmp", .{hashed_key}) catch
+        @panic("FsCache.put body tmp path overflowed");
+
+    {
+        const body_file = try self.dir.createFile(&body_tmp_path, .{});
+        errdefer {
+            body_file.close();
+            self.dir.deleteFile(&body_tmp_path) catch {};
+        }
+        try body_file.writeAll(body);
+        body_file.close();
+    }
+    errdefer self.dir.deleteFile(&body_tmp_path) catch {};
+    try self.dir.rename(&body_tmp_path, &body_path);
+}