Skip to content

Commit 44544ea

Browse files
authored
[WasmFS] Add chunking to the WasmFS fetch backend (#23021)
This extends the existing WasmFS fetch backend to support a configurable chunk size and to fetch only one chunk at a time, as needed to satisfy requests. It also makes the test HTTP server support range requests, for testing.
1 parent 917568d commit 44544ea

File tree

9 files changed

+327
-65
lines changed

9 files changed

+327
-65
lines changed

ChangeLog.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ See docs/process.md for more on how version tagging works.
3737
source maps, so it has not worked in many years, and there have been no
3838
requests for it. This has no impact on the source map support in browser
3939
devtools. (#23553)
40+
- The WasmFS fetch backend now fetches files in chunks using HTTP range
41+
requests (if supported by the server). `wasmfs_create_fetch_backend` now
42+
takes a second parameter (`uint32_t chunk_size`) to configure the size of
43+
each chunk. If a file is read a few times with random accesses, a small
44+
chunk size will minimize bandwidth; if a file is read in larger contiguous
45+
ranges, a larger chunk size will reduce the number of requests. (#23021)
4046

4147
4.0.2 - 01/30/25
4248
----------------

src/lib/libfetchfs.js

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,13 @@ addToLibrary({
88
$FETCHFS__deps: ['$stringToUTF8OnStack', 'wasmfs_create_fetch_backend'],
99
$FETCHFS: {
1010
createBackend(opts) {
11-
return _wasmfs_create_fetch_backend(stringToUTF8OnStack(opts.base_url));
12-
}
11+
return withStackSave(
12+
() => _wasmfs_create_fetch_backend(
13+
stringToUTF8OnStack(opts.base_url ?? ""),
14+
opts.chunkSize | 0
15+
)
16+
);
17+
},
1318
},
1419
});
1520

src/lib/libwasmfs_fetch.js

Lines changed: 102 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,24 @@
55
*/
66

77
addToLibrary({
8+
$wasmFS$JSMemoryRanges: {},
9+
810
// Fetch backend: On first access of a file (either a read or a getSize), it
911
// will fetch() the data from the network asynchronously, one range of chunks
1012
// at a time. Fetched chunks are cached in JS memory and reused by later reads.
1113

1214
_wasmfs_create_fetch_backend_js__deps: [
1315
'$wasmFS$backends',
14-
'$wasmFS$JSMemoryFiles',
15-
'_wasmfs_create_js_file_backend_js',
16-
'_wasmfs_fetch_get_file_path',
16+
'$wasmFS$JSMemoryRanges',
17+
'_wasmfs_fetch_get_file_url',
18+
'_wasmfs_fetch_get_chunk_size',
1719
],
1820
_wasmfs_create_fetch_backend_js: async function(backend) {
1921
// Get a promise that fetches the data and stores it in JS memory (if it has
2022
// not already been fetched).
21-
async function getFile(file) {
22-
if (wasmFS$JSMemoryFiles[file]) {
23-
// The data is already here, so nothing to do before we continue on to
24-
// the actual read below.
25-
return Promise.resolve();
26-
}
27-
// This is the first time we want the file's data.
23+
async function getFileRange(file, offset, len) {
2824
var url = '';
29-
var fileUrl_p = __wasmfs_fetch_get_file_path(file);
25+
var fileUrl_p = __wasmfs_fetch_get_file_url(file);
3026
var fileUrl = UTF8ToString(fileUrl_p);
3127
var isAbs = fileUrl.indexOf('://') !== -1;
3228
if (isAbs) {
@@ -35,55 +31,127 @@ addToLibrary({
3531
try {
3632
var u = new URL(fileUrl, self.location.origin);
3733
url = u.toString();
38-
} catch (e) {
34+
} catch (_e) {
35+
throw {status: 404};
3936
}
4037
}
41-
var response = await fetch(url);
42-
if (response.ok) {
43-
var buffer = await response['arrayBuffer']();
44-
wasmFS$JSMemoryFiles[file] = new Uint8Array(buffer);
45-
} else {
38+
var chunkSize = __wasmfs_fetch_get_chunk_size(file);
39+
offset ??= 0;
40+
len ??= chunkSize;
41+
// In which chunk does the seeked range start? E.g., 5-14 with chunksize 8 will start in chunk 0.
42+
var firstChunk = (offset / chunkSize) | 0;
43+
// In which chunk does the seeked range end? E.g., 5-14 with chunksize 8 will end in chunk 1, as will 5-16 (since byte 16 isn't requested).
44+
// This will always give us a chunk >= firstChunk since len > 0.
45+
var lastChunk = ((offset+len-1) / chunkSize) | 0;
46+
if (!(file in wasmFS$JSMemoryRanges)) {
47+
var fileInfo = await fetch(url, {method:'HEAD', headers:{'Range': 'bytes=0-'}});
48+
if (fileInfo.ok &&
49+
fileInfo.headers.has('Content-Length') &&
50+
fileInfo.headers.get('Accept-Ranges') == 'bytes' &&
51+
(parseInt(fileInfo.headers.get('Content-Length'), 10) > chunkSize*2)) {
52+
wasmFS$JSMemoryRanges[file] = {
53+
size: parseInt(fileInfo.headers.get('Content-Length'), 10),
54+
chunks: [],
55+
chunkSize: chunkSize
56+
};
57+
} else {
58+
// The file is small, or range requests are unavailable: download the whole file.
59+
var wholeFileReq = await fetch(url);
60+
if (!wholeFileReq.ok) {
61+
throw wholeFileReq;
62+
}
63+
var wholeFileData = new Uint8Array(await wholeFileReq.arrayBuffer());
64+
var text = new TextDecoder().decode(wholeFileData);
65+
wasmFS$JSMemoryRanges[file] = {
66+
size: wholeFileData.byteLength,
67+
chunks: [wholeFileData],
68+
chunkSize: wholeFileData.byteLength
69+
};
70+
return Promise.resolve();
71+
}
72+
}
73+
var allPresent = true;
74+
var i;
75+
// Do we have all the chunks already? If so, we don't need to do any fetches.
76+
for (i = firstChunk; i <= lastChunk; i++) {
77+
if (!wasmFS$JSMemoryRanges[file].chunks[i]) {
78+
allPresent = false;
79+
break;
80+
}
81+
}
82+
if (allPresent) {
83+
// The data is already here, so nothing to do before we continue on to
84+
// the actual read.
85+
return Promise.resolve();
86+
}
87+
// This is the first time we want the chunks' data. We'll make
88+
// one request for all the chunks we need, rather than one
89+
// request per chunk.
90+
var start = firstChunk * chunkSize;
91+
// We must fetch *up to* the last byte of the last chunk.
92+
var end = (lastChunk+1) * chunkSize;
93+
var response = await fetch(url, {headers:{'Range': `bytes=${start}-${end-1}`}});
94+
if (!response.ok) {
4695
throw response;
4796
}
97+
var bytes = await response['bytes']();
98+
for (i = firstChunk; i <= lastChunk; i++) {
99+
wasmFS$JSMemoryRanges[file].chunks[i] = bytes.slice(i*chunkSize-start,(i+1)*chunkSize-start);
100+
}
101+
return Promise.resolve();
48102
}
49103

50-
// Start with the normal JSFile operations. This sets
51-
// wasmFS$backends[backend]
52-
// which we will then augment.
53-
__wasmfs_create_js_file_backend_js(backend);
54-
55-
// Add the async operations on top.
56-
var jsFileOps = wasmFS$backends[backend];
57104
wasmFS$backends[backend] = {
58105
// alloc/free operations are not actually async, but we must return a
59106
// Promise as the caller expects.
60107
allocFile: async (file) => {
61-
jsFileOps.allocFile(file);
108+
// nop
62109
return Promise.resolve();
63110
},
64111
freeFile: async (file) => {
65-
jsFileOps.freeFile(file);
112+
// free memory
113+
wasmFS$JSMemoryRanges[file] = undefined;
66114
return Promise.resolve();
67115
},
68116

69117
write: async (file, buffer, length, offset) => {
70-
abort("TODO: file writing in fetch backend? read-only for now");
118+
console.error('TODO: file writing in fetch backend? read-only for now');
71119
},
72120

73121
// read/getSize fetch the data if needed, then answer from wasmFS$JSMemoryRanges.
74122
read: async (file, buffer, length, offset) => {
123+
if (length == 0) {
124+
return 0;
125+
}
75126
try {
76-
await getFile(file);
77-
} catch (response) {
78-
return response.status === 404 ? -{{{ cDefs.ENOENT }}} : -{{{ cDefs.EBADF }}};
127+
await getFileRange(file, offset || 0, length);
128+
} catch (failedResponse) {
129+
return failedResponse.status === 404 ? -{{{ cDefs.ENOENT }}} : -{{{ cDefs.EBADF }}};
79130
}
80-
return jsFileOps.read(file, buffer, length, offset);
131+
var fileInfo = wasmFS$JSMemoryRanges[file];
132+
var chunks = fileInfo.chunks;
133+
var chunkSize = fileInfo.chunkSize;
134+
var firstChunk = (offset / chunkSize) | 0;
135+
// See comments in getFileRange.
136+
var lastChunk = ((offset+length-1) / chunkSize) | 0;
137+
var readLength = 0;
138+
for (var i = firstChunk; i <= lastChunk; i++) {
139+
var chunk = chunks[i];
140+
var start = Math.max(i*chunkSize, offset);
141+
var chunkStart = i*chunkSize;
142+
var end = Math.min(chunkStart+chunkSize, offset+length);
143+
HEAPU8.set(chunk.subarray(start-chunkStart, end-chunkStart), buffer+(start-offset));
144+
readLength = end - offset;
145+
}
146+
return readLength;
81147
},
82148
getSize: async (file) => {
83149
try {
84-
await getFile(file);
85-
} catch (response) {}
86-
return jsFileOps.getSize(file);
150+
await getFileRange(file, 0, 0);
151+
} catch (failedResponse) {
152+
return 0;
153+
}
154+
return wasmFS$JSMemoryRanges[file].size;
87155
},
88156
};
89157
},

system/include/emscripten/wasmfs.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,23 @@ typedef backend_t (*backend_constructor_t)(void*);
4848

4949
backend_t wasmfs_create_memory_backend(void);
5050

51+
// Fetch backend
52+
//
53+
// Creates a new fetchfs backend. FetchFS will backstop filesystem
54+
// reads to HTTP fetch requests, which will download just specific
55+
// ranges of the requested files. FetchFS works best when your web
56+
// server supports HTTP range requests, and it's important that those
57+
// files are not stored encrypted or compressed at rest. FetchFS by
58+
// default will dispatch HTTP requests to URLs beginning with base_url
59+
// and ending with whatever the file's path is relative to where the
60+
// fetchfs directory is mounted.
61+
//
62+
// Individual range requests will be no bigger than chunk_size, and will
63+
// be aligned to boundaries of chunk_size. Files smaller than chunk_size
64+
// will be downloaded all at once.
65+
//
66+
// If chunk_size is 0, a reasonable default value will be used.
67+
//
5168
// Note: this cannot be called on the browser main thread because it might
5269
// deadlock while waiting for its dedicated worker thread to be spawned.
5370
//
@@ -57,7 +74,9 @@ backend_t wasmfs_create_memory_backend(void);
5774
//
5875
// TODO: Add an async version of this function that will work on the main
5976
// thread.
60-
backend_t wasmfs_create_fetch_backend(const char* base_url __attribute__((nonnull)));
77+
//
78+
backend_t wasmfs_create_fetch_backend(const char* base_url __attribute__((nonnull)),
79+
uint32_t chunk_size);
6180

6281
backend_t wasmfs_create_node_backend(const char* root __attribute__((nonnull)));
6382

system/lib/wasmfs/backends/fetch_backend.cpp

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,39 @@
1212

1313
namespace wasmfs {
1414

15+
const uint32_t DEFAULT_CHUNK_SIZE = 16*1024*1024;
16+
17+
class FetchBackend : public wasmfs::ProxiedAsyncJSBackend {
18+
std::string baseUrl;
19+
uint32_t chunkSize;
20+
public:
21+
FetchBackend(const std::string& baseUrl,
22+
uint32_t chunkSize,
23+
std::function<void(backend_t)> setupOnThread)
24+
: ProxiedAsyncJSBackend(setupOnThread), baseUrl(baseUrl), chunkSize(chunkSize) {}
25+
std::shared_ptr<DataFile> createFile(mode_t mode) override;
26+
std::shared_ptr<Directory> createDirectory(mode_t mode) override;
27+
const std::string getFileURL(const std::string& filePath);
28+
uint32_t getChunkSize();
29+
};
30+
31+
1532
class FetchFile : public ProxiedAsyncJSImplFile {
1633
std::string filePath;
34+
std::string fileUrl;
1735

1836
public:
1937
FetchFile(const std::string& path,
2038
mode_t mode,
2139
backend_t backend,
2240
emscripten::ProxyWorker& proxy)
23-
: ProxiedAsyncJSImplFile(mode, backend, proxy), filePath(path) {}
41+
: ProxiedAsyncJSImplFile(mode, backend, proxy), filePath(path) {
42+
this->fileUrl = dynamic_cast<FetchBackend*>(getBackend())->getFileURL(filePath);
43+
}
2444

2545
const std::string& getPath() const { return filePath; }
46+
const std::string& getURL() const { return fileUrl; }
47+
const uint32_t getChunkSize() const { return dynamic_cast<FetchBackend*>(getBackend())->getChunkSize(); }
2648
};
2749

2850
class FetchDirectory : public MemoryDirectory {
@@ -57,40 +79,53 @@ class FetchDirectory : public MemoryDirectory {
5779
std::string getChildPath(const std::string& name) const {
5880
return dirPath + '/' + name;
5981
}
82+
83+
std::shared_ptr<File> getChild(const std::string& name) override {
84+
return MemoryDirectory::getChild(name);
85+
}
6086
};
6187

62-
class FetchBackend : public ProxiedAsyncJSBackend {
63-
std::string baseUrl;
88+
std::shared_ptr<DataFile> FetchBackend::createFile(mode_t mode) {
89+
return std::make_shared<FetchFile>("", mode, this, proxy);
90+
}
6491

65-
public:
66-
FetchBackend(const std::string& baseUrl,
67-
std::function<void(backend_t)> setupOnThread)
68-
: ProxiedAsyncJSBackend(setupOnThread), baseUrl(baseUrl) {}
92+
std::shared_ptr<Directory> FetchBackend::createDirectory(mode_t mode) {
93+
return std::make_shared<FetchDirectory>("", mode, this, proxy);
94+
}
6995

70-
std::shared_ptr<DataFile> createFile(mode_t mode) override {
71-
return std::make_shared<FetchFile>(baseUrl, mode, this, proxy);
96+
const std::string FetchBackend::getFileURL(const std::string& filePath) {
97+
if (filePath == "") {
98+
return baseUrl;
7299
}
100+
return baseUrl + "/" + filePath;
101+
}
73102

74-
std::shared_ptr<Directory> createDirectory(mode_t mode) override {
75-
return std::make_shared<FetchDirectory>(baseUrl, mode, this, proxy);
76-
}
77-
};
103+
uint32_t FetchBackend::getChunkSize() {
104+
return chunkSize;
105+
}
78106

79107
extern "C" {
80-
backend_t wasmfs_create_fetch_backend(const char* base_url) {
108+
backend_t wasmfs_create_fetch_backend(const char* base_url, uint32_t chunkSize) {
81109
// ProxyWorker cannot safely be synchronously spawned from the main browser
82110
// thread. See comment in thread_utils.h for more details.
83111
assert(!emscripten_is_main_browser_thread() &&
84112
"Cannot safely create fetch backend on main browser thread");
85113
return wasmFS.addBackend(std::make_unique<FetchBackend>(
86114
base_url ? base_url : "",
115+
chunkSize ? chunkSize : DEFAULT_CHUNK_SIZE,
87116
[](backend_t backend) { _wasmfs_create_fetch_backend_js(backend); }));
117+
}
118+
119+
const char* _wasmfs_fetch_get_file_url(void* ptr) {
120+
auto* file = reinterpret_cast<wasmfs::FetchFile*>(ptr);
121+
return file ? file->getURL().data() : nullptr;
88122
}
89123

90-
const char* EMSCRIPTEN_KEEPALIVE _wasmfs_fetch_get_file_path(void* ptr) {
124+
uint32_t _wasmfs_fetch_get_chunk_size(void* ptr) {
91125
auto* file = reinterpret_cast<wasmfs::FetchFile*>(ptr);
92-
return file ? file->getPath().data() : nullptr;
126+
return file ? file->getChunkSize() : DEFAULT_CHUNK_SIZE;
93127
}
128+
94129
}
95130

96131
} // namespace wasmfs

system/lib/wasmfs/backends/fetch_backend.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#include "wasmfs.h"
77

88
extern "C" {
9-
109
// See library_wasmfs_fetch.js
1110
void _wasmfs_create_fetch_backend_js(wasmfs::backend_t);
1211
}

0 commit comments

Comments
 (0)