diff --git a/.github/workflows/build_libzim_wasm.yml b/.github/workflows/build_libzim_wasm.yml index 7e513a4..50d06cd 100644 --- a/.github/workflows/build_libzim_wasm.yml +++ b/.github/workflows/build_libzim_wasm.yml @@ -1,25 +1,71 @@ -name: Docker Image CI +# Workflow to build and publish the libzim ASM and WASM artefacts together with the JavaScript wrapper. +# If this workflow is triggered by the creation of a draft release, then the artefacts are uploaded to the release assets. +# If it is triggered by a push or pull request to master, or manually, then the artefacts are archived under the corresponding Action. + +name: Build and publish release artefacts (Docker) on: - # push: - # branches: [ "master" ] - # pull_request: - # branches: [ "master" ] + push: + branches: [ master ] + tags: + - 'v*' # Tag push events matching v*, e.g. v1.0, v20.15.10 + pull_request: + branches: [ master ] workflow_dispatch: + inputs: + version: + description: | + If you wish to create a draft release, set the tag version, like v9.9.9 (must not be an existing tag). + If left blank or incorrect format, artefacts will be archived instead of being uploaded to Releases. 
+ required: false + default: '' +# Define top-level environment vars we can refer to below +env: + VERSION: ${{ github.ref_name }} + DISPATCH_VERSION: ${{ github.event.inputs.version }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + jobs: - build: - + name: Build and publish W/ASM artefacts runs-on: ubuntu-latest - steps: - uses: actions/checkout@v3 + # Customizes the Emscripten docker container via the Dockerfile in this repo - name: Build the Docker image run: docker build -t "docker-emscripten-libzim:v3" ./docker - - name: Compile the libzim WASM - run: docker run --rm -v $(pwd):/src -u $(id -u):$(id -g) -it docker-emscripten-libzim:v3 make - - name: Publish the WASM to draft release + # Creates the ASM and WASM artefacts, and the JS wrappers, using the Makefile in this repo + - name: Compile the libzim WASM artefacts + run: docker run --rm -v $(pwd):/src -u $(id -u):$(id -g) docker-emscripten-libzim:v3 make + - name: List directories with updated archives + run: | + echo -e "\nList ./ :" + ls -l + echo -e "\nList ./tests/prototype/ :" + ls -l tests/prototype/ + echo -e "\nList ./tests/test_large_file_access/ :" + ls -l tests/test_large_file_access/ + # If we are not creating a release (no v* tag push, no v* dispatch version), archive the artefacts under this Action run + - name: Archive build artefacts + if: | + github.event_name == 'pull_request' || (github.event_name == 'push' && ! startsWith(github.ref_name, 'v')) + 
|| (github.event_name == 'workflow_dispatch' && ! startsWith(github.event.inputs.version, 'v')) + uses: actions/upload-artifact@v3 + with: + name: libzim-wasm-artefacts + path: | + libzim-wasm.* + libzim-asm.* + tests/test_large_file_access/large_file_access.* + # Otherwise, zip the artefacts into respective packages (asm and wasm), create and upload releases + - name: Zip the artefacts and create draft release + id: zip-release + if: github.event_name == 'push' && startsWith(github.ref_name, 'v') || startsWith(github.event.inputs.version, 'v') run: | - echo 'If the run was successful, we will build a publish action here, to publish:' - ls -l a.out.* + if [[ ! $VERSION =~ ^v?[0-9.]+ ]]; then + VERSION=$DISPATCH_VERSION + fi + # Create a draft release and upload zipped artefacts as release assets + chmod +x ./scripts/create_draft_release.sh + ./scripts/create_draft_release.sh diff --git a/.github/workflows/upload_release_assets_to_kiwix.yml b/.github/workflows/upload_release_assets_to_kiwix.yml index f48427d..1026092 100644 --- a/.github/workflows/upload_release_assets_to_kiwix.yml +++ b/.github/workflows/upload_release_assets_to_kiwix.yml @@ -18,7 +18,6 @@ on: # Define top-level environment vars we can refer to below env: REPOSITORY: ${{ github.repository }} - RELEASE: ${{ github.event.release }} VERSION: ${{ github.event.release.tag_name }} DISPATCH_VERSION: ${{ github.event.inputs.version }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -34,14 +33,13 @@ jobs: - name: Find the release, get assets, and upload run: | $repository = $Env:REPOSITORY - $release = $Env:RELEASE $version = $Env:VERSION if (! $version) { $version = $Env:DISPATCH_VERSION } if (! $version) { Write-Host "`n** You must set a tag version so that we can find and upload the assets! **`n" -ForegroundColor Red exit 1 } - echo "`nUsing tag: $TAG_NAME for version upload`n" + echo "`nUsing tag: $version for version upload`n" $SSH_KEY = $Env:SSH_KEY if (! $SSH_KEY) { Write-Warning "The SSH secret is empty!" 
diff --git a/.gitignore b/.gitignore index 0b77dfe..d6e4b4c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ +.vscode/ /nbproject/private/ -/build/ +/build* /xz-*/ /zlib-*/ /zstd-*/ @@ -11,6 +12,6 @@ /zstd-*.tar.gz /icu4c-*.tgz /xapian-core-*.tar.xz -/libzim-*.tar.gz +/libzim-* ssh_key github_token \ No newline at end of file diff --git a/Makefile b/Makefile index 99fbfe9..672119b 100644 --- a/Makefile +++ b/Makefile @@ -1,29 +1,34 @@ -all: demo_file_api.js big_file_demo.js +all: libzim-wasm.dev.js libzim-asm.dev.js libzim-wasm.js libzim-asm.js large_file_access.js + build/lib/liblzma.so : - wget -N https://tukaani.org/xz/xz-5.2.4.tar.gz + # Origin: https://tukaani.org/xz/xz-5.2.4.tar.gz + [ ! -f xz-*.tar.gz ] && wget -N https://dev.kiwix.org/kiwix-build/xz-5.2.4.tar.gz || true tar xf xz-5.2.4.tar.gz cd xz-5.2.4 ; ./autogen.sh cd xz-5.2.4 ; emconfigure ./configure --prefix=`pwd`/../build cd xz-5.2.4 ; emmake make cd xz-5.2.4 ; emmake make install -build/lib/libz.a : +build/lib/libz.a : + # Version not yet available in dev.kiwix.org wget -N https://zlib.net/zlib-1.2.13.tar.gz tar xf zlib-1.2.13.tar.gz cd zlib-1.2.13 ; emconfigure ./configure --prefix=`pwd`/../build cd zlib-1.2.13 ; emmake make cd zlib-1.2.13 ; emmake make install -build/lib/libzstd.a : - wget -N https://github.com/facebook/zstd/releases/download/v1.4.4/zstd-1.4.4.tar.gz - tar xf zstd-1.4.4.tar.gz - cd zstd-1.4.4/build/meson ; meson setup --cross-file=../../../emscripten-crosscompile.ini -Dbin_programs=false -Dbin_contrib=false -Dzlib=disabled -Dlzma=disabled -Dlz4=disabled --prefix=`pwd`/../../../build --libdir=lib builddir - cd zstd-1.4.4/build/meson/builddir ; ninja - cd zstd-1.4.4/build/meson/builddir ; ninja install +build/lib/libzstd.a : + # Origin: https://github.com/facebook/zstd/releases/download/v1.4.4/zstd-1.4.4.tar.gz + [ ! 
-f zstd-*.tar.gz ] && wget -N https://dev.kiwix.org/kiwix-build/zstd-1.5.2.tar.gz || true + tar xf zstd-1.5.2.tar.gz + cd zstd-1.5.2/build/meson ; meson setup --cross-file=../../../emscripten-crosscompile.ini -Dbin_programs=false -Dbin_contrib=false -Dzlib=disabled -Dlzma=disabled -Dlz4=disabled --prefix=`pwd`/../../../build --libdir=lib builddir + cd zstd-1.5.2/build/meson/builddir ; ninja + cd zstd-1.5.2/build/meson/builddir ; ninja install build/lib/libicudata.so : - wget -N https://github.com/unicode-org/icu/releases/download/release-69-1/icu4c-69_1-src.tgz - tar xf icu4c-69_1-src.tgz + # Version not yet available in dev.kiwix.org + wget -N https://github.com/unicode-org/icu/releases/download/release-71-1/icu4c-71_1-src.tgz + tar xf icu4c-71_1-src.tgz # It's no use trying to compile examples sed -i -e 's/^SUBDIRS =\(.*\)$$(DATASUBDIR) $$(EXTRA) $$(SAMPLE) $$(TEST)\(.*\)/SUBDIRS =\1\2/' icu/source/Makefile.in cd icu/source ; emconfigure ./configure --prefix=`pwd`/../../build @@ -31,28 +36,46 @@ build/lib/libicudata.so : cd icu/source ; emmake make install build/lib/libxapian.a : build/lib/libz.a - wget -N https://oligarchy.co.uk/xapian/1.4.18/xapian-core-1.4.18.tar.xz + # Origin: https://oligarchy.co.uk/xapian/1.4.18/xapian-core-1.4.18.tar.xz + [ ! 
-f xapian-*.tar.xz ] && wget -N https://dev.kiwix.org/kiwix-build/xapian-core-1.4.18.tar.xz || true tar xf xapian-core-1.4.18.tar.xz # Some options coming from https://github.com/xapian/xapian/tree/master/xapian-core/emscripten - #cd xapian-core-1.4.18; emconfigure ./configure --prefix=`pwd`/../build "CFLAGS=-I`pwd`/../build/include -L`pwd`/../build/lib" "CXXFLAGS=-I`pwd`/../build/include -L`pwd`/../build/lib" CPPFLAGS='-DFLINTLOCK_USE_FLOCK' CXXFLAGS='-Oz -s USE_ZLIB=1 -fno-rtti' --disable-backend-honey --disable-backend-inmemory --disable-shared --disable-backend-remote + # cd xapian-core-1.4.18; emconfigure ./configure --prefix=`pwd`/../build "CFLAGS=-I`pwd`/../build/include -L`pwd`/../build/lib" "CXXFLAGS=-I`pwd`/../build/include -L`pwd`/../build/lib" CPPFLAGS='-DFLINTLOCK_USE_FLOCK' CXXFLAGS='-Oz -s USE_ZLIB=1 -fno-rtti' --disable-backend-honey --disable-backend-inmemory --disable-shared --disable-backend-remote cd xapian-core-1.4.18; emconfigure ./configure --prefix=`pwd`/../build "CFLAGS=-I`pwd`/../build/include -L`pwd`/../build/lib" "CXXFLAGS=-I`pwd`/../build/include -L`pwd`/../build/lib" --disable-shared --disable-backend-remote cd xapian-core-1.4.18; emmake make "CFLAGS=-I`pwd`/../build/include -L`pwd`/../build/lib -std=c++11" "CXXFLAGS=-I`pwd`/../build/include -L`pwd`/../build/lib -std=c++11" cd xapian-core-1.4.18; emmake make install build/lib/libzim.a : build/lib/liblzma.so build/lib/libz.a build/lib/libzstd.a build/lib/libicudata.so build/lib/libxapian.a - wget -N --content-disposition https://github.com/openzim/libzim/archive/7.2.2.tar.gz - tar xf libzim-7.2.2.tar.gz + # Origin: wget -N --content-disposition https://github.com/openzim/libzim/archive/7.2.2.tar.gz + [ ! 
-f libzim-*.tar.xz ] && wget -N https://download.openzim.org/release/libzim/libzim-8.1.0.tar.xz || true + tar xf libzim-8.1.0.tar.xz # It's no use trying to compile examples - sed -i -e "s/^subdir('examples')//" libzim-7.2.2/meson.build - cd libzim-7.2.2; PKG_CONFIG_PATH=/src/build/lib/pkgconfig meson --prefix=`pwd`/../build --cross-file=../emscripten-crosscompile.ini . build -DUSE_MMAP=false - cd libzim-7.2.2; ninja -C build - cd libzim-7.2.2; ninja -C build install + sed -i -e "s/^subdir('examples')//" libzim-8.1.0/meson.build + cd libzim-8.1.0; PKG_CONFIG_PATH=/src/build/lib/pkgconfig meson --prefix=`pwd`/../build --cross-file=../emscripten-crosscompile.ini . build -DUSE_MMAP=false + cd libzim-8.1.0; ninja -C build + cd libzim-8.1.0; ninja -C build install + +# Development WASM version for testing, completely unoptimized +libzim-wasm.dev.js: build/lib/libzim.a libzim_bindings.cpp prejs_file_api.js postjs_file_api.js + em++ -o libzim-wasm.dev.js --bind libzim_bindings.cpp -I/src/build/include -L/src/build/lib -lzim -llzma -lzstd -lxapian -lz -licui18n -licuuc -licudata -lpthread -lm -fdiagnostics-color=always -pipe -Wall -Winvalid-pch -Wnon-virtual-dtor -Werror -std=c++11 -O0 -g --pre-js prejs_file_api.js --post-js postjs_file_api.js -s WASM=1 -s DISABLE_EXCEPTION_CATCHING=0 -s "EXPORTED_RUNTIME_METHODS=['ALLOC_NORMAL','printErr','ALLOC_STACK','print']" -s DEMANGLE_SUPPORT=1 -s INITIAL_MEMORY=83886080 -s ALLOW_MEMORY_GROWTH=1 -lworkerfs.js + cp libzim-wasm.dev.* tests/prototype/ + +# Development ASM version for testing, completely unoptimized +libzim-asm.dev.js: build/lib/libzim.a libzim_bindings.cpp prejs_file_api.js postjs_file_api.js + em++ -o libzim-asm.dev.js --bind libzim_bindings.cpp -I/src/build/include -L/src/build/lib -lzim -llzma -lzstd -lxapian -lz -licui18n -licuuc -licudata -lpthread -lm -fdiagnostics-color=always -pipe -Wall -Winvalid-pch -Wnon-virtual-dtor -Werror -std=c++11 -O0 -g --pre-js prejs_file_api.js --post-js postjs_file_api.js -s WASM=0 
--memory-init-file 0 -s DISABLE_EXCEPTION_CATCHING=0 -s "EXPORTED_RUNTIME_METHODS=['ALLOC_NORMAL','printErr','ALLOC_STACK','print']" -s DEMANGLE_SUPPORT=1 -s INITIAL_MEMORY=83886080 -s ALLOW_MEMORY_GROWTH=1 -lworkerfs.js + +# Production WASM version, optimized and packed +libzim-wasm.js: build/lib/libzim.a libzim_bindings.cpp prejs_file_api.js postjs_file_api.js + em++ -o libzim-wasm.js --bind libzim_bindings.cpp -I/src/build/include -L/src/build/lib -lzim -llzma -lzstd -lxapian -lz -licui18n -licuuc -licudata -O3 --pre-js prejs_file_api.js --post-js postjs_file_api.js -s WASM=1 -s "EXPORTED_RUNTIME_METHODS=['ALLOC_NORMAL','printErr','ALLOC_STACK','print']" -s INITIAL_MEMORY=83886080 -s ALLOW_MEMORY_GROWTH=1 -lworkerfs.js -demo_file_api.js: build/lib/libzim.a demo_file_api.cpp prejs_file_api.js postjs_file_api.js - em++ --bind demo_file_api.cpp -I/src/build/include -L/src/build/lib -lzim -llzma -lzstd -lxapian -lz -licui18n -licuuc -licudata -lpthread -lm -fdiagnostics-color=always -pipe -Wall -Winvalid-pch -Wnon-virtual-dtor -Werror -std=c++11 -O0 -g --pre-js prejs_file_api.js --post-js postjs_file_api.js -s DISABLE_EXCEPTION_CATCHING=0 -s "EXPORTED_RUNTIME_METHODS=['ALLOC_NORMAL','printErr','ALLOC_STACK','print']" -s DEMANGLE_SUPPORT=1 -s TOTAL_MEMORY=83886080 -s ALLOW_MEMORY_GROWTH=1 -lworkerfs.js +# Production ASM version, optimized and packed +libzim-asm.js: build/lib/libzim.a libzim_bindings.cpp prejs_file_api.js postjs_file_api.js + em++ -o libzim-asm.js --bind libzim_bindings.cpp -I/src/build/include -L/src/build/lib -lzim -llzma -lzstd -lxapian -lz -licui18n -licuuc -licudata -O3 --pre-js prejs_file_api.js --post-js postjs_file_api.js -s WASM=0 --memory-init-file 0 -s MIN_EDGE_VERSION=40 -s "EXPORTED_RUNTIME_METHODS=['ALLOC_NORMAL','printErr','ALLOC_STACK','print']" -s INITIAL_MEMORY=83886080 -s ALLOW_MEMORY_GROWTH=1 -lworkerfs.js -big_file_demo.js: - em++ --bind -std=c++11 -O0 --pre-js prejs_file_api_testbigfile.js --post-js postjs_file_api_testbigfile.js 
big_file_test.cpp -lworkerfs.js -o bigfile.js +# Test case: for testing large files +large_file_access.js: test_file_bindings.cpp prejs_test_file_access.js postjs_test_file_access.js + em++ -o large_file_access.js --bind test_file_bindings.cpp -std=c++11 -O0 --pre-js prejs_test_file_access.js --post-js postjs_test_file_access.js -lworkerfs.js + cp large_file_access.* tests/test_large_file_access/ clean : rm -rf xz-* @@ -62,6 +85,5 @@ clean : rm -rf icu* rm -rf libzim-* rm -rf build - rm a.out.* .PHONY : all clean diff --git a/README.md b/README.md index a6ccb95..90b3928 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,35 @@ -# Prototype of libzim in Webassembly +# Prototype of libzim in WebAssembly (WASM) -Demo at https://openzim.github.io/javascript-libzim/ +This repository provides the source code and utilities for compiling the [ZIM File](https://wiki.openzim.org/wiki/ZIM_file_format) reader [libzim](https://wiki.openzim.org/wiki/Libzim) from C++ to [WebAssembly](https://developer.mozilla.org/en-US/docs/WebAssembly) (and [ASM.js](https://developer.mozilla.org/en-US/docs/Games/Tools/asm.js)). -It uses WORKERFS as FS with emscripten and run in a web worker, file object is mount before run, and name is passed as argument. +A prototype in HTML/JS, for testing the WASM version, is provided at https://openzim.github.io/javascript-libzim/tests/prototype/. This prototype uses WORKERFS as the Emscripten File System and runs in a Web Worker. The file object is mounted before run, and the name is passed as an argument. + +There is also an HTML/JS utility for testing the ability of Emscripten File Systems to read large files (multi-gigabyte) at https://openzim.github.io/javascript-libzim/tests/test_large_file_access/. ## Steps to recompile manually + - Install emscripten : https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html -- Install dependencies necessary for compilation. 
On ubuntu 18.04, you need to activate universe repository and : +- Install dependencies necessary for compilation. On ubuntu 18.04, you need to activate universe repository and: + ``` sudo apt install ninja-build meson pkg-config python3 autopoint libtool autoconf sudo apt install zlib1g-dev libicu-dev libxapian-dev liblzma-dev ``` + - activate emscripten environment variables with something like `source ./emsdk_env.sh` - run `make` ## Steps to recompile with Docker -While being at the root of this repository : - - Build the Docker image with the provided Dockerfile (based on https://hub.docker.com/r/emscripten/emsdk , which is based on Debian) : + +While being at the root of this repository: + - Build the Docker image with the provided Dockerfile (based on https://hub.docker.com/r/emscripten/emsdk, which is based on Debian): + ``` docker build -t "docker-emscripten-libzim:v3" ./docker ``` - - Run the build with : + + - Run the build with: + ``` docker run --rm -v $(pwd):/src -v /tmp/emscripten_cache/:/home/emscripten/.emscripten_cache -u $(id -u):$(id -g) -it docker-emscripten-libzim:v3 make ``` diff --git a/emscripten-crosscompile.ini b/emscripten-crosscompile.ini index bd72343..090d958 100644 --- a/emscripten-crosscompile.ini +++ b/emscripten-crosscompile.ini @@ -4,6 +4,7 @@ cpp = 'em++' c_ld = 'wasm-ld' cpp_ld = 'wasm-ld' pkgconfig = 'pkg-config' +ar = 'emar' [host_machine] system = 'emscripten' diff --git a/bigfile.js b/large_file_access.js similarity index 99% rename from bigfile.js rename to large_file_access.js index b5778f2..677de42 100644 --- a/bigfile.js +++ b/large_file_access.js @@ -1503,7 +1503,7 @@ function createExportWrapper(name, fixedasm) { } var wasmBinaryFile; - wasmBinaryFile = 'bigfile.wasm'; + wasmBinaryFile = 'large_file_access.wasm'; if (!isDataURI(wasmBinaryFile)) { wasmBinaryFile = locateFile(wasmBinaryFile); } diff --git a/bigfile.wasm b/large_file_access.wasm similarity index 100% rename from bigfile.wasm rename to 
large_file_access.wasm diff --git a/demo_file_api.cpp b/libzim_bindings.cpp similarity index 94% rename from demo_file_api.cpp rename to libzim_bindings.cpp index 4e794e6..fb99d3b 100644 --- a/demo_file_api.cpp +++ b/libzim_bindings.cpp @@ -14,7 +14,7 @@ std::shared_ptr g_archive; int main(int argc, char* argv[]) { - std::cout << "wasm initialized" << std::endl; + std::cout << "assembler initialized" << std::endl; return 0; } @@ -95,11 +95,11 @@ std::unique_ptr getEntryByPath(std::string url) { } // Search for a text, and returns the path of the first result -std::vector search(std::string text) { +std::vector search(std::string text, int numResults) { auto searcher = zim::Searcher(*g_archive); auto query = zim::Query(text); auto search = searcher.search(query); - auto searchResultSet = search.getResults(0,50); + auto searchResultSet = search.getResults(0, numResults); std::vector ret; for(auto entry:searchResultSet) { ret.push_back(EntryWrapper(entry)); diff --git a/postjs_file_api_testbigfile.js b/postjs_test_file_access.js similarity index 100% rename from postjs_file_api_testbigfile.js rename to postjs_test_file_access.js diff --git a/prejs_file_api.js b/prejs_file_api.js index d80a2b3..ad3cedd 100644 --- a/prejs_file_api.js +++ b/prejs_file_api.js @@ -1,5 +1,4 @@ self.addEventListener("message", function(e) { - var t0 = performance.now(); var action = e.data.action; var path = e.data.path; var outgoingMessagePort = e.ports[0]; @@ -13,29 +12,11 @@ self.addEventListener("message", function(e) { item = entry.getItem(follow); // It's necessary to keep an instance of the blob till the end of this block, // to ensure that the corresponding content is not deleted on the C side. - var t1 = performance.now(); var blob = item.getData(); - var t2 = performance.now(); var content = blob.getContent(); - var t3 = performance.now(); // TODO : is there a more efficient way to make the Array detachable? So that it can be transfered back from the WebWorker without a copy? 
var contentArray = new Uint8Array(content); - var t4 = performance.now(); outgoingMessagePort.postMessage({ content: contentArray, mimetype: item.getMimetype(), isRedirect: entry.isRedirect()}); - var t5 = performance.now(); - var getTime = Math.round(t1 - t0); - var getDataTime = Math.round(t2 - t1); - var getContentTime = Math.round(t3 - t2); - var copyArrayTime = Math.round(t4 - t3); - var postMessageTime = Math.round(t5 - t4); - var totalTime = Math.round(t5 - t0); - console.debug("content length = " + content.length + " read in " + totalTime + " ms" - + " (" + getTime + " ms to find the entry, " - + getDataTime + " ms for getData, " - + getContentTime + " ms for getContent, " - + copyArrayTime + " ms for array copying, " - + postMessageTime + " ms for postMessage" - + ")"); } else { outgoingMessagePort.postMessage({ content: new Uint8Array(), isRedirect: true, redirectPath: entry.getRedirectEntry().getPath()}); @@ -47,7 +28,8 @@ self.addEventListener("message", function(e) { } else if (action === "search") { var text = e.data.text; - var entries = Module[action](text); + var numResults = e.data.numResults || 50; + var entries = Module[action](text, numResults); console.debug("Found nb results = " + entries.size(), entries); var serializedEntries = []; for (var i=0; i .zim) var baseZimFileName = files[0].name.replace(/\.zim..$/, '.zim'); Module = {}; Module["onRuntimeInitialized"] = function() { - console.debug("runtime initialized"); Module.loadArchive("/work/" + baseZimFileName); + console.debug(assemblerType + " initialized"); outgoingMessagePort.postMessage("runtime initialized"); }; Module["arguments"] = []; diff --git a/prejs_file_api_with_performance_tests.js b/prejs_file_api_with_performance_tests.js new file mode 100644 index 0000000..5215ff1 --- /dev/null +++ b/prejs_file_api_with_performance_tests.js @@ -0,0 +1,86 @@ +self.addEventListener("message", function(e) { + var t0 = performance.now(); + var action = e.data.action; + var path = e.data.path; + 
var outgoingMessagePort = e.ports[0]; + console.debug("WebWorker called with action=" + action); + if (action === "getEntryByPath") { + var follow = e.data.follow; + var entry = Module[action](path); + if (entry) { + var item = {}; + if (follow || !entry.isRedirect()) { + item = entry.getItem(follow); + // It's necessary to keep an instance of the blob till the end of this block, + // to ensure that the corresponding content is not deleted on the C side. + var t1 = performance.now(); + var blob = item.getData(); + var t2 = performance.now(); + var content = blob.getContent(); + var t3 = performance.now(); + // TODO : is there a more efficient way to make the Array detachable? So that it can be transfered back from the WebWorker without a copy? + var contentArray = new Uint8Array(content); + var t4 = performance.now(); + outgoingMessagePort.postMessage({ content: contentArray, mimetype: item.getMimetype(), isRedirect: entry.isRedirect()}); + var t5 = performance.now(); + var getTime = Math.round(t1 - t0); + var getDataTime = Math.round(t2 - t1); + var getContentTime = Math.round(t3 - t2); + var copyArrayTime = Math.round(t4 - t3); + var postMessageTime = Math.round(t5 - t4); + var totalTime = Math.round(t5 - t0); + console.debug("content length = " + content.length + " read in " + totalTime + " ms" + + " (" + getTime + " ms to find the entry, " + + getDataTime + " ms for getData, " + + getContentTime + " ms for getContent, " + + copyArrayTime + " ms for array copying, " + + postMessageTime + " ms for postMessage" + + ")"); + } + else { + outgoingMessagePort.postMessage({ content: new Uint8Array(), isRedirect: true, redirectPath: entry.getRedirectEntry().getPath()}); + } + } + else { + outgoingMessagePort.postMessage({ content: new Uint8Array(), mimetype: "unknown", isRedirect: false}); + } + } + else if (action === "search") { + var text = e.data.text; + var numResults = e.data.numResults || 50; + var entries = Module[action](text, numResults); + 
console.debug("Found nb results = " + entries.size(), entries); + var serializedEntries = []; + for (var i=0; i .zim) + var baseZimFileName = files[0].name.replace(/\.zim..$/, '.zim'); + Module = {}; + Module["onRuntimeInitialized"] = function() { + console.debug("runtime initialized"); + Module.loadArchive("/work/" + baseZimFileName); + outgoingMessagePort.postMessage("runtime initialized"); + }; + Module["arguments"] = []; + for (let i = 0; i < files.length; i++) { + Module["arguments"].push("/work/" + files[i].name); + } + Module["preRun"] = function() { + FS.mkdir("/work"); + FS.mount(WORKERFS, { + files: files + }, "/work"); + }; + console.debug("baseZimFileName = " + baseZimFileName); + console.debug('Module["arguments"] = ' + Module["arguments"]) + diff --git a/prejs_file_api_testbigfile.js b/prejs_test_file_access.js similarity index 100% rename from prejs_file_api_testbigfile.js rename to prejs_test_file_access.js diff --git a/scripts/Upload-KiwixRelease.ps1 b/scripts/Upload-KiwixRelease.ps1 index 5534fd5..53a686c 100644 --- a/scripts/Upload-KiwixRelease.ps1 +++ b/scripts/Upload-KiwixRelease.ps1 @@ -67,13 +67,13 @@ function Main { } } # We should have a release, so now get the assets - $releaseAssetsURLs = @() + $releaseAssets = @() if ($release.assets) { $release.assets | % { $asset = $_ if ($asset.name -imatch $rgxAssetMatch) { $assetUrl = $asset.url + "/" + $asset.name - $releaseAssetsURLs += $assetUrl + $releaseAssets += $asset Write-Host "Found asset $assetUrl!" -ForegroundColor Green } } @@ -85,18 +85,18 @@ function Main { # If we found assets, download them to file system $releaseFiles = @() $errorFlag = $false - if ($releaseAssetsURLs.count) { - $releaseAssetsURLs | % { - $filename = ($_ -replace "^.+/", "") + if ($releaseAssets.count) { + $releaseAssets | % { + $asset = $_ if (! 
$dryrun) { - Invoke-WebRequest $_ -OutFile $filename + Invoke-WebRequest $asset.url -OutFile $asset.name } - if ((Test-Path $filename -PathType leaf) -or $dryrun) { + if ((Test-Path $asset.name -PathType leaf) -or $dryrun) { if ($dryrun) { "[DRYRUN]:"} - Write-Host "Downloaded asset $filename to local file system..." -ForegroundColor Green - $releaseFiles += $filename # Store the filename to access when we upload + Write-Host "`n* Downloaded asset" $asset.name "to local file system..." -ForegroundColor Green + $releaseFiles += $asset.name # Store the filename to access when we upload } else { - Write-Host "`n** The file $filename does not appear to have downloaded correctly! **`n" -ForegroundColor Red + Write-Host "`n** The file" $asset.name "does not appear to have downloaded correctly! **`n" -ForegroundColor Red $errorFlag = $true } } @@ -105,7 +105,7 @@ function Main { } } else { "" - Write-Warning "No assets of Release " + $release.id + " ($tag) match $rgxAssetMatch!" + Write-Warning "No assets of Release $($release.id) ($tag) match $rgxAssetMatch!" exit 1 } # We should have filenames and files now, so upload to Kiwix diff --git a/scripts/create_draft_release.sh b/scripts/create_draft_release.sh new file mode 100644 index 0000000..319efa2 --- /dev/null +++ b/scripts/create_draft_release.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# This script packages the ASM and WASM artefacts, and creates a draft release +# It is designed to be run by a GitHub action. To test, from the commandline, supply $VERSION and $GITHUB_TOKEN. + +echo "Zipping the release archives..." +NUMERIC_VERSION=$(sed 's/^v//' <<<"$VERSION") +zip libzim_wasm_$NUMERIC_VERSION.zip libzim-wasm.* +zip libzim_asm_$NUMERIC_VERSION.zip libzim-asm.* +# If we initiated the build with a tag version, and the version doesn't already exist +if [ $DISPATCH_VERSION ]; then + if [[ ! 
$(git tag) =~ "$VERSION" ]]; then + echo "Creating a corresponding tag: $VERSION" + git tag $VERSION + git push origin $VERSION + fi +fi +echo "Creating the draft release..." +REST_RESPONSE=$( + curl \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + https://api.github.com/repos/openzim/javascript-libzim/releases \ + -d "{\"tag_name\":\"$VERSION\",\"target_commitish\":\"master\",\"name\":\"Release $VERSION\",\"body\":\"\",\"draft\":true,\"prerelease\":false,\"generate_release_notes\":true}" +) +echo $REST_RESPONSE +UPLOAD_URL=$(echo $REST_RESPONSE | jq -r '.upload_url') +UPLOAD_URL=$(sed -E 's/\{.+\}$//' <<<"$UPLOAD_URL") +if [ -z $UPLOAD_URL ]; then + echo -e "\n***ERROR! We could not create the draft release!***" + exit 2 +else + echo "Draft release created, files will be uploaded to: $UPLOAD_URL" +fi +# echo "UPLOAD_URL=$REST_RESPONSE" >> $GITHUB_OUTPUT # Use this if you need to access the URL in a later step with steps.zip-release.outputs.UPLOAD_URL +# Upload archives to the draft release +for FILE in "libzim_wasm_$NUMERIC_VERSION.zip" "libzim_asm_$NUMERIC_VERSION.zip" +do + echo -e "\nUploading $FILE to $UPLOAD_URL?name=$FILE..." + curl \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Content-Type: application/zip" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -T "$FILE" \ + "$UPLOAD_URL?name=$FILE" +done \ No newline at end of file diff --git a/big_file_test.cpp b/test_file_bindings.cpp similarity index 100% rename from big_file_test.cpp rename to test_file_bindings.cpp diff --git a/index.html b/tests/prototype/index.html similarity index 97% rename from index.html rename to tests/prototype/index.html index dc94be7..86ad8e3 100644 --- a/index.html +++ b/tests/prototype/index.html @@ -4,7 +4,7 @@

Prototype to test libzim compiled with emscripten

It uses a Web Worker to run libzim (compiled as wasm), and the File api to access the local file.

- Select a local ZIM file, open your browser console, and wait for the message "wasm initialized" in the console. Afterwards you can use the buttons below.
+ Select a local ZIM file, open your browser console, and wait for the message "assembler initialized" in the console. Afterwards you can use the buttons below.
For now, split ZIM files are not supported.
If you want to do a quick test, you can choose the file wikipedia_en_ray_charles_maxi_2021-10.zim, and use "A/Baby_Grand" as the Path.
@@ -12,7 +12,7 @@

Prototype to test libzim compiled with emscripten

ZIM file(s) :