From 19ba45d22a77fb772f9823bdb041366eff0e4fbd Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Sat, 22 Feb 2025 21:10:31 +0100 Subject: [PATCH] Split python polyglot into a bash script and a js script --- Makefile | 4 + src/js/emscripten-settings.ts | 2 + src/js/pyodide.ts | 5 + src/templates/python | 187 +++--------------------------- src/templates/python_cli_entry.js | 154 ++++++++++++++++++++++++ 5 files changed, 182 insertions(+), 170 deletions(-) create mode 100644 src/templates/python_cli_entry.js diff --git a/Makefile b/Makefile index 7b50e1ed5c0..c32bd0f47ad 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,7 @@ all-but-packages: \ \ dist/package.json \ dist/python \ + dist/python_cli_entry.js \ dist/python_stdlib.zip \ dist/test.html \ dist/module_test.html \ @@ -252,6 +253,9 @@ dist/module_test.html: src/templates/module_test.html dist/python: src/templates/python cp $< $@ +dist/python_cli_entry.js: src/templates/python_cli_entry.js + cp $< $@ + .PHONY: dist/console.html dist/console.html: src/templates/console.html cp $< $@ diff --git a/src/js/emscripten-settings.ts b/src/js/emscripten-settings.ts index 737d80e97be..72433597c99 100644 --- a/src/js/emscripten-settings.ts +++ b/src/js/emscripten-settings.ts @@ -17,6 +17,7 @@ export interface EmscriptenSettings { readonly print?: (a: string) => void; readonly printErr?: (a: string) => void; readonly onExit?: (code: number) => void; + readonly thisProgram?: string; readonly arguments: readonly string[]; readonly instantiateWasm?: ( imports: { [key: string]: any }, @@ -49,6 +50,7 @@ export function createSettings(config: ConfigType): EmscriptenSettings { onExit(code) { settings.exitCode = code; }, + thisProgram: config.sysExecutable, arguments: config.args, API: { config } as API, // Emscripten calls locateFile exactly one time with argument diff --git a/src/js/pyodide.ts b/src/js/pyodide.ts index 181843a6412..ff0a1fcb4f3 100644 --- a/src/js/pyodide.ts +++ b/src/js/pyodide.ts @@ -46,6 +46,7 @@ export type 
ConfigType = { stdout?: (msg: string) => void; stderr?: (msg: string) => void; jsglobals?: object; + sysExecutable?: string; args: string[]; _node_mounts: string[]; env: { [key: string]: string }; @@ -139,6 +140,10 @@ export async function loadPyodide( * Default: ``globalThis`` */ jsglobals?: object; + /** + * Determine the value of ``sys.executable``. + */ + sysExecutable?: string; /** * Command line arguments to pass to Python on startup. See `Python command * line interface options diff --git a/src/templates/python b/src/templates/python index ae066bd00fc..fa3ae47fcab 100755 --- a/src/templates/python +++ b/src/templates/python @@ -1,28 +1,6 @@ #!/usr/bin/env bash -":" /* << "EOF" -This file is a bash/node polyglot. This is needed for a few reasons: - -TODO: We don't support node < 18 anymore, so maybe we can remove this? - -1. In node 14 we must pass `--experimental-wasm-bigint`. In node >14 we cannot -pass --experimental-wasm-bigint - -2. Emscripten vendors node 14 so it is desirable not to require node >= 16 - -3. We could use a bash script in a separate file to determine the flags needed, -but virtualenv looks up the current file and uses it directly. So if we make -python.sh and have it invoke python.js, then the virtualenv will invoke python.js -directly without the `--experimental-wasm-bigint` flag and so the virtualenv won't -work with node 14. - -Keeping the bash script and the JavaScript in the same file makes sure that even -inside the virtualenv the proper shell code is executed. -*/ - -/* -EOF -# bash set -e + if [[ $1 == "-m" ]] && [[ $2 == "pip" ]]; then # redirect python -m pip to execute in host environment shift 1 @@ -46,154 +24,23 @@ process.stdout.write("--"); EOF )") -exec node "$ARGS" "$0" "$@" -*/ - - -const { loadPyodide } = require("./pyodide"); -const fs = require("fs"); - -/** - * Determine which native top level directories to mount into the Emscripten - * file system. 
- * - * This is a bit brittle, if the machine has a top level directory with certain - * names it is possible this could break. The most surprising one here is tmp, I - * am not sure why but if we link tmp then the process silently fails. - */ -function rootDirsToMount() { - const skipDirs = ["dev", "lib", "proc", "tmp"]; - return fs - .readdirSync("/") - .filter((dir) => !skipDirs.includes(dir)) - .map((dir) => "/" + dir); -} - -function dirsToMount() { - extra_mounts = process.env["_PYODIDE_EXTRA_MOUNTS"] || ""; - return rootDirsToMount().concat(extra_mounts.split(":").filter(s => s)) -} - -async function main() { - let args = process.argv.slice(2); - try { - py = await loadPyodide({ - args, - env: Object.assign({ - PYTHONINSPECT: "", - }, process.env, { HOME: process.cwd() }), - fullStdLib: false, - _node_mounts: dirsToMount(), - // Strip out messages written to stderr while loading - // After Pyodide is loaded we will replace stdstreams with setupStreams. - stderr(e) { - if ( - [ - "warning: no blob constructor, cannot create blobs with mimetypes", - "warning: no BlobBuilder", - ].includes(e.trim()) - ) { - return; - } - console.warn(e); - } - }); - } catch (e) { - if (e.constructor.name !== "ExitStatus") { - throw e; - } - // If the user passed `--help`, `--version`, or a set of command line - // arguments that is invalid in some way, we will exit here. - process.exit(e.status); - } - py.setStdout(); - py.setStderr(); - let sideGlobals = py.runPython("{}"); - function handleExit(code) { - if (code === undefined) { - code = 0; - } - if (py._module._Py_FinalizeEx() < 0) { - code = 120; - } - // It's important to call `process.exit` immediately after - // `_Py_FinalizeEx` because otherwise any asynchronous tasks still - // scheduled will segfault. 
- process.exit(code); - }; - sideGlobals.set("handleExit", handleExit); - - py.runPython( - ` - from pyodide._package_loader import SITE_PACKAGES, should_load_dynlib - from pyodide.ffi import to_js - import re - dynlibs_to_load = to_js([ - str(path) for path in SITE_PACKAGES.glob("**/*.so*") - if should_load_dynlib(path) - ]) - `, - { globals: sideGlobals } - ); - const dynlibs = sideGlobals.get("dynlibs_to_load"); - for (const dynlib of dynlibs) { - try { - await py._module.API.loadDynlib(dynlib); - } catch(e) { - console.error("Failed to load lib ", dynlib); - console.error(e); - } +# Macs come with FreeBSD coreutils which doesn't have the -s option +# so feature detect and work around it. +if command -v grealpath > /dev/null 2>&1; then + # It has brew installed gnu core utils, use that + REALPATH="grealpath -s" +elif command -v realpath > /dev/null 2>&1 && realpath --version > /dev/null 2> /dev/null && realpath --version | grep GNU > /dev/null; then + # realpath points to GNU realpath so use it. + REALPATH="realpath -s" +else + # Shim for macs without GNU coreutils + abs_path () { + echo "$(cd "$(dirname "$1")" || exit; pwd)/$(basename "$1")" } - // Warning: this sounds like it might not do anything important, but it - // fills in the GOT. There can be segfaults if we leave it out. - // See https://github.com/emscripten-core/emscripten/issues/22052 - // TODO: Fix Emscripten so this isn't needed - py._module.reportUndefinedSymbols(); + REALPATH=abs_path +fi - py.runPython( - ` - import asyncio - # Keep the event loop alive until all tasks are finished, or SystemExit or - # KeyboardInterupt is raised. - loop = asyncio.get_event_loop() - # Make sure we don't run _no_in_progress_handler before we finish _run_main. - loop._in_progress += 1 - loop._no_in_progress_handler = handleExit - loop._system_exit_handler = handleExit - loop._keyboard_interrupt_handler = lambda: handleExit(130) - # Make shutil.get_terminal_size tell the terminal size accurately. 
- import shutil - from js.process import stdout - import os - def get_terminal_size(fallback=(80, 24)): - columns = getattr(stdout, "columns", None) - rows = getattr(stdout, "rows", None) - if columns is None: - columns = fallback[0] - if rows is None: - rows = fallback[1] - return os.terminal_size((columns, rows)) - shutil.get_terminal_size = get_terminal_size - `, - { globals: sideGlobals } - ); +RESOLVED_DIR=$(dirname "$(realpath "$0")") # quoted so a path containing spaces is not word-split - let errcode; - try { - errcode = py._module._run_main(); - } catch (e) { - if (e.constructor.name === "ExitStatus") { - process.exit(e.status); - } - py._api.fatal_error(e); - } - if (errcode) { - process.exit(errcode); - } - py.runPython("loop._decrement_in_progress()", { globals: sideGlobals }); -} -main().catch((e) => { - console.error(e); - process.exit(1); -}); +exec node "$ARGS" "$RESOLVED_DIR/python_cli_entry.js" --this-program="$($REALPATH "$0")" "$@" diff --git a/src/templates/python_cli_entry.js b/src/templates/python_cli_entry.js new file mode 100644 index 00000000000..5bc6834b5d0 --- /dev/null +++ b/src/templates/python_cli_entry.js @@ -0,0 +1,154 @@ +const { loadPyodide } = require("./pyodide"); +const fs = require("fs"); + +/** + * Determine which native top level directories to mount into the Emscripten + * file system. + * + * This is a bit brittle, if the machine has a top level directory with certain + * names it is possible this could break. The most surprising one here is tmp, I + * am not sure why but if we link tmp then the process silently fails. 
+ */ +function rootDirsToMount() { + const skipDirs = ["dev", "lib", "proc", "tmp"]; + return fs + .readdirSync("/") + .filter((dir) => !skipDirs.includes(dir)) + .map((dir) => "/" + dir); +} + +function dirsToMount() { + const extra_mounts = process.env["_PYODIDE_EXTRA_MOUNTS"] || ""; + return rootDirsToMount().concat(extra_mounts.split(":").filter(s => s)) +} + +const thisProgramFlag = "--this-program="; +const thisProgramIndex = process.argv.findIndex((x) => + x.startsWith(thisProgramFlag), +); +const args = process.argv.slice(thisProgramIndex === -1 ? 2 : thisProgramIndex + 1); // flag absent: everything after the script path +const sysExecutable = thisProgramIndex === -1 ? undefined : process.argv[thisProgramIndex].slice(thisProgramFlag.length); + +async function main() { + try { + var py = await loadPyodide({ // var: function-scoped, py is read after this try block + args, + sysExecutable, + env: Object.assign({ + PYTHONINSPECT: "", + }, process.env, { HOME: process.cwd() }), + fullStdLib: false, + _node_mounts: dirsToMount(), + // Strip out messages written to stderr while loading + // After Pyodide is loaded we will replace stdstreams with setupStreams. + stderr(e) { + if ( + [ + "warning: no blob constructor, cannot create blobs with mimetypes", + "warning: no BlobBuilder", + ].includes(e.trim()) + ) { + return; + } + console.warn(e); + } + }); + } catch (e) { + if (e.constructor.name !== "ExitStatus") { + throw e; + } + // If the user passed `--help`, `--version`, or a set of command line + // arguments that is invalid in some way, we will exit here. + process.exit(e.status); + } + py.setStdout(); + py.setStderr(); + let sideGlobals = py.runPython("{}"); + function handleExit(code) { + if (code === undefined) { + code = 0; + } + if (py._module._Py_FinalizeEx() < 0) { + code = 120; + } + // It's important to call `process.exit` immediately after + // `_Py_FinalizeEx` because otherwise any asynchronous tasks still + // scheduled will segfault. 
+ process.exit(code); + }; + sideGlobals.set("handleExit", handleExit); + + py.runPython( + ` + from pyodide._package_loader import SITE_PACKAGES, should_load_dynlib + from pyodide.ffi import to_js + import re + dynlibs_to_load = to_js([ + str(path) for path in SITE_PACKAGES.glob("**/*.so*") + if should_load_dynlib(path) + ]) + `, + { globals: sideGlobals } + ); + const dynlibs = sideGlobals.get("dynlibs_to_load"); + for (const dynlib of dynlibs) { + try { + await py._module.API.loadDynlib(dynlib); + } catch(e) { + console.error("Failed to load lib ", dynlib); + console.error(e); + } + } + // Warning: this sounds like it might not do anything important, but it + // fills in the GOT. There can be segfaults if we leave it out. + // See https://github.com/emscripten-core/emscripten/issues/22052 + // TODO: Fix Emscripten so this isn't needed + py._module.reportUndefinedSymbols(); + + py.runPython( + ` + import asyncio + # Keep the event loop alive until all tasks are finished, or SystemExit or + # KeyboardInterupt is raised. + loop = asyncio.get_event_loop() + # Make sure we don't run _no_in_progress_handler before we finish _run_main. + loop._in_progress += 1 + loop._no_in_progress_handler = handleExit + loop._system_exit_handler = handleExit + loop._keyboard_interrupt_handler = lambda: handleExit(130) + + # Make shutil.get_terminal_size tell the terminal size accurately. 
+ import shutil + from js.process import stdout + import os + def get_terminal_size(fallback=(80, 24)): + columns = getattr(stdout, "columns", None) + rows = getattr(stdout, "rows", None) + if columns is None: + columns = fallback[0] + if rows is None: + rows = fallback[1] + return os.terminal_size((columns, rows)) + shutil.get_terminal_size = get_terminal_size + `, + { globals: sideGlobals } + ); + + let errcode; + try { + errcode = py._module._run_main(); + } catch (e) { + if (e.constructor.name === "ExitStatus") { + process.exit(e.status); + } + py._api.fatal_error(e); + } + if (errcode) { + process.exit(errcode); + } + py.runPython("loop._decrement_in_progress()", { globals: sideGlobals }); +} +main().catch((e) => { + console.error(e); + process.exit(1); +});