diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
new file mode 100644
index 00000000..96fa9bc3
--- /dev/null
+++ b/.github/workflows/main.yaml
@@ -0,0 +1,115 @@
+name: Build kernel images
+
+on:
+  push:
+    branches:
+      - main
+
+  pull_request:
+    branches:
+      - main
+
+jobs:
+
+  build-python-images:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # version: ["3.8.8", "3.9.13", "3.10.5"] 3.10.5 fails with dependency conflicts
+        # We may need to have separate requirements.txt for each version, or replace
+        # dependencies on the fly
+        version: ["3.7.12", "3.8.13", "3.9.13", "3.10.9"]
+        directory: ["datascience-notebook"]
+        # The datascience-notebook base image does not support ARM
+        # We would need to build and maintain our own base image
+        # architecture: ["arm", "amd"]
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      # TODO: Log into DockerHub to prevent rate limiting
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Create context
+        run: |
+          docker context create github-action
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+        with:
+          endpoint: github-action
+          version: v0.10.4
+        env:
+          DOCKER_CONTEXT: github-action
+
+      - name: Cache Docker layers
+        uses: actions/cache@v3
+        id: docker-cache
+        with:
+          path: "/tmp/buildx-cache"
+          key: "${{ runner.os }}-buildx-${{ matrix.directory }}-${{ matrix.version }}"
+          restore-keys: "${{ runner.os }}-buildx-"
+
+      - name: Build arguments
+        id: build-args
+        run: |
+          # Image Name
+          container_registry=ghcr.io/${{ github.repository_owner }}
+          image_name=kernel-${{ matrix.directory }}
+          full_image_name="${container_registry}/${image_name}"
+
+          # Image Tags
+          image_sha_tag="${GITHUB_SHA:0:12}" # first 12 characters of the SHA
+          image_version_tag="python-$(version=${{ matrix.version }} && echo ${version%.*} )" # removes patch version
+
+          full_image_name_tagged=''
+
+          if [ "${GITHUB_EVENT_NAME}" = 'push' ]; then
+            full_image_name_tagged="${full_image_name}:${image_version_tag}"
+          elif [ "${GITHUB_EVENT_NAME}" = 'pull_request' ]; then
+            full_image_name_tagged="${full_image_name}:${image_version_tag}-${image_sha_tag}"
+          fi
+
+          # Step outputs are appended to $GITHUB_OUTPUT; the `::set-output` command is deprecated
+          echo "FULL_IMAGE_NAME=${full_image_name}" >> "$GITHUB_OUTPUT"
+          echo "FULL_IMAGE_NAME_TAGGED=${full_image_name_tagged}" >> "$GITHUB_OUTPUT"
+
+          echo "BUILD_URL=https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" >> "$GITHUB_OUTPUT"
+          echo "BUILD_TIMESTAMP=$(date --utc --iso-8601=seconds)" >> "$GITHUB_OUTPUT"
+
+          echo "full_image_name: $full_image_name"
+          echo "image_version_tag: $image_version_tag"
+          echo "image_sha_tag: $image_sha_tag"
+          echo "full_image_name_tagged: $full_image_name_tagged"
+
+      - name: Build image
+        env:
+          DOCKER_CONTEXT: github-action
+        run: |
+          (
+            cd ${GITHUB_WORKSPACE}/kernels/${{ matrix.directory }}
+
+            docker buildx build \
+              --pull \
+              --push \
+              --platform=linux/amd64 \
+              --progress plain \
+              --cache-from 'type=local,src=/tmp/buildx-cache' \
+              --cache-to 'type=local,dest=/tmp/buildx-cache' \
+              --tag '${{ steps.build-args.outputs.FULL_IMAGE_NAME_TAGGED }}' \
+              --build-arg PYTHON_VERSION=${{ matrix.version }} \
+              --build-arg 'NBL_ARG_BUILD_TIMESTAMP=${{ steps.build-args.outputs.BUILD_TIMESTAMP }}' \
+              --build-arg 'NBL_ARG_BUILD_URL=${{ steps.build-args.outputs.BUILD_URL }}' \
+              --build-arg 'NBL_ARG_REVISION=${{ github.sha }}' \
+              --build-arg 'NBL_ARG_VERSION=${{ github.ref }}' \
+              .
+ ) diff --git a/.github/workflows/reusable-docker-build.yml b/.github/workflows/reusable-docker-build.yml deleted file mode 100644 index 5958de5b..00000000 --- a/.github/workflows/reusable-docker-build.yml +++ /dev/null @@ -1,142 +0,0 @@ -name: docker - -on: - workflow_call: - inputs: - dockerfile: - description: "Path to the Dockerfile to build" - type: string - default: Dockerfile - context: - description: "The context for Docker build" - type: string - default: "." - platforms: - description: "Comma separate list of platforms to build on" - type: string - required: false - default: "linux/amd64,linux/arm64" - images: - description: "The image names that we want to build" - type: string - required: true - tags: - description: "The various tags to be attached to the built image" - type: string - required: false - default: "" - labels: - description: "The various labels to attach to the built image" - type: string - required: false - default: | - org.opencontainers.image.url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - org.opencontainers.image.vendor=Noteable - org.opencontainers.image.version=${{ github.ref }} - target: - description: "Sets the target stage to build" - type: string - required: false - build_args: - description: "Additional build args to pass to the Docker build" - type: string - required: false - default: "" - secrets: - # We login to Dockerhub to prevent rate limiting issues when pulling images - # https://docs.docker.com/docker-hub/download-rate-limit/ - DOCKERHUB_USER: - required: true - DOCKERHUB_PASSWORD: - required: true - -jobs: - build: - permissions: - id-token: write - contents: read - packages: write - - if: | - github.event_name == 'push' || - (github.event_name == 'pull_request' && github.event.pull_request.state == 'open') - runs-on: ubuntu-22.04 - steps: - - name: Checkout the code - uses: actions/checkout@v3 - - - name: Copy common files - run: make copy-common-files - - name: Log in to Docker 
Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USER }} - password: ${{ secrets.DOCKERHUB_PASSWORD }} - - - name: Log in to the Container registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - version: v0.10.1 - - # Note: The outputs in github action will show duplicate labels being generated for the meta outputs. - # When the Docker engine builds, it will only take the later values, and our custom labels get added - # at the end. https://github.com/docker/metadata-action/issues/125 - - name: Docker metadata for labels and tags - id: meta - uses: docker/metadata-action@v4 - with: - images: ${{ inputs.images }} - tags: ${{ inputs.tags }} - labels: ${{ inputs.labels }} - - - name: Build and push - uses: docker/build-push-action@v3 - with: - platforms: ${{ inputs.platforms }} - context: ${{ inputs.context }} - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - target: ${{ inputs.target }} - cache-from: type=gha - cache-to: type=gha,mode=max - build-args: ${{ inputs.build_args }} - - clear_cache: - permissions: - contents: read - actions: write - # If the PR is closed (or merged), we want to clear the cache - if: ${{ github.event_name == 'pull_request' && github.event.pull_request.state == 'closed' }} - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@v3 - - - name: Cleanup - run: | - gh extension install actions/gh-actions-cache - - REPO=${{ github.repository }} - BRANCH=${{ github.ref }} - - echo "Fetching list of cache key" - cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH | cut -f 1 ) - - ## Setting this to not fail the workflow while deleting cache keys. - set +e - echo "Deleting caches..." 
-          for cacheKey in $cacheKeysForPR
-          do
-              gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm
-          done
-          echo "Done"
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/kernels/datascience-notebook/.pythonrc b/kernels/datascience-notebook/.pythonrc
new file mode 100644
index 00000000..6a1eac85
--- /dev/null
+++ b/kernels/datascience-notebook/.pythonrc
@@ -0,0 +1,8 @@
+import pandas as pd
+
+import dx
+
+dx.set_option("DISPLAY_MAX_ROWS", 50_000)
+dx.set_option("DISPLAY_MAX_COLUMNS", 100)
+dx.set_option("ENABLE_DATALINK", True)
+dx.set_option("ENABLE_ASSIGNMENT", False)
\ No newline at end of file
diff --git a/kernels/datascience-notebook/Dockerfile b/kernels/datascience-notebook/Dockerfile
new file mode 100644
index 00000000..bc132097
--- /dev/null
+++ b/kernels/datascience-notebook/Dockerfile
@@ -0,0 +1,130 @@
+# syntax = docker/dockerfile:1.4.3
+ARG BASE_IMAGE=jupyter/datascience-notebook
+ARG PYTHON_VERSION=3.9.13
+# hadolint ignore=DL3006
+FROM ${BASE_IMAGE}:python-${PYTHON_VERSION}
+
+USER root
+
+# datascience-notebook:python-3.9.13 includes psutil 5.9.2 with cooked C lib, but
+# later pip installs end up installing 5.9.4, but for some
+# reason 'import psutil' will end up getting the python 5.9.4 but the
+# C lib from 5.9.2, and, unlike Smeagol, it hateses the precious.
+RUN pip uninstall -y psutil
+
+# Set up log file for magics
+RUN touch /var/log/noteable_magics.log && \
+    chown 4004:4004 /var/log/noteable_magics.log
+
+# When image is run, run the code with the environment
+# activated:
+SHELL ["/bin/bash", "-c"]
+
+WORKDIR /tmp
+
+# hadolint ignore=DL3008,DL3015
+RUN apt-get update && \
+    apt-get install -y jq procps git unixodbc-dev g++ \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV NB_USER="noteable" \
+    NB_UID=4004 \
+    NB_GID=4004
+
+# Create the default unprivileged user
+RUN groupadd --gid 4004 noteable && \
+    useradd --uid 4004 --shell /bin/false --create-home --no-log-init --gid noteable noteable && \
+    chown --recursive noteable:noteable /home/noteable
+
+RUN mkdir /etc/ipython && chown noteable:noteable /etc/ipython
+RUN mkdir -p /etc/noteable && chown noteable:noteable /etc/noteable
+
+RUN chown noteable:noteable "${CONDA_DIR}" && \
+    fix-permissions "${CONDA_DIR}"
+
+# `rust` is required for install of some packages under arm (and future x86 packages)
+# NOTE(review): the trailing `source` only affects the shell of this RUN step, not
+# later layers - confirm cargo is actually on PATH for the pip builds further down.
+# hadolint ignore=SC1091
+RUN wget -O rustup.sh -q https://sh.rustup.rs && \
+    sh rustup.sh -y && \
+    rm rustup.sh && \
+    source "$HOME/.cargo/env"
+
+# Run non-privileged user
+USER noteable
+
+ENV PATH="/home/noteable/.local/bin:${PATH}" \
+    HOME="/home/noteable" \
+    XDG_CACHE_HOME="/home/noteable/.cache/" \
+    GOOGLE_APPLICATION_CREDENTIALS="/vault/secrets/gcp-credentials"
+
+ARG PYTHON_VERSION
+
+# hadolint ignore=DL3045
+COPY environment-${PYTHON_VERSION}.txt ./environment.txt
+
+# hadolint ignore=SC2034
+RUN mamba install --file environment.txt
+
+# hadolint ignore=DL3045
+COPY requirements-${PYTHON_VERSION}.txt ./requirements.txt
+
+# hadolint ignore=SC1008,SC2155,DL3042,SC2102
+RUN pip install -I --no-cache-dir -r requirements.txt
+
+# Copy over any python commands that need to run on startup
+# that aren't covered by IPython extensions
+COPY .pythonrc /home/noteable/.pythonrc
+
+# Enable the widgets nbextension
+# hadolint ignore=SC1008
+RUN jupyter nbextension enable --py --sys-prefix widgetsnbextension
+
+# Smoke test to ensure packages were installed properly
+# hadolint ignore=SC1008
+RUN if [ "$PYTHON_VERSION" != "3.7.12" ]; then python -c "import noteable_magics"; fi
+
+RUN git config --global user.name "Noteable Kernel" && \
+    git config --global user.email "engineering@noteable.io" && \
+    git config --global safe.directory /etc/noteable/project && \
+    git config --global credential.helper /git_credential_helper.py && \
+    git config --global credential.useHttpPath true
+
+# https://ipython.readthedocs.io/en/stable/config/intro.html#systemwide-configuration
+COPY ipython_config.py /etc/ipython
+
+# Set standard working directory for noteable project
+WORKDIR /etc/noteable/project
+
+# Add the entrypoint script to the $PATH
+COPY run.sh /usr/local/bin
+COPY secrets_helper.py /tmp/secrets_helper.py
+# Both files below are invoked directly as executables (git credential helper and
+# the `git` shim on $PATH) but are committed with mode 100644, so set the exec bit here.
+COPY --chmod=0755 git_credential_helper.py /git_credential_helper.py
+COPY --chmod=0755 git-wrapper.sh /usr/local/bin/git
+
+EXPOSE 50001-50005
+
+# Use tini to manage passing signals to the child kernel process
+# -g will ensure signals are passed to the entire child process *group*,
+# not just the immediate child process (bash)
+# https://github.com/krallin/tini#process-group-killing
+ENTRYPOINT ["tini", "-g", "--"]
+CMD ["run.sh"]
+
+# Labels
+ARG NBL_ARG_BUILD_TIMESTAMP="undefined"
+ARG NBL_ARG_REVISION="undefined"
+# Defaults to the Python version being built so the title label matches the image
+ARG NBL_ARG_PYTHON_VERSION="${PYTHON_VERSION}"
+ARG NBL_ARG_BUILD_URL="undefined"
+ARG NBL_ARG_VERSION="undefined"
+LABEL org.opencontainers.image.created="${NBL_ARG_BUILD_TIMESTAMP}" \
+    org.opencontainers.image.revision="${NBL_ARG_REVISION}" \
+    org.opencontainers.image.source="https://github.com/noteable-io/polymorph" \
+    org.opencontainers.image.title="noteable-python-${NBL_ARG_PYTHON_VERSION}" \
+    org.opencontainers.image.url="${NBL_ARG_BUILD_URL}" \
+    org.opencontainers.image.vendor="Noteable" \
+    org.opencontainers.image.version="${NBL_ARG_VERSION}"
diff --git a/kernels/datascience-notebook/README.md
b/kernels/datascience-notebook/README.md new file mode 100644 index 00000000..8ecf387b --- /dev/null +++ b/kernels/datascience-notebook/README.md @@ -0,0 +1,20 @@ +# Multitenant Python Image + +Entrypoint is used to implement signal-based interrupts, since `ipykernel` does not support message-based interrupts. + +## Building Locally +You'll need to provide a git credential string located at `${HOME}/.git-credentials`: + +```shell +echo "${GITHUB_USER_NAME}:${GITHUB_PERSONAL_ACCESS_TOKEN}" > ${HOME}/.git-credentials +``` + +The [personal access token](https://github.com/settings/tokens) needs to have +the `read:packages, repo` scope (and make sure to enable SSO on it). + +```shell +# Optional step to help you auto-load your built docker container into minikube for use with Gate +eval $(minikube docker-env) + +DOCKER_BUILDKIT=1 docker build --secret "id=git-credentials,src=${HOME}/.git-credentials" -t local/noteable-python:latest . +``` diff --git a/kernels/datascience-notebook/environment-3.10.9.txt b/kernels/datascience-notebook/environment-3.10.9.txt new file mode 100644 index 00000000..20d65f2f --- /dev/null +++ b/kernels/datascience-notebook/environment-3.10.9.txt @@ -0,0 +1,7 @@ +jupyter_client=7.3.* +ipython=8.0.* +vdom=0.6 +papermill=2.2.* +ipywidgets=7.6.* +plotly=4.14.3 +geopandas=0.11.0 \ No newline at end of file diff --git a/kernels/datascience-notebook/environment-3.7.12.txt b/kernels/datascience-notebook/environment-3.7.12.txt new file mode 100644 index 00000000..c09adcca --- /dev/null +++ b/kernels/datascience-notebook/environment-3.7.12.txt @@ -0,0 +1,8 @@ +jupyter_client=7.3.* +ipython +vdom=0.6 +papermill=2.2.* +ipywidgets=7.6.* +geopandas +pyspark==3.2.1 +openjdk==8.0.332 diff --git a/kernels/datascience-notebook/environment-3.8.13.txt b/kernels/datascience-notebook/environment-3.8.13.txt new file mode 100644 index 00000000..20d65f2f --- /dev/null +++ b/kernels/datascience-notebook/environment-3.8.13.txt @@ -0,0 +1,7 @@ +jupyter_client=7.3.* 
+ipython=8.0.*
+vdom=0.6
+papermill=2.2.*
+ipywidgets=7.6.*
+plotly=4.14.3
+geopandas=0.11.0
\ No newline at end of file
diff --git a/kernels/datascience-notebook/environment-3.9.13.txt b/kernels/datascience-notebook/environment-3.9.13.txt
new file mode 100644
index 00000000..20d65f2f
--- /dev/null
+++ b/kernels/datascience-notebook/environment-3.9.13.txt
@@ -0,0 +1,7 @@
+jupyter_client=7.3.*
+ipython=8.0.*
+vdom=0.6
+papermill=2.2.*
+ipywidgets=7.6.*
+plotly=4.14.3
+geopandas=0.11.0
\ No newline at end of file
diff --git a/kernels/datascience-notebook/git-wrapper.sh b/kernels/datascience-notebook/git-wrapper.sh
new file mode 100644
index 00000000..dff1bff6
--- /dev/null
+++ b/kernels/datascience-notebook/git-wrapper.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+# This script wraps git to only allow certain commands to be run.
+# We mainly want to prevent users from getting into unknown states by checking out other branches, etc.
+
+# Allowed command list
+allowed_commands=( "commit" "pull" "push" "status" "diff" "add" "fetch" "log" )
+
+# Check if the command is allowed (the refusal goes to stderr so it never mixes with git's stdout)
+# shellcheck disable=SC2076
+if [[ ! " ${allowed_commands[*]} " =~ " ${1} " ]]; then
+    echo "That git command is not allowed, contact support@noteable.io if you think this is a mistake." >&2
+    exit 1
+fi
+
+# Otherwise pass through to git at /usr/bin/git
+exec /usr/bin/git "$@"
diff --git a/kernels/datascience-notebook/git_credential_helper.py b/kernels/datascience-notebook/git_credential_helper.py
new file mode 100644
index 00000000..755efe42
--- /dev/null
+++ b/kernels/datascience-notebook/git_credential_helper.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+"""
+This script is used as a Git credential helper https://git-scm.com/docs/git-credential.
+We iterate through all the git credential secrets on the file system and return the first one that matches the requested URL.
+If no match is found, we return an empty response.
+An empty response will cause Git to use the next credential helper in the list, or prompt the user for credentials.
+
+To test this script:
+
+$ cat > /tmp/demo.git-cred < dict:
+    """Parse the input from Git into a dictionary."""
+    return dict(line.split("=", 1) for line in input_.splitlines())
+
+
+def format_output(data: dict) -> str:
+    """Format the output to Git as newline-separated key=value pairs."""
+    return "\n".join(f"{key}={value}" for key, value in data.items())
+
+
+def find_secret(input_data: dict) -> Optional[dict]:
+    """Return the "data" of the first secret (in sorted file-name order) whose meta matches, else None."""
+    secrets_dir = Path(os.environ.get("NTBL_SECRETS_DIR", "/vault/secrets"))
+    if not secrets_dir.exists():
+        return None
+
+    keys_to_match = ["host", "protocol", "path"]
+    for secret_path in sorted(secrets_dir.glob("*.git-cred")):
+        secret_data = json.loads(secret_path.read_text())
+        meta = secret_data["meta"]
+        if all(meta[key] == input_data.get(key) for key in keys_to_match):
+            return secret_data["data"]
+
+    return None
+
+
+def main(stdin=sys.stdin, stdout=sys.stdout):
+    """Read a credential request from stdin and print the matching secret, if any, to stdout."""
+    parsed_input = parse_input(stdin.read())
+    # Plain assignment instead of `:=` so the script also parses on the Python 3.7 image
+    secret = find_secret(parsed_input)
+    if secret is not None:
+        print(format_output(secret), file=stdout)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/kernels/datascience-notebook/ipython_config.py b/kernels/datascience-notebook/ipython_config.py
new file mode 100644
index 00000000..fb66a186
--- /dev/null
+++ b/kernels/datascience-notebook/ipython_config.py
@@ -0,0 +1,11 @@
+c.InteractiveShellApp.extensions = [
+    "noteable_magics",
+]
+
+c.SqlMagic.feedback = False
+c.SqlMagic.autopandas = True
+c.NTBLMagic.project_dir = "/etc/noteable/project"
+c.NoteableDataLoaderMagic.return_head = False
+c.IPythonKernel._execute_sleep = 0.15
+# 10 minutes to support large files
+c.NTBLMagic.planar_ally_default_timeout_seconds = 600
\ No newline at end of file
diff --git a/kernels/datascience-notebook/requirements-3.10.9.txt b/kernels/datascience-notebook/requirements-3.10.9.txt
new file mode 100644
index
00000000..3177ffd7 --- /dev/null +++ b/kernels/datascience-notebook/requirements-3.10.9.txt @@ -0,0 +1,12 @@ +# Scheduler/orchestration packages +dagstermill==0.16.15 +papermill-origami==0.0.9 +cloudpickle==2.2.0 +flytekitplugins-papermill==1.2.4 + +# https://github.com/noteable-io/ packages +git+https://www.github.com/noteable-io/dx.git@4be0c105aea40248d066a1a8beb74ff00d0b5bd3 +git+https://www.github.com/noteable-io/noteable-notebook-magics.git@5d54f4cd94c46f617459db5e5a0110d081393936 +git+https://www.github.com/noteable-io/sidecar_comms.git@eed16c3ab900a8abe19b654fa775646bc38dd519 + +# (All of the datasources modules are now explicit requirements within noteable-notebook-magics.) \ No newline at end of file diff --git a/kernels/datascience-notebook/requirements-3.7.12.txt b/kernels/datascience-notebook/requirements-3.7.12.txt new file mode 100644 index 00000000..aa46a76e --- /dev/null +++ b/kernels/datascience-notebook/requirements-3.7.12.txt @@ -0,0 +1,20 @@ +# Scheduler/orchestration packages +# dagstermill +cloudpickle==2.2.0 +# flytekitplugins-papermill +# Added for ease of use with integration partners +# pyiceberg +fugue==0.8.0 +fugue-jupyter==0.2.2 + +# https://github.com/noteable-io/ packages +# git+https://www.github.com/noteable-io/dx.git@a7df2821182293546d7d7a9ede3cdcc0c946d570 +# git+https://www.github.com/noteable-io/noteable-notebook-magics.git@a3b00faedcba9f38c7b75b4f45f4eba7e1ce313e +# git+https://www.github.com/noteable-io/sidecar_comms.git@35b7cf8ad6b15daf020954c9029aaae5779f324a + +# Conflict +# dagstermill 0.3.0 depends on ipykernel>=4.9.0 +# flytekitplugins-papermill 0.1.0 depends on ipykernel>=5.0.0 +# sidecar-comms 0.1.0 depends on ipykernel<7.0.0 and >=6.20.2 + +# (All of the datasources modules are now explicit requirements within noteable-notebook-magics.) 
diff --git a/kernels/datascience-notebook/requirements-3.8.13.txt b/kernels/datascience-notebook/requirements-3.8.13.txt new file mode 100644 index 00000000..3177ffd7 --- /dev/null +++ b/kernels/datascience-notebook/requirements-3.8.13.txt @@ -0,0 +1,12 @@ +# Scheduler/orchestration packages +dagstermill==0.16.15 +papermill-origami==0.0.9 +cloudpickle==2.2.0 +flytekitplugins-papermill==1.2.4 + +# https://github.com/noteable-io/ packages +git+https://www.github.com/noteable-io/dx.git@4be0c105aea40248d066a1a8beb74ff00d0b5bd3 +git+https://www.github.com/noteable-io/noteable-notebook-magics.git@5d54f4cd94c46f617459db5e5a0110d081393936 +git+https://www.github.com/noteable-io/sidecar_comms.git@eed16c3ab900a8abe19b654fa775646bc38dd519 + +# (All of the datasources modules are now explicit requirements within noteable-notebook-magics.) \ No newline at end of file diff --git a/kernels/datascience-notebook/requirements-3.9.13.txt b/kernels/datascience-notebook/requirements-3.9.13.txt new file mode 100644 index 00000000..3177ffd7 --- /dev/null +++ b/kernels/datascience-notebook/requirements-3.9.13.txt @@ -0,0 +1,12 @@ +# Scheduler/orchestration packages +dagstermill==0.16.15 +papermill-origami==0.0.9 +cloudpickle==2.2.0 +flytekitplugins-papermill==1.2.4 + +# https://github.com/noteable-io/ packages +git+https://www.github.com/noteable-io/dx.git@4be0c105aea40248d066a1a8beb74ff00d0b5bd3 +git+https://www.github.com/noteable-io/noteable-notebook-magics.git@5d54f4cd94c46f617459db5e5a0110d081393936 +git+https://www.github.com/noteable-io/sidecar_comms.git@eed16c3ab900a8abe19b654fa775646bc38dd519 + +# (All of the datasources modules are now explicit requirements within noteable-notebook-magics.) 
\ No newline at end of file
diff --git a/kernels/datascience-notebook/run.sh b/kernels/datascience-notebook/run.sh
new file mode 100755
index 00000000..a6325856
--- /dev/null
+++ b/kernels/datascience-notebook/run.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+set -o pipefail
+set -o nounset
+set -o errexit
+
+echo "Local time: $(date)"
+
+set -x
+
+connection_file=/tmp/connection_file.json
+
+cp /etc/noteable/connections/connection_file.json "${connection_file}"
+
+kernel_name=$(jq -r .kernel_name "${connection_file}")
+
+# Inject Secrets into environment (see script docstring for more info)
+# set +x to avoid echoing the Secrets in plaintext to logs
+set +x
+echo "Injecting Secrets into environment, echoing is turned off"
+eval "$(python /tmp/secrets_helper.py)"
+echo "Done injecting Secrets, turning echoing back on"
+set -x
+
+case $kernel_name in
+
+  python | python3)
+    echo "Starting Python kernel"
+    # https://docs.python.org/3/using/cmdline.html#envvar-PYTHONSTARTUP
+    export PYTHONSTARTUP=~/.pythonrc
+    exec python -m ipykernel_launcher -f "${connection_file}" --debug
+    ;;
+
+  ir)
+    echo "Starting R kernel"
+    exec R --slave -e "IRkernel::main()" --args "${connection_file}"
+    ;;
+
+  julia | julia-1.6)
+    echo "Starting Julia kernel"
+    # project path necessary to keep julia from using its defaults
+    exec julia -i --color=yes --project=/etc/noteable/project /opt/julia/packages/IJulia/e8kqU/src/kernel.jl "${connection_file}"
+    ;;
+
+  *)
+    echo "Unrecognized '$kernel_name' kernel, falling back to Python"
+    # https://docs.python.org/3/using/cmdline.html#envvar-PYTHONSTARTUP
+    export PYTHONSTARTUP=~/.pythonrc
+    exec python -m ipykernel_launcher -f "${connection_file}" --debug
+    ;;
+esac
diff --git a/kernels/datascience-notebook/secrets_helper.py b/kernels/datascience-notebook/secrets_helper.py
new file mode 100644
index 00000000..e731babd
--- /dev/null
+++ b/kernels/datascience-notebook/secrets_helper.py
@@ -0,0 +1,46 @@
+"""
+This script helps inject Secrets into the Kernel environment.
+
+The Vault Agent will volume mount files into the Kernel container
+at /vault/secrets. Noteable Secrets will be in .env suffix files.
+
+We want to parse all those files and export them as environment variables
+in the bash script that kicks off the Kernel (ipykernel_launcher etc).
+
+Doing that scripting in bash is a pain, so we do it in Python here and
+bash just does an `eval` on the output.
+
+Some defensive programming to highlight:
+ - Env vars in the output are all uppercased
+ - If an env var is already set, we don't overwrite it
+ - We use shlex to quote the output so bash eval does not cause nasty side effects
+ - Files whose name is not a valid shell identifier are skipped, because the
+   name is written unquoted into the `export` statement that bash evals
+"""
+import os
+import pathlib
+import re
+import shlex
+import sys
+
+# Only a plain identifier is safe to place unquoted after `export`
+VALID_ENV_NAME = re.compile(r"[A-Z_][A-Z0-9_]*")
+
+output = []
+
+secrets_directory = os.environ.get("VAULT_SECRETS_PATH", "/vault/secrets")
+
+directory = pathlib.Path(secrets_directory)
+if directory.exists():
+    # Sorted so the emitted export order is deterministic
+    for file in sorted(directory.glob("*.env")):
+        name = file.stem.upper()
+        if not VALID_ENV_NAME.fullmatch(name):
+            # Report on stderr only: stdout is what the caller evals
+            print(f"Skipping secret file with invalid env var name: {file.name!r}", file=sys.stderr)
+            continue
+        if name not in os.environ:
+            content = file.read_text()
+            output.append(f"export {name}={shlex.quote(content)}")
+
+print("\n".join(output))