diff --git a/demos/demos-v2.yaml b/demos/demos-v2.yaml index a31ff1ea..0c7bbc14 100644 --- a/demos/demos-v2.yaml +++ b/demos/demos-v2.yaml @@ -226,3 +226,20 @@ demos: cpu: "3" memory: 5098Mi pvc: 16Gi + jupyterhub-keycloak: + description: Demo showing jupyterhub notebooks secured with keycloak + documentation: https://docs.stackable.tech/stackablectl/stable/demos/jupyterhub-keycloak.html + stackableStack: jupyterhub-keycloak + labels: + - jupyterhub + - keycloak + - spark + - S3 + manifests: + # TODO: revert paths + - plainYaml: demos/jupyterhub-keycloak/load-gas-data.yaml + supportedNamespaces: [] + resourceRequests: + cpu: 6400m + memory: 12622Mi + pvc: 20Gi diff --git a/demos/jupyterhub-keycloak/load-gas-data.yaml b/demos/jupyterhub-keycloak/load-gas-data.yaml new file mode 100644 index 00000000..c230a1c1 --- /dev/null +++ b/demos/jupyterhub-keycloak/load-gas-data.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: load-gas-data +spec: + template: + spec: + containers: + - name: load-gas-data + image: "bitnami/minio:2022-debian-10" + command: ["bash", "-c", "cd /tmp; curl -f -O https://repo.stackable.tech/repository/misc/datasets/gas-sensor-data/20160930_203718.csv && mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey) $(cat /minio-s3-credentials/secretKey) && mc cp 20160930_203718.csv minio/demo/gas-sensor/raw/;"] # curl -f: exit non-zero on HTTP errors so the Job retries instead of uploading an error page + volumeMounts: + - name: minio-s3-credentials + mountPath: /minio-s3-credentials + volumes: + - name: minio-s3-credentials + secret: + secretName: minio-s3-credentials + restartPolicy: OnFailure + backoffLimit: 50 diff --git a/docs/modules/demos/images/jupyterhub-keycloak/admin-tab.png b/docs/modules/demos/images/jupyterhub-keycloak/admin-tab.png new file mode 100644 index 00000000..7b30b0b4 Binary files /dev/null and b/docs/modules/demos/images/jupyterhub-keycloak/admin-tab.png differ diff --git a/docs/modules/demos/images/jupyterhub-keycloak/keycloak-login.png
b/docs/modules/demos/images/jupyterhub-keycloak/keycloak-login.png new file mode 100644 index 00000000..9de77440 Binary files /dev/null and b/docs/modules/demos/images/jupyterhub-keycloak/keycloak-login.png differ diff --git a/docs/modules/demos/images/jupyterhub-keycloak/load-nb.png b/docs/modules/demos/images/jupyterhub-keycloak/load-nb.png new file mode 100644 index 00000000..4fa36126 Binary files /dev/null and b/docs/modules/demos/images/jupyterhub-keycloak/load-nb.png differ diff --git a/docs/modules/demos/images/jupyterhub-keycloak/oauth-login.png b/docs/modules/demos/images/jupyterhub-keycloak/oauth-login.png new file mode 100644 index 00000000..cc6e1d1d Binary files /dev/null and b/docs/modules/demos/images/jupyterhub-keycloak/oauth-login.png differ diff --git a/docs/modules/demos/images/jupyterhub-keycloak/run-nb.png b/docs/modules/demos/images/jupyterhub-keycloak/run-nb.png new file mode 100644 index 00000000..204d18e6 Binary files /dev/null and b/docs/modules/demos/images/jupyterhub-keycloak/run-nb.png differ diff --git a/docs/modules/demos/images/jupyterhub-keycloak/s3-buckets.png b/docs/modules/demos/images/jupyterhub-keycloak/s3-buckets.png new file mode 100644 index 00000000..4b671b62 Binary files /dev/null and b/docs/modules/demos/images/jupyterhub-keycloak/s3-buckets.png differ diff --git a/docs/modules/demos/images/jupyterhub-keycloak/server-options.png b/docs/modules/demos/images/jupyterhub-keycloak/server-options.png new file mode 100644 index 00000000..a1d366ef Binary files /dev/null and b/docs/modules/demos/images/jupyterhub-keycloak/server-options.png differ diff --git a/docs/modules/demos/pages/jupyterhub-keycloak.adoc b/docs/modules/demos/pages/jupyterhub-keycloak.adoc new file mode 100644 index 00000000..ebbb4b3c --- /dev/null +++ b/docs/modules/demos/pages/jupyterhub-keycloak.adoc @@ -0,0 +1,193 @@ += jupyterhub-keycloak + +:k8s-cpu: https://kubernetes.io/docs/tasks/debug/debug-cluster/resource-metrics-pipeline/#cpu +:spark-pkg: 
https://spark.apache.org/docs/latest/api/python/user_guide/python_packaging.html +:pyspark: https://spark.apache.org/docs/latest/api/python/getting_started/index.html +:jupyterhub-k8s: https://github.com/jupyterhub/zero-to-jupyterhub-k8s +:jupyterlab: https://jupyterlab.readthedocs.io/en/stable/ +:jupyter: https://jupyter.org +:keycloak: https://www.keycloak.org/ +:gas-sensor: https://archive.ics.uci.edu/dataset/487/gas+sensor+array+temperature+modulation + +This demo showcases the integration between {jupyter}[JupyterHub] and {keycloak}[Keycloak] deployed on the Stackable Data Platform (SDP) onto a Kubernetes cluster. +{jupyterlab}[JupyterLab] is deployed using the {jupyterhub-k8s}[pyspark-notebook stack] provided by the Jupyter community. +A simple notebook is provided that shows how to start a distributed Spark cluster, reading and writing data from an S3 instance. + +For this demo a small sample of {gas-sensor}[gas sensor measurements*] is provided. +Install this demo on an existing Kubernetes cluster: + +[source,console] +---- +$ stackablectl demo install jupyterhub-keycloak +---- + +WARNING: When running a distributed Spark cluster from within a JupyterHub notebook, the notebook acts as the driver and requests executor Pods from k8s. +These Pods in turn can mount *all* volumes and Secrets in that namespace. +To prevent this from breaking user separation, it is planned to use an OPA gatekeeper to define OPA rules that restrict what the created executor Pods can mount. This is not yet implemented in this demo. + +[#system-requirements] +== System requirements + +To run this demo, your system needs at least: + +* 8 {k8s-cpu}[cpu units] (core/hyperthread) +* 32GiB memory + +You may need more resources depending on how many concurrent users are logged in, and which notebook profiles they are using. + +== Aim / Context + +This demo shows how to authenticate JupyterHub users against a Keycloak backend using JupyterHub's OAuthenticator.
+The same users as in the xref:end-to-end-security.adoc[End-to-end-security] demo are configured in Keycloak and these will be used as examples. +The notebook offers a simple template for using Spark to interact with S3 as a storage backend. + +== Overview + +This demo will: + +* Install the required Stackable Data Platform operators +* Spin up the following data products: +** *JupyterHub*: A multi-user server for Jupyter notebooks +** *Keycloak*: An identity and access management product +** *S3*: A Minio instance for data storage +* Download a sample of the gas sensor dataset into S3 +* Install the Jupyter notebook +* Demonstrate some basic data operations against S3 +* Illustrate multi-user usage + +== JupyterHub + +Have a look at the available Pods before logging in: + +[source,console] +---- +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +hub-84f49ccbd7-29h7j 1/1 Running 0 56m +keycloak-544d757f57-f55kr 2/2 Running 0 57m +load-gas-data-m6z5p 0/1 Completed 0 54m +minio-5486d7584f-x2jn8 1/1 Running 0 57m +proxy-648bf7f45b-62vqg 1/1 Running 0 56m + +---- + +The `proxy` Pod has an associated `proxy-public` service with a statically-defined port (31095), exposed with type NodePort. The `keycloak` Pod has a Service called `keycloak` with a fixed port (31093) of type NodePort as well. +In order to reach the JupyterHub web interface, navigate to this service. +The node port IP can be found in the ConfigMap `keycloak-address` (written by the Keycloak Deployment as it starts up). +On Kind this can be any node - not necessarily the one where the proxy Pod is running. +This is due to the way in which Docker networking is used within the cluster. +On other clusters it will be necessary to use the exact Node on which the proxy is running. 
+ +In the example below that would then be 172.19.0.5:31095: + +[source,yaml] +---- +apiVersion: v1 +data: + keycloakAddress: 172.19.0.5:31093 # Keycloak itself + keycloakNodeIp: 172.19.0.5 # can be used to access the proxy-public service +kind: ConfigMap +metadata: + name: keycloak-address + namespace: default +---- + +NOTE: The `hub` Pod may show a `CreateContainerConfigError` for a few moments on start-up as it requires the ConfigMap written by the Keycloak deployment. + +You should see the JupyterHub login page, which will indicate a re-direct to the OAuth service (Keycloak): + +image::jupyterhub-keycloak/oauth-login.png[] + +Click on the sign-in button. +You will be redirected to the Keycloak login, where you can enter one of the aforementioned users (e.g. `justin.martin` or `isla.williams`: the password is the same as the username): + +image::jupyterhub-keycloak/keycloak-login.png[] + +A successful login will redirect you back to JupyterHub where different profiles are listed (the drop-down options are visible when you click on the respective fields): + +image::jupyterhub-keycloak/server-options.png[] + +The explorer window on the left includes a notebook that is already mounted. + +Double-click on the file `notebook/process-s3.ipynb`: + +image::jupyterhub-keycloak/load-nb.png[] + +Run the notebook by selecting "Run All Cells" from the menu: + +image::jupyterhub-keycloak/run-nb.png[] + +The notebook includes some comments regarding image compatibility and uses a custom image built off the official Spark image that matches the Spark version used in the notebook. +The java versions also match exactly. +Python versions need to match at the `major:minor` level, which is why Python 3.11 is used in the custom image. + +Once the spark executor has been started (we have specified `spark.executor.instances` = 1) it will spin up as an extra pod. +We have named the spark job to incorporate the current user (justin-martin). 
+JupyterHub has started a pod for the user's notebook instance (`jupyter-justin-martin---bdd3b4a1`) and another one for the spark executor (`process-s3-jupyter-justin-martin-bdd3b4a1-9e9da995473f481f-exec-1`): + +[source,console] +---- +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +... +jupyter-justin-martin---bdd3b4a1 1/1 Running 0 17m +process-s3-jupyter-justin-martin-... 1/1 Running 0 2m9s +... +---- + +Stop the kernel in the notebook (which will shut down the spark session and thus the executor) and log out as the current user. +Log in now as `daniel.king` and then again as `isla.williams` (you may need to do this in a clean browser session so that existing login cookies are removed). +The user `isla.williams` has been defined as an admin user in the JupyterHub configuration: + +[source,yaml] +---- + ... + hub: + config: + Authenticator: + # don't filter here: delegate to Keycloak + allow_all: true + admin_users: + - isla.williams + ... +---- + +You should now see user-specific pods for all three users: + + +[source,console] +---- +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +... +jupyter-daniel-king---181a80ce 1/1 Running 0 6m17s +jupyter-isla-williams---14730816 1/1 Running 0 4m50s +jupyter-justin-martin---bdd3b4a1 1/1 Running 0 3h47m +... +---- + +The admin user (`isla.williams`) will also have an extra Admin tab in the JupyterHub console where current users can be managed.
+You can find this in the JupyterHub UI at `http://<node-ip>:31095/hub/admin`, e.g. http://172.19.0.5:31095/hub/admin: + +image::jupyterhub-keycloak/admin-tab.png[] + +You can inspect the S3 buckets by using `stackablectl stacklet list` to return the MinIO endpoint and logging in there with `admin/adminadmin`: + +[source,console] +---- +$ stackablectl stacklet list + +┌─────────┬───────────────┬───────────┬───────────────────────────────┬────────────┐ +│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │ +╞═════════╪═══════════════╪═══════════╪═══════════════════════════════╪════════════╡ +│ minio ┆ minio-console ┆ default ┆ http http://172.19.0.5:32470 ┆ │ +└─────────┴───────────────┴───────────┴───────────────────────────────┴────────────┘ +---- + +image::jupyterhub-keycloak/s3-buckets.png[] + +NOTE: If you attempt to re-run the notebook you will need to first remove the `_temporary` folders from the S3 buckets. +These are created by spark jobs and are not removed from the bucket when the job has completed. + +*See: Burgués, Javier, Juan Manuel Jiménez-Soto, and Santiago Marco. "Estimation of the limit of detection in semiconductor gas sensors through linearized calibration models." Analytica chimica acta 1013 (2018): 13-25 +Burgués, Javier, and Santiago Marco. "Multivariate estimation of the limit of detection by orthogonal partial least squares in temperature-modulated MOX sensors." Analytica chimica acta 1019 (2018): 49-64.
diff --git a/docs/modules/demos/partials/demos.adoc b/docs/modules/demos/partials/demos.adoc index 473abad6..ebd54e62 100644 --- a/docs/modules/demos/partials/demos.adoc +++ b/docs/modules/demos/partials/demos.adoc @@ -2,6 +2,7 @@ * xref:data-lakehouse-iceberg-trino-spark.adoc[] * xref:end-to-end-security.adoc[] * xref:hbase-hdfs-load-cycling-data.adoc[] +* xref:jupyterhub-keycloak.adoc[] * xref:jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data.adoc[] * xref:logging.adoc[] * xref:nifi-kafka-druid-earthquake-data.adoc[] diff --git a/stacks/jupyterhub-keycloak/Dockerfile b/stacks/jupyterhub-keycloak/Dockerfile new file mode 100644 index 00000000..874bbd17 --- /dev/null +++ b/stacks/jupyterhub-keycloak/Dockerfile @@ -0,0 +1,29 @@ +# docker build -t oci.stackable.tech/sandbox/spark:3.5.2-python311 -f Dockerfile . +# kind load docker-image oci.stackable.tech/sandbox/spark:3.5.2-python311 -n stackable-data-platform +# or: +# docker push oci.stackable.tech/sandbox/spark:3.5.2-python311 + +FROM spark:3.5.2-scala2.12-java17-ubuntu + +USER root + +RUN set -ex; \ + apt-get update; \ + # Install dependencies for Python 3.11 + apt-get install -y \ + software-properties-common \ + && apt-get update && apt-get install -y \ + python3.11 \ + python3.11-venv \ + python3.11-dev \ + && rm -rf /var/lib/apt/lists/*; \ + # Install pip manually for Python 3.11 + curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3.11 get-pip.py && \ + rm get-pip.py + +# Make Python 3.11 the default Python version +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \ + && update-alternatives --install /usr/bin/pip pip /usr/local/bin/pip3 1 + +USER spark diff --git a/stacks/jupyterhub-keycloak/jupyterhub-native-auth.yaml b/stacks/jupyterhub-keycloak/jupyterhub-native-auth.yaml new file mode 100644 index 00000000..a2fd7cbd --- /dev/null +++ b/stacks/jupyterhub-keycloak/jupyterhub-native-auth.yaml @@ -0,0 +1,71 @@ +--- +releaseName: jupyterhub +name: 
jupyterhub +repo: + name: jupyterhub + url: https://jupyterhub.github.io/helm-chart/ +version: 4.0.0 +options: + hub: + config: + Authenticator: + allow_all: True + admin_users: + - admin + JupyterHub: + authenticator_class: nativeauthenticator.NativeAuthenticator + NativeAuthenticator: + open_signup: true + proxy: + service: + type: ClusterIP + rbac: + create: true + prePuller: + hook: + enabled: false + continuous: + enabled: false + scheduling: + userScheduler: + enabled: false + singleuser: + cmd: null + serviceAccountName: hub + networkPolicy: + enabled: false + extraLabels: + stackable.tech/vendor: Stackable + profileList: + - display_name: "Default" + description: "Default profile" + default: true + profile_options: + cpu: + display_name: CPU + choices: + "2": + display_name: "2 request, 2 limit" + kubespawner_override: + cpu_guarantee: 2 + cpu_limit: 2 + "1 request, 16 limit": + display_name: "1 request, 16 limit" + kubespawner_override: + cpu_guarantee: 1 + cpu_limit: 16 + memory: + display_name: Memory + choices: + "8 GB": + display_name: "8 GB" + kubespawner_override: + mem_guarantee: "8G" + mem_limit: "8G" + image: + display_name: Image + choices: + "quay.io/jupyter/pyspark-notebook:python-3.11.9": + display_name: "quay.io/jupyter/pyspark-notebook:python-3.11.9" + kubespawner_override: + image: "quay.io/jupyter/pyspark-notebook:python-3.11.9" diff --git a/stacks/jupyterhub-keycloak/jupyterhub.yaml b/stacks/jupyterhub-keycloak/jupyterhub.yaml new file mode 100644 index 00000000..c1193c22 --- /dev/null +++ b/stacks/jupyterhub-keycloak/jupyterhub.yaml @@ -0,0 +1,217 @@ +# yamllint disable-file +--- +releaseName: jupyterhub +name: jupyterhub +repo: + name: jupyterhub + url: https://jupyterhub.github.io/helm-chart/ +version: 4.0.0 +options: + hub: + config: + Authenticator: + # don't filter here: delegate to Keycloak + allow_all: true + admin_users: + - isla.williams + GenericOAuthenticator: + client_id: jupyterhub + client_secret: 
{{jupyterhubClientPassword}} + username_claim: preferred_username + scope: + - openid + JupyterHub: + authenticator_class: generic-oauth + extraEnv: + CACERT: /etc/ssl/certs/ca-certificates.crt + CERT: /etc/ssl/certs/ca-certificates.crt + CURLOPT_CAINFO: /etc/ssl/certs/ca-certificates.crt + KEYCLOAK_NODEPORT_URL: + valueFrom: + configMapKeyRef: + name: keycloak-address + key: keycloakAddress + KEYCLOAK_NODE_IP: + valueFrom: + configMapKeyRef: + name: keycloak-address + key: keycloakNodeIp + extraVolumes: + - name: tls-ca-cert + ephemeral: + volumeClaimTemplate: + metadata: + annotations: + secrets.stackable.tech/class: tls + spec: + storageClassName: secrets.stackable.tech + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "1" + extraVolumeMounts: + - name: tls-ca-cert + # Alternative: mount to another filename in this folder and call update-ca-certificates + mountPath: /etc/ssl/certs/ca-certificates.crt + subPath: ca.crt + - name: tls-ca-cert + mountPath: /usr/local/lib/python3.12/site-packages/certifi/cacert.pem + subPath: ca.crt + extraConfig: + 01-drop-security-context-hook: | + from kubespawner import KubeSpawner + + async def modify_pod_hook(spawner: KubeSpawner, pod: dict): + pod.spec.security_context = None + for container in pod.spec.containers: + container.security_context = None + + return pod + + c.KubeSpawner.modify_pod_hook = modify_pod_hook + 02-create-spark-driver-service-hook: | + # Thanks to https://github.com/jupyterhub/kubespawner/pull/644 + from jupyterhub.utils import exponential_backoff + from kubespawner import KubeSpawner + from kubespawner.objects import make_owner_reference + from kubernetes_asyncio.client.models import V1ServicePort + from functools import partial + + async def after_pod_created_hook(spawner: KubeSpawner, pod: dict): + owner_reference = make_owner_reference( + pod["metadata"]["name"], pod["metadata"]["uid"] + ) + service_manifest = spawner.get_service_manifest(owner_reference) + + 
service_manifest.spec.type = "ClusterIP" + service_manifest.spec.clusterIP = "None" # Headless Services is all we need + service_manifest.spec.ports += [ + V1ServicePort(name='spark-ui', port=4040, target_port=4040), + V1ServicePort(name='spark-driver', port=2222, target_port=2222), + V1ServicePort(name='spark-block-manager', port=7777, target_port=7777) + ] + + await exponential_backoff( + partial( + spawner._ensure_not_exists, + "service", + service_manifest.metadata.name, + ), + f"Failed to delete service {service_manifest.metadata.name}", + ) + await exponential_backoff( + partial(spawner._make_create_resource_request, "service", service_manifest), + f"Failed to create service {service_manifest.metadata.name}", + ) + + c.KubeSpawner.after_pod_created_hook = after_pod_created_hook + 03-set-endpoints: | + import os + from oauthenticator.generic import GenericOAuthenticator + + keycloak_url = os.getenv("KEYCLOAK_NODEPORT_URL") + if not keycloak_url: + raise ValueError("KEYCLOAK_NODEPORT_URL environment variable not set") + + keycloak_node_ip = os.getenv("KEYCLOAK_NODE_IP") + if not keycloak_node_ip: + raise ValueError("KEYCLOAK_NODE_IP environment variable not set") + + c.GenericOAuthenticator.oauth_callback_url = f"http://{keycloak_node_ip}:31095/hub/oauth_callback" # 31095 is the proxy-public NodePort; use '=' here, a bare 'attr: value' annotation assigns nothing + c.GenericOAuthenticator.authorize_url = f"https://{keycloak_url}/realms/demo/protocol/openid-connect/auth" + c.GenericOAuthenticator.token_url = f"https://{keycloak_url}/realms/demo/protocol/openid-connect/token" + c.GenericOAuthenticator.userdata_url = f"https://{keycloak_url}/realms/demo/protocol/openid-connect/userinfo" + service: + type: NodePort + proxy: + service: + type: NodePort + nodePorts: + http: 31095 + rbac: + create: true + prePuller: + hook: + enabled: false + continuous: + enabled: false + scheduling: + userScheduler: + enabled: false + singleuser: + cmd: null + serviceAccountName: spark + networkPolicy: + enabled: false + extraLabels: + stackable.tech/vendor: Stackable +
initContainers: + - name: download-notebook + image: oci.stackable.tech/sdp/tools:1.0.0-stackable0.0.0-dev + command: ['sh', '-c', 'curl https://raw.githubusercontent.com/stackabletech/demos/feat/keycloak-jupyterhub/stacks/jupyterhub-keycloak/process-s3.ipynb -o /notebook/process-s3.ipynb'] + volumeMounts: + - mountPath: /notebook + name: notebook + storage: + extraVolumes: + - name: tls-ca-cert + ephemeral: + volumeClaimTemplate: + metadata: + annotations: + secrets.stackable.tech/class: tls + spec: + storageClassName: secrets.stackable.tech + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "1" + - name: minio-s3-credentials + secret: + secretName: minio-s3-credentials + - name: notebook + emptyDir: + sizeLimit: 500Mi + extraVolumeMounts: + - name: tls-ca-cert + mountPath: /stackable/secrets/tls-ca-cert + - name: minio-s3-credentials + mountPath: /minio-s3-credentials + - name: notebook + mountPath: /home/jovyan/notebook + profileList: + - display_name: "Default" + description: "Default profile" + default: true + profile_options: + cpu: + display_name: CPU + choices: +{% for cpu in ["1","2","4","8","16","32"] %} + "{{cpu}}": + display_name: "{{cpu}}" + kubespawner_override: + cpu_guarantee: {{cpu}} + cpu_limit: {{cpu}} +{% endfor %} + memory: + display_name: Memory + choices: +{% for memory in ["1","2","4","8","16","32","64","128"] %} + "{{memory}} GB": + display_name: "{{memory}} GB" + kubespawner_override: + mem_guarantee: "{{memory}}G" + mem_limit: "{{memory}}G" +{% endfor %} + image: + display_name: Image + choices: +{% for image in ["quay.io/jupyter/pyspark-notebook:python-3.11.9", "quay.io/jupyter/pyspark-notebook:spark-3.5.2"] %} + "{{image}}": + display_name: "{{image}}" + kubespawner_override: + image: "{{image}}" +{% endfor %} diff --git a/stacks/jupyterhub-keycloak/keycloak-realm-config.yaml b/stacks/jupyterhub-keycloak/keycloak-realm-config.yaml new file mode 100644 index 00000000..5495a1c3 --- /dev/null +++ 
b/stacks/jupyterhub-keycloak/keycloak-realm-config.yaml @@ -0,0 +1,202 @@ +--- +apiVersion: v1 +kind: Secret +metadata: + name: keycloak-realm-config +stringData: + realm.json: | + { + "realm" : "demo", + "enabled" : true, + "groups" : [ { + "name" : "Compliance and Regulation", + "path" : "/Compliance and Regulation", + "subGroups" : [ { + "name" : "Analytics", + "path" : "/Compliance and Regulation/Analytics" + } ] + }, { + "name" : "Customer Service", + "path" : "/Customer Service", + "subGroups" : [ { + "name" : "Analytics", + "path" : "/Customer Service/Analytics" + } ] + }, { + "name" : "Marketing", + "path" : "/Marketing", + "subGroups" : [ ] + } ], + "users" : [ { + "id" : "e44a09fa-bce1-40e8-a1da-28902b79dcf0", + "createdTimestamp" : 1711375603780, + "username" : "daniel.king", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Daniel", + "lastName" : "King", + "email" : "daniel.king@knab.com", + "credentials" : [ { + "id" : "77343b9c-661e-4f04-8ea3-c4dd7f86abde", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1711375610955, + "secretData" : "{\"value\":\"jNVzthZCkPipHW1KHZCncU/G/DHc+5QqmGEKernet5E=\",\"salt\":\"7woqv8IPicuA0cpa3U6dEg==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-demo" ], + "notBefore" : 0, + "groups" : [ "/Compliance and Regulation/Analytics" ] + }, { + "id" : "68dc0996-be4a-412e-8d7b-a21cfc5b1e61", + "createdTimestamp" : 1711375695303, + "username" : "isla.williams", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Isla", + "lastName" : "Williams", + "email" : "isla.williams@knab.com", + "credentials" : [ { + "id" : "67941518-5494-4b97-af8a-458250cc1bd0", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1711375708463, + 
"secretData" : "{\"value\":\"OpKvnLaK7afN4vpaqhjvNg/u1phsMZUAq0y6+28UFK4=\",\"salt\":\"dSfuKvbSyumwCP6Y1JNYEg==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-demo" ], + "notBefore" : 0, + "groups" : [ "/Customer Service/Analytics" ] + }, { + "id" : "e434ea2c-1dcc-4b00-aa98-77444d50a945", + "createdTimestamp" : 1711375666275, + "username" : "justin.martin", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Justin", + "lastName" : "Martin", + "email" : "justin.martin@knab.com", + "credentials" : [ { + "id" : "7df036b6-1cd4-48fd-920a-5e43cdb69d15", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1711375673501, + "secretData" : "{\"value\":\"MYCAP3Hl54rKbMGG16YRnchpkWJQ5EaqYiMfZN/Q3z4=\",\"salt\":\"8PieIPt7mRiJQe3kszgakg==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-demo" ], + "notBefore" : 0, + "groups" : [ "/Customer Service/Analytics" ] + }, { + "id" : "14d21868-9f24-45e6-876c-34405ac506c8", + "createdTimestamp" : 1711375726932, + "username" : "mark.ketting", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Mark", + "lastName" : "Ketting", + "email" : "mark.ketting@knab.com", + "credentials" : [ { + "id" : "80bc0228-fd73-4011-b8d7-be382429ef80", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1711375733935, + "secretData" : "{\"value\":\"2GQKaid+G8gKIQe4a9J12/d7d4z41kqy3nuNEdEh5YI=\",\"salt\":\"DCX1lj9nYbdPkzU2BnTUjg==\",\"additionalParameters\":{}}", + "credentialData" : 
"{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-demo" ], + "notBefore" : 0, + "groups" : [ "/Marketing" ] + }, { + "id" : "f695a4a2-b05a-4484-b49b-ea94837490e8", + "createdTimestamp" : 1711375638235, + "username" : "pamela.scott", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Pamela", + "lastName" : "Scott", + "email" : "pamela.scott@knab.com", + "credentials" : [ { + "id" : "553b96c7-849c-4655-810f-09e4569f260e", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1711375646251, + "secretData" : "{\"value\":\"IsMVP7Fw1bzIoBpjzFHl7Ey9FrfaOuzX5fTnfmw3/iw=\",\"salt\":\"G14/rEl5h6QUPrCR/XoVMw==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-demo" ], + "notBefore" : 0, + "groups" : [ "/Customer Service/Analytics" ] + }, { + "id" : "df53f737-794c-4bc4-ab0d-2cfbdc01cc57", + "createdTimestamp" : 1711375573153, + "username" : "sophia.clarke", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "Sophia", + "lastName" : "Clarke", + "email" : "sophia.clarke@knab.com", + "credentials" : [ { + "id" : "aac9b727-acfb-45b2-bc52-b89a210adbf8", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1711375583194, + "secretData" : "{\"value\":\"Q+3UJbt/cpV2G8lRcVFZ5J4BqluRqAlJols+q8NFlUc=\",\"salt\":\"jJUdiUao9UcwEJkGtBzSuw==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-demo" ], + "notBefore" : 0, + "groups" : [ "/Compliance and 
Regulation/Analytics" ] + }, { + "id" : "75b31f7c-aa0a-419b-930c-57969e7b6e67", + "createdTimestamp" : 1711375413324, + "username" : "william.lewis", + "enabled" : true, + "totp" : false, + "emailVerified" : true, + "firstName" : "William", + "lastName" : "Lewis", + "email" : "william.lewis@knab.com", + "credentials" : [ { + "id" : "4a213d68-b1f9-41e4-971f-6a7335291293", + "type" : "password", + "userLabel" : "My password", + "createdDate" : 1711375448461, + "secretData" : "{\"value\":\"oU3A3av49qx+fk2WJlvuZo2m33GygiSi8L0fugOIOq4=\",\"salt\":\"JJHpTTZNTt5k4GY9Eci5OQ==\",\"additionalParameters\":{}}", + "credentialData" : "{\"hashIterations\":27500,\"algorithm\":\"pbkdf2-sha256\",\"additionalParameters\":{}}" + } ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "default-roles-demo" ], + "notBefore" : 0, + "groups" : [ "/Compliance and Regulation/Analytics" ] + } ], + "clients" : [ { + "clientId": "jupyterhub", + "enabled": true, + "protocol": "openid-connect", + "clientAuthenticatorType": "client-secret", + "secret": "jupyterhubjupyterhub", + "redirectUris" : [ "*" ], + "webOrigins" : [ "*" ], + "standardFlowEnabled": true + } ] + } diff --git a/stacks/jupyterhub-keycloak/keycloak-serviceaccount.yaml b/stacks/jupyterhub-keycloak/keycloak-serviceaccount.yaml new file mode 100644 index 00000000..43a5837a --- /dev/null +++ b/stacks/jupyterhub-keycloak/keycloak-serviceaccount.yaml @@ -0,0 +1,38 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: keycloak-serviceaccount + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: keycloak-clusterrolebinding +subjects: + - kind: ServiceAccount + name: keycloak-serviceaccount + namespace: default +roleRef: + kind: ClusterRole + name: keycloak-clusterrole + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: keycloak-clusterrole +rules: + - apiGroups: + - 
"" + resources: + - nodes + - services + - endpoints + - configmaps + verbs: + - get + - list + - watch + - create + - patch diff --git a/stacks/jupyterhub-keycloak/keycloak.yaml b/stacks/jupyterhub-keycloak/keycloak.yaml new file mode 100644 index 00000000..28bb975c --- /dev/null +++ b/stacks/jupyterhub-keycloak/keycloak.yaml @@ -0,0 +1,145 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: keycloak + labels: + app: keycloak +spec: + replicas: 1 + selector: + matchLabels: + app: keycloak + template: + metadata: + labels: + app: keycloak + spec: + containers: + - name: keycloak + resources: {} + image: quay.io/keycloak/keycloak:26.0.5 + args: + - start + - --hostname-strict=false + - --https-key-store-file=/tls/keystore.p12 + - --https-key-store-password=changeit + - --import-realm + env: + - name: KEYCLOAK_ADMIN + value: admin + - name: KEYCLOAK_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: keycloak-admin-credentials + key: admin + ports: + - name: https + containerPort: 8443 + readinessProbe: + httpGet: + scheme: HTTPS + path: /realms/master + port: https + volumeMounts: + - name: data + mountPath: /opt/keycloak/data/ + - name: realm-config + mountPath: /opt/keycloak/data/import + - name: tls + mountPath: /tls/ + - name: create-configmap + resources: {} + image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev + command: ["/bin/bash", "-c"] + args: + - | + pid= + trap 'echo SIGINT; [[ $pid ]] && kill $pid; exit' SIGINT + trap 'echo SIGTERM; [[ $pid ]] && kill $pid; exit' SIGTERM + + while : + do + echo "Determining Keycloak public reachable address" + KEYCLOAK_ADDRESS=$(kubectl get svc keycloak -o json | jq -r --argfile endpoints <(kubectl get endpoints keycloak -o json) --argfile nodes <(kubectl get nodes -o json) '($nodes.items[] | select(.metadata.name == $endpoints.subsets[].addresses[].nodeName) | .status.addresses | map(select(.type == "ExternalIP" or .type == "InternalIP")) | min_by(.type) | .address | tostring) + ":" + 
(.spec.ports[] | select(.name == "https") | .nodePort | tostring)') + echo "Found Keycloak running at $KEYCLOAK_ADDRESS" + + if [ ! -z "$KEYCLOAK_ADDRESS" ]; then + KEYCLOAK_HOSTNAME="$(echo $KEYCLOAK_ADDRESS | grep -oP '^[^:]+')" + KEYCLOAK_PORT="$(echo $KEYCLOAK_ADDRESS | grep -oP '[0-9]+$')" + + cat << EOF | kubectl apply -f - + apiVersion: v1 + kind: ConfigMap + metadata: + name: keycloak-address + data: + keycloakAddress: "$KEYCLOAK_HOSTNAME:$KEYCLOAK_PORT" + keycloakNodeIp: "$KEYCLOAK_HOSTNAME" + EOF + fi + + sleep 30 & pid=$! + wait + done + securityContext: + fsGroup: 1000 + runAsGroup: 1000 + runAsUser: 1000 + volumes: + - name: data + persistentVolumeClaim: + claimName: keycloak-data + - name: realm-config + secret: + secretName: keycloak-realm-config + - name: tls + ephemeral: + volumeClaimTemplate: + metadata: + annotations: + secrets.stackable.tech/class: tls + secrets.stackable.tech/format: tls-pkcs12 + secrets.stackable.tech/format.compatibility.tls-pkcs12.password: changeit + secrets.stackable.tech/scope: service=keycloak,node + spec: + storageClassName: secrets.stackable.tech + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "1" + serviceAccountName: keycloak-serviceaccount +--- +apiVersion: v1 +kind: Service +metadata: + name: keycloak + labels: + app: keycloak +spec: + type: NodePort + selector: + app: keycloak + ports: + - name: https + port: 8443 + targetPort: 8443 + nodePort: 31093 +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: keycloak-data +spec: + accessModes: [ReadWriteOnce] + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 +kind: Secret +metadata: + name: keycloak-admin-credentials +stringData: + admin: "{{ keycloakAdminPassword }}" diff --git a/stacks/jupyterhub-keycloak/process-s3.ipynb b/stacks/jupyterhub-keycloak/process-s3.ipynb new file mode 100644 index 00000000..ac36f6ad --- /dev/null +++ b/stacks/jupyterhub-keycloak/process-s3.ipynb @@ -0,0 +1,264 @@ +{ + "cells": [ + { + 
"cell_type": "markdown", + "id": "f6515406-dc52-4a2b-9ae8-99fff7773146", + "metadata": {}, + "source": [ + "## Preliminaries\n", + "We can first output some versions that are running and read the minio credentials from the secret that has been mounted." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0705d7d-d93b-4e3b-bd49-2b6696ddc5be", + "metadata": {}, + "outputs": [], + "source": [ + "! python3 -V\n", + "! java --version\n", + "! pyspark --version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd941fee", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# get minio credentials\n", + "with open(\"/minio-s3-credentials/accessKey\", \"r\") as f:\n", + " minio_user = f.read().strip()\n", + "\n", + "with open(\"/minio-s3-credentials/secretKey\", \"r\") as f:\n", + " minio_pwd = f.read().strip()" + ] + }, + { + "cell_type": "markdown", + "id": "d01b5e14-c704-4408-a803-561cbcf8005f", + "metadata": {}, + "source": [ + "## Spark\n", + "Spark can be used in client mode (recommended for JupyterHub notebooks, as code is intended to be called in an interactive\n", + "fashion), which is the default, or cluster mode. This notebook uses spark in client mode, meaning that the notebook itself\n", + "acts as the driver. It is important that the versions of spark and python match across the driver (running in the jupyterhub image)\n", + "and the executor(s) (running in a separate image, specified below with the `spark.kubernetes.container.image` setting).\n", + "\n", + "The jupyterhub image `quay.io/jupyter/pyspark-notebook:spark-3.5.2` uses a base ubuntu image (like the spark images).\n", + "The versions of java match exactly.
Python versions can differ at patch level, and the image used below `oci.stackable.tech/sandbox/spark:3.5.2-python311` is built from a `spark:3.5.2-scala2.12-java17-ubuntu` base image with python 3.11 (the same major/minor version as the notebook) installed.\n", + "\n", + "## S3\n", + "As we will be reading data from an S3 bucket, we need to add the necessary `hadoop` and `aws` libraries in the same hadoop version as the\n", + "notebook image (see `spark.jars.packages`), and define the endpoint settings (see `spark.hadoop.fs.*`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "606363ba-0c97-4156-af1c-c8ad54745cfb", + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import SparkSession\n", + "\n", + "NAMESPACE = os.environ.get(\"NAMESPACE\", \"default\")\n", + "POD_NAME = os.environ.get(\"HOSTNAME\", f\"jupyter-{os.environ.get('USER', 'default')}-{NAMESPACE}\")\n", + "\n", + "EXECUTOR_IMAGE = \"oci.stackable.tech/sandbox/spark:3.5.2-python311\" \n", + "\n", + "spark = (\n", + " SparkSession.builder\n", + " .master(f\"k8s://https://{os.environ['KUBERNETES_SERVICE_HOST']}:{os.environ['KUBERNETES_SERVICE_PORT']}\")\n", + " .appName(f\"process-s3-{POD_NAME}\")\n", + " .config(\"spark.kubernetes.container.image\", EXECUTOR_IMAGE)\n", + " .config(\"spark.kubernetes.container.image.pullPolicy\", \"IfNotPresent\")\n", + " .config(\"spark.kubernetes.namespace\", NAMESPACE)\n", + " .config(\"spark.kubernetes.authenticate.driver.serviceAccountName\", \"spark\")\n", + " .config(\"spark.kubernetes.authenticate.executor.serviceAccountName\", \"spark\")\n", + " .config(\"spark.driver.port\", \"2222\")\n", + " .config(\"spark.driver.blockManager.port\", \"7777\")\n", + " .config(\"spark.executor.instances\", \"1\")\n", + " .config(\"spark.executor.memory\", \"1g\")\n", + " .config(\"spark.executor.cores\", \"1\")\n", + " .config(\"spark.hadoop.fs.s3a.endpoint\", \"http://minio:9000/\")\n", + "
.config(\"spark.hadoop.fs.s3a.path.style.access\", \"true\")\n", + " .config(\"spark.hadoop.fs.s3a.access.key\", minio_user)\n", + " .config(\"spark.hadoop.fs.s3a.secret.key\", minio_pwd)\n", + " .config(\"spark.hadoop.fs.s3a.aws.credentials.provider\", \"org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider\")\n", + " .config(\"spark.jars.packages\", \"org.apache.hadoop:hadoop-client-api:3.3.4,org.apache.hadoop:hadoop-client-runtime:3.3.4,org.apache.hadoop:hadoop-aws:3.3.4,org.apache.hadoop:hadoop-common:3.3.4,com.amazonaws:aws-java-sdk-bundle:1.12.162\")\n", + " .config(\"spark.kubernetes.driver.pod.name\", POD_NAME)\n", + " .getOrCreate()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "eb08096d-1f7a-4c95-8807-aca76290cdfa", + "metadata": {}, + "source": [ + "### Create an in-memory DataFrame\n", + "This will check that libraries across driver and executor are compatible." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3df1ab91-ab2e-49b0-a72f-164915e4ee80", + "metadata": {}, + "outputs": [], + "source": [ + "df = spark.createDataFrame([(\"a\", 1), (\"b\", 2)], [\"col1\", \"col2\"])\n", + "df.show()" + ] + }, + { + "cell_type": "markdown", + "id": "469988e4-1057-49f6-8c8f-93743c4a6839", + "metadata": {}, + "source": [ + "### Check s3 with pyarrow\n", + "As well as spark, we can inspect S3 buckets with the `pyarrow` library." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f8479cb-f216-4a8f-b9db-6da17ffebaa9", + "metadata": {}, + "outputs": [], + "source": [ + "# Manual S3 file check via pyarrow.fs\n", + "import pyarrow.fs as fs\n", + "\n", + "s3 = fs.S3FileSystem(endpoint_override=\"http://minio:9000/\", access_key=minio_user, secret_key=minio_pwd, scheme=\"http\")\n", + "files = s3.get_file_info(fs.FileSelector(\"demo/gas-sensor/raw/\", recursive=True))\n", + "for f in files:\n", + " print(\"Found file:\", f.path)" + ] + }, + { + "cell_type": "markdown", + "id": "1b3e3331-5587-40c5-8a38-a1c3527bb25a", + "metadata": {}, + "source": [ + "### Read/Write operations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc35f4d3", + "metadata": {}, + "outputs": [], + "source": [ + "df = spark.read.csv(\"s3a://demo/gas-sensor/raw/\", header = True)\n", + "df.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35861943-7586-434f-a03d-31ebf03b59d7", + "metadata": {}, + "outputs": [], + "source": [ + "df.write.csv(\"s3a://demo/gas-sensor/rewritten/\", mode=\"overwrite\")\n", + "df.write.parquet(\"s3a://demo/gas-sensor/parquet/\", mode=\"overwrite\")\n", + "\n", + "df2 = spark.read.parquet(\"s3a://demo/gas-sensor/parquet/\", header = True)\n", + "df2.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a319fa38-96de-4c8a-96e0-8e47ef5a7561", + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import functions\n", + "\n", + "df2 = df2.withColumn(\"hour\", (functions.floor(df2.timesecs / 60) + 1))\n", + "\n", + "dfs = df2.select(\n", + " df2.hour,\n", + " df2.humidity,\n", + " df2.temperature,\n", + " df2.flowrate\n", + ").groupby(\"hour\").agg(\n", + " functions.round(functions.avg('humidity'), 2).alias('humidity'),\n", + " functions.round(functions.avg('temperature'), 2).alias('temperature'),\n", + " functions.round(functions.avg('flowrate'), 2).alias('flowrate')\n", + 
").orderBy(\"hour\")\n", + "\n", + "dfs.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4276e8e7", + "metadata": {}, + "outputs": [], + "source": [ + "dfs.write.parquet(\"s3a://demo/gas-sensor/agg/\", mode=\"overwrite\")" + ] + }, + { + "cell_type": "markdown", + "id": "94d38d8f-57f7-4629-a4d2-e28142cc6a68", + "metadata": {}, + "source": [ + "### Convert between Spark and Pandas DataFrames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24d68a92-c104-4cc6-9a89-c052324ba1fd", + "metadata": {}, + "outputs": [], + "source": [ + "df_pandas = dfs.toPandas()\n", + "df_pandas.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "128628ff-f2c7-4a04-8c1a-020b239e1158", + "metadata": {}, + "outputs": [], + "source": [ + "spark_df = spark.createDataFrame(df_pandas)\n", + "spark_df.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/stacks/jupyterhub-keycloak/s3-connection.yaml b/stacks/jupyterhub-keycloak/s3-connection.yaml new file mode 100644 index 00000000..7e026539 --- /dev/null +++ b/stacks/jupyterhub-keycloak/s3-connection.yaml @@ -0,0 +1,31 @@ +--- +apiVersion: s3.stackable.tech/v1alpha1 +kind: S3Connection +metadata: + name: minio +spec: + host: minio + port: 9000 + accessStyle: Path + credentials: + secretClass: minio-s3-credentials +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: minio-s3-credentials +spec: + backend: + k8sSearch: + searchNamespace: + pod: {} +--- +apiVersion: v1 +kind: Secret +metadata: + name: minio-s3-credentials + labels: 
+ secrets.stackable.tech/class: minio-s3-credentials +stringData: + accessKey: admin + secretKey: "{{ minioAdminPassword }}" diff --git a/stacks/jupyterhub-keycloak/serviceaccount.yaml b/stacks/jupyterhub-keycloak/serviceaccount.yaml new file mode 100644 index 00000000..f8298b4b --- /dev/null +++ b/stacks/jupyterhub-keycloak/serviceaccount.yaml @@ -0,0 +1,35 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: spark +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: spark +subjects: + - kind: ServiceAccount + name: spark +roleRef: + kind: Role + name: spark-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: spark-role +rules: + - apiGroups: [""] + resources: [pods] + verbs: ["*"] + - apiGroups: [""] + resources: [services] + verbs: ["*"] + - apiGroups: [""] + resources: [configmaps] + verbs: ["*"] + - apiGroups: [""] + resources: [persistentvolumeclaims] + verbs: ["*"] diff --git a/stacks/stacks-v2.yaml b/stacks/stacks-v2.yaml index dda24f20..fee53aa0 100644 --- a/stacks/stacks-v2.yaml +++ b/stacks/stacks-v2.yaml @@ -648,3 +648,39 @@ stacks: - name: jupyterHubAdminPassword description: Password of the JupyterHub admin user default: adminadmin + jupyterhub-keycloak: + description: >- + A stack used to demonstrate JupyterHub and Keycloak, using parts of the end-to-end security demo.
+ stackableRelease: dev + stackableOperators: + - commons + - listener + - secret + labels: + - security + - jupyterhub + - keycloak + supportedNamespaces: [] + resourceRequests: + cpu: 3400m + memory: 9010Mi + pvc: 24Gi + manifests: + # TODO: revert paths + - helmChart: stacks/_templates/minio.yaml + - plainYaml: stacks/jupyterhub-keycloak/keycloak-serviceaccount.yaml + - plainYaml: stacks/jupyterhub-keycloak/keycloak-realm-config.yaml + - plainYaml: stacks/jupyterhub-keycloak/keycloak.yaml + - helmChart: stacks/jupyterhub-keycloak/jupyterhub.yaml + - plainYaml: stacks/jupyterhub-keycloak/serviceaccount.yaml + - plainYaml: stacks/jupyterhub-keycloak/s3-connection.yaml + parameters: + - name: keycloakAdminPassword + description: Password of the Keycloak admin user + default: adminadmin + - name: minioAdminPassword + description: Password of the MinIO admin user + default: adminadmin + - name: jupyterhubClientPassword + description: Password of the JupyterHub client user + default: jupyterhubjupyterhub