|
43 | 43 | "NAMESPACE = os.environ.get(\"NAMESPACE\", \"default\")\n",
|
44 | 44 | "POD_NAME = os.environ.get(\"HOSTNAME\", f\"jupyter-{os.environ.get('USER', 'default')}-{NAMESPACE}\")\n",
|
45 | 45 | "\n",
|
46 |
| - "# works with python-3.11 notebook image\n", |
47 |
| - "#EXECUTOR_IMAGE = \"oci.stackable.tech/sdp/spark-k8s:3.5.0-stackable24.3.0\" \n", |
48 |
| - "\n", |
49 |
| - "# jars differ in size, 17.0.12 vs. 17.0.13, 3.11.10 vs. 3.11.9\n", |
50 |
| - "#SerializableBuffer conflict\n", |
51 |
| - "#EXECUTOR_IMAGE = \"oci.stackable.tech/sdp/spark-k8s:3.5.2-stackable24.11.1\" \n", |
52 |
| - "\n", |
53 |
| - "# java, jars match\n", |
54 |
| - "#Python in worker has different version (3, 10) than that in driver 3.11, PySpark cannot run with different minor versions. \n", |
55 |
| - "#EXECUTOR_IMAGE = \"apache/spark:3.5.2-java17-python3\" \n", |
56 |
| - "\n", |
57 |
| - "# java, jars match\n", |
58 |
| - "#Python in worker has different version (3, 10) than that in driver 3.11, PySpark cannot run with different minor versions. \n", |
59 |
| - "#EXECUTOR_IMAGE = \"spark:3.5.2-scala2.12-java17-python3-ubuntu\" \n", |
60 |
| - "\n", |
61 |
| - "#Python in worker has different version (3, 12) than that in driver 3.11, PySpark cannot run with different minor versions.\n", |
62 |
| - "#EXECUTOR_IMAGE = \"bitnami/spark:3.5.2\"\n", |
63 |
| - "\n", |
64 |
| - "# custom image with python 3.11 - works!\n", |
65 |
| - "# based off: spark:3.5.2-scala2.12-java17-ubuntu\n", |
66 |
| - "# see: \n", |
67 | 46 | "EXECUTOR_IMAGE = \"spark:3.5.2-python311\" \n",
|
68 | 47 | "\n",
|
69 | 48 | "spark = (\n",
|
70 | 49 | " SparkSession.builder\n",
|
71 | 50 | " .master(f'k8s://https://{os.environ[\"KUBERNETES_SERVICE_HOST\"]}:{os.environ[\"KUBERNETES_SERVICE_PORT\"]}')\n",
|
72 |
| - " .appName(\"process-s3-data\")\n", |
| 51 | + " .appName(\"process-s3-data-\"{os.environ.get('USER', 'default')}-{NAMESPACE})\n", |
73 | 52 | " .config(\"spark.kubernetes.container.image\", EXECUTOR_IMAGE)\n",
|
74 | 53 | " .config(\"spark.kubernetes.container.image.pullPolicy\", \"IfNotPresent\")\n",
|
75 | 54 | " .config(\"spark.kubernetes.namespace\", NAMESPACE)\n",
|
|
80 | 59 | " .config(\"spark.executor.instances\", \"1\")\n",
|
81 | 60 | " .config(\"spark.executor.memory\", \"1g\")\n",
|
82 | 61 | " .config(\"spark.executor.cores\", \"1\")\n",
|
83 |
| - " # bitnami. See https://github.com/bitnami/containers/issues/52698#issuecomment-2275913474\n", |
84 |
| - " #.config(\"spark.executorEnv.LD_PRELOAD\", \"/opt/bitnami/common/lib/libnss_wrapper.so\")\n", |
85 | 62 | " .config(\"spark.hadoop.fs.s3a.endpoint\", \"http://minio:9000/\")\n",
|
86 | 63 | " .config(\"spark.hadoop.fs.s3a.path.style.access\", \"true\")\n",
|
87 | 64 | " .config(\"spark.hadoop.fs.s3a.access.key\", minio_user)\n",
|
|
0 commit comments