Skip to content

Commit c11c8cb

Browse files
committed
Reverted changes to pass EMAIL, LICENSE options to zingg
Added a file to configure env variables that are needed for spark
1 parent bb5832c commit c11c8cb

File tree

4 files changed

+53
-2
lines changed

4 files changed

+53
-2
lines changed

client/src/main/java/zingg/client/ClientOptions.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public class ClientOptions {
4949
static { //This is the canonical list of Zingg options.
5050
optionMaster.put(CONF, new Option(CONF, true, "JSON configuration with data input output locations and field definitions", false, true));
5151
optionMaster.put(PHASE, new Option(PHASE, true, Util.join(ZinggOptions.getAllZinggOptions(), "|"), false, true, ZinggOptions.getAllZinggOptions()));
52-
optionMaster.put(LICENSE, new Option(LICENSE, true, "location of license file", false, false));
52+
optionMaster.put(LICENSE, new Option(LICENSE, true, "location of license file", false, true));
5353
optionMaster.put(JOBID, new Option(JOBID, true, "database job id for logging", false, false));
5454
optionMaster.put(EMAIL, new Option(EMAIL, true, "notification email id. Can be an alias", false, false));
5555
optionMaster.put(FORMAT, new Option(FORMAT, true, "format of the data", false, false));

config/zingg-env.sh

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#### ZINGG_EXTRA_JARS #############################################
2+
# The ZINGG_EXTRA_JARS variable passes additional dependency jars to Zingg. It must be set when working with BigQuery, Snowflake, MySQL, etc.
3+
# Multiple jars should be separated by commas.
4+
# e.g. ZINGG_EXTRA_JARS=one.jar,two.jar,three.jar
5+
#ZINGG_EXTRA_JARS=
6+
7+
8+
#### ZINGG_EXTRA_SPARK_CONF #######################################
9+
# The ZINGG_EXTRA_SPARK_CONF variable must be set to pass additional parameters to Spark, for example when running Zingg with BigQuery.
10+
# The variable must be in the format below. Multiple 'conf' entries can be combined and passed in this variable.
11+
# e.g. ZINGG_EXTRA_SPARK_CONF="--conf spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem"
12+
#ZINGG_EXTRA_SPARK_CONF=
13+
14+
15+
#### SPARK_EXECUTOR_MEMORY ########################################
16+
# The SPARK_EXECUTOR_MEMORY variable updates spark.executor.memory. It may be modified based on the memory available in the system. Default is 8GB.
17+
SPARK_EXECUTOR_MEMORY=8g
18+
19+
#### SPARK_DRIVER_MEMORY ##########################################
20+
# The SPARK_DRIVER_MEMORY variable updates spark.driver.memory. It may be modified based on memory available in the system. Default is 8GB.
21+
SPARK_DRIVER_MEMORY=8g

scripts/load-zingg-env.sh

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/usr/bin/env bash
2+
3+
ZINGG_ENV_SH="zingg-env.sh"
4+
export ZINGG_CONF_DIR="$(dirname "$0")"/../config
5+
6+
ZINGG_ENV_SH="${ZINGG_CONF_DIR}/${ZINGG_ENV_SH}"
7+
if [[ -f "${ZINGG_ENV_SH}" ]]; then
8+
# Promote all variable declarations to environment (exported) variables
9+
set -a
10+
. ${ZINGG_ENV_SH}
11+
set +a
12+
fi

scripts/zingg.sh

+19-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ ZINGG_JARS=$ZINGG_HOME/zingg-0.3.3-SNAPSHOT.jar
44
55
LICENSE="test"
66

7+
# Set the ZINGG environment variables
8+
ZINGG_ENV="$(dirname "$0")"/load-zingg-env.sh
9+
if [[ -f "${ZINGG_ENV}" ]]; then
10+
source ${ZINGG_ENV}
11+
fi
12+
713
if [[ -z "${ZINGG_EXTRA_JARS}" ]]; then
814
OPTION_JARS=""
915
else
@@ -16,4 +22,16 @@ else
1622
OPTION_SPARK_CONF="${ZINGG_EXTRA_SPARK_CONF}"
1723
fi
1824

19-
$SPARK_HOME/bin/spark-submit --master $SPARK_MASTER $OPTION_JARS $OPTION_SPARK_CONF --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.default.parallelism="8" --conf spark.executor.extraJavaOptions="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -Xloggc:/tmp/memLog.txt -XX:+UseCompressedOops" --conf spark.executor.memory=10g --conf spark.debug.maxToStringFields=200 --driver-class-path $ZINGG_JARS --class zingg.client.Client $ZINGG_JARS $@ --email $EMAIL --license $LICENSE
25+
if [[ -z "${SPARK_EXECUTOR_MEMORY}" ]]; then
26+
SPARK_EXECUTOR_MEMORY=8g
27+
fi
28+
OPTION_EXECUTOR_MEMORY="--conf spark.executor.memory=${SPARK_EXECUTOR_MEMORY}"
29+
30+
if [[ -z "${SPARK_DRIVER_MEMORY}" ]]; then
31+
SPARK_DRIVER_MEMORY=8g
32+
fi
33+
OPTION_DRIVER_MEMORY="--conf spark.driver.memory=${SPARK_DRIVER_MEMORY}"
34+
35+
# All the additional options must be added here
36+
ALL_OPTIONS=" ${OPTION_DRIVER_MEMORY} ${OPTION_EXECUTOR_MEMORY} ${OPTION_JARS} ${OPTION_SPARK_CONF} "
37+
$SPARK_HOME/bin/spark-submit --master $SPARK_MASTER ${ALL_OPTIONS} --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.default.parallelism="8" --conf spark.executor.extraJavaOptions="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -Xloggc:/tmp/memLog.txt -XX:+UseCompressedOops" --conf spark.debug.maxToStringFields=200 --driver-class-path $ZINGG_JARS --class zingg.client.Client $ZINGG_JARS $@ --email $EMAIL --license $LICENSE

0 commit comments

Comments
 (0)