Skip to content

Commit aff7b12

Browse files
VickyTheVikingcopybara-github
authored andcommitted
Copybara import of the project:
-- 7d7383e by VickyTheViking <[email protected]>: updater script, asset files and also fingerprint added. -- a87daa7 by VickyTheViking <[email protected]>: a little fix -- e79f202 by VickyTheViking <[email protected]>: user: root -- edaff69 by VickyTheViking <[email protected]>: spark-master to 127.0.0.1 because it is not resolvable from outside of docker network use python3 included containers, otherwise install python3, python3-pip and pyspark python package -- 075cb5a by VickyTheViking <[email protected]>: minor -- 58f6602 by VickyTheViking <[email protected]>: fix bugs -- 9b55a10 by VickyTheViking <[email protected]>: update binproto -- bc9b70f by VickyTheViking <[email protected]>: add new versions -- a54b736 by VickyTheViking <[email protected]>: docker inspect instead of try with errors and use apt-get instead of interactive apt -- f30bc56 by VickyTheViking <[email protected]>: replace g with 'docker manifest inspect' -- 22b737e by Vicky <[email protected]>: add 3.5.4 Co-authored-by: Savio Sisco <[email protected]> -- 5c24856 by VickyTheViking <[email protected]>: update binproto for version 3.5.4 COPYBARA_INTEGRATE_REVIEW=#448 from VickyTheViking:spark aaeb118 PiperOrigin-RevId: 725765024 Change-Id: I8178e4593764040454c1554121b251e00c6cfa5d
1 parent 9989eeb commit aff7b12

File tree

5 files changed

+13661
-0
lines changed

5 files changed

+13661
-0
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
services:
2+
spark-master:
3+
image: apache/spark:${SPARK_VERSION}
4+
container_name: spark-master
5+
hostname: spark-master
6+
ports:
7+
- "8080:8080"
8+
- "4040:4040"
9+
volumes:
10+
- ./examples:/opt/spark/examples/src/main/python
11+
environment:
12+
- SPARK_MASTER_HOST=spark-master
13+
- SPARK_MASTER_PORT=7077
14+
- SPARK_LOCAL_HOSTNAME=127.0.0.1
15+
- DEBIAN_FRONTEND=noninteractive
16+
command: /opt/spark/bin/spark-class org.apache.spark.deploy.master.Master
17+
18+
spark-worker:
19+
image: apache/spark:${SPARK_VERSION}
20+
container_name: spark-worker
21+
user: root
22+
ports:
23+
- "8081:8081"
24+
volumes:
25+
- ./examples:/opt/spark/examples/src/main/python
26+
environment:
27+
- SPARK_MASTER=spark://spark-master:7077
28+
command: /opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from pyspark.sql import SparkSession
2+
import time
3+
4+
# Initialize Spark session
5+
spark = SparkSession.builder \
6+
.appName("Fibonacci with Spark") \
7+
.getOrCreate()
8+
9+
# Function to calculate Fibonacci numbers
10+
def fibonacci(n):
11+
a, b = 0, 1
12+
for _ in range(n):
13+
yield a
14+
a, b = b, a + b
15+
16+
# Create an RDD with the range of Fibonacci numbers to calculate
17+
n = 10 # Number of Fibonacci numbers to generate
18+
fibonacci_rdd = spark.sparkContext.parallelize(range(n))
19+
20+
# Calculate Fibonacci numbers using map transformation
21+
fibonacci_result = fibonacci_rdd.map(lambda x: list(fibonacci(x)))
22+
23+
# Introduce a unlimit loop with a sleep time to keep spark WebUI running
24+
# We actually dont' want to calculate fibonacci :)
25+
while True:
26+
time.sleep(1)
27+
28+
# Collect and print the Fibonacci numbers
29+
for result in fibonacci_result.collect():
30+
print(result)
31+
32+
# Stop the Spark session
33+
spark.stop()
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#!/usr/bin/env bash

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Updates the Apache Spark web fingerprints for all versions in versions.txt.
# Must be run from this directory (common.sh is sourced by relative path).

# Fail fast: abort on errors, unset variables, and failures inside pipelines
# (plain `set -e` lets non-final pipeline stages fail silently).
# NOTE(review): assumes common.sh is `set -u`-clean — confirm.
set -euo pipefail

source ../../common.sh

SCRIPT_PATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
# Root path to the web fingerprinter plugin.
PROJECT_ROOT="$(cd -- "${SCRIPT_PATH}/../../../.." >/dev/null 2>&1 ; pwd -P)"
# Path to the configurations for starting a live instance of Spark.
SPARK_APP_PATH="${SCRIPT_PATH}/app"
# Path to the temporary data holder.
TMP_DATA="/tmp/SPARK_fingerprints"
# Path to the local git repository for Spark codebase.
GIT_REPO="${TMP_DATA}/repo"
# Path to the directory of all the updated fingerprints data.
FINGERPRINTS_PATH="${TMP_DATA}/fingerprints"
# Json data of the final result.
JSON_DATA="${FINGERPRINTS_PATH}/fingerprint.json"
# Binary proto data of the final result.
BIN_DATA="${FINGERPRINTS_PATH}/fingerprint.binproto"
# Read all the versions to be fingerprinted, one tag per line.
readarray -t ALL_VERSIONS < "${SCRIPT_PATH}/versions.txt"
mkdir -p "${FINGERPRINTS_PATH}"
# Starts a live Spark instance (master + worker) for the given version.
# Prefers the "<version>-python3" image tag, which already ships python3;
# otherwise falls back to the plain tag and installs python3 afterwards.
# Arguments:
#   $1 - Spark version tag, e.g. "3.5.4" or "v3.3.0".
startSpark() {
  local version="$1"
  local tag="${version}"
  pushd "${SPARK_APP_PATH}" >/dev/null
  # `docker manifest inspect` probes the registry without pulling. Only the
  # exit code matters: silence the manifest JSON on stdout as well as errors
  # (previously only stderr was redirected, spilling JSON into the log).
  if docker manifest inspect "apache/spark:${version}-python3" >/dev/null 2>&1 ; then
    tag="${version}-python3"
  fi
  SPARK_VERSION="${tag}" docker compose up -d
  sleep 10
  if [[ "${tag}" == "${version}" ]]; then
    # Plain image: python3 is not included, install it into the container.
    echo -e "\nInstalling python3 into worker container"
    installPython3InSpark "${version}"
  fi
  popd >/dev/null
}
56+
57+
# Installs python3, pip and a version-matched pyspark into the running
# spark-master container (needed for image tags without bundled python3).
# Arguments:
#   $1 - Spark version, used to pin the pyspark package.
installPython3InSpark() {
  local version="$1"
  pushd "${SPARK_APP_PATH}" >/dev/null
  # No -i/-t: this script is non-interactive, and `docker exec -t` fails
  # when no TTY is attached (e.g. CI). -u 0 runs the commands as root.
  docker exec -u 0 spark-master apt-get update >/dev/null
  docker exec -u 0 spark-master apt-get install -y python3 python3-pip >/dev/null
  docker exec -u 0 spark-master pip3 install "pyspark==${version}" >/dev/null
  popd >/dev/null
}
65+
66+
# Tears down the compose stack for the given version. Both the plain and
# the "-python3" tagged stacks are brought down, since either one may be
# the stack that startSpark launched.
# Arguments:
#   $1 - Spark version tag.
stopSpark() {
  local version="$1"
  local suffix
  pushd "${SPARK_APP_PATH}" >/dev/null
  for suffix in "" "-python3"; do
    SPARK_VERSION="${version}${suffix}" docker compose down --volumes --remove-orphans
  done
  popd >/dev/null
}
74+
75+
# Fingerprints the three Spark web UIs for one version: the master UI
# (:8080), the worker UI (:8081), and — after submitting a long-running
# example job — the application UI (:4040).
# Arguments:
#   $1 - Spark version, with or without the leading "v".
createFingerprintForWebUI() {
  local spark_version="$1"
  local git_tag="${spark_version}"
  local static_assets="${GIT_REPO}/core/src/main/resources/org/apache/spark/ui/static"

  echo "Fingerprinting Spark version ${spark_version} ..."
  # Start a live instance of Spark.
  startSpark "${spark_version}"

  # Checkout the repository to the correct tag; upstream tags always carry
  # the "v" prefix, while versions.txt mixes both forms.
  if [[ "${git_tag:0:1}" != "v" ]]; then
    git_tag="v${git_tag}"
  fi
  checkOutRepo "${GIT_REPO}" "${git_tag}"

  # Fingerprint of Master UI
  updateFingerprint \
    "spark" \
    "${spark_version}" \
    "${FINGERPRINTS_PATH}" \
    "${static_assets}" \
    "http://localhost:8080/"

  # Fingerprint of Worker UI
  updateFingerprint \
    "spark" \
    "${spark_version}" \
    "${FINGERPRINTS_PATH}" \
    "${static_assets}" \
    "http://localhost:8081/"

  # Submit the never-ending example job so the application UI exists.
  docker exec -d spark-master /opt/spark/bin/spark-submit --master spark://spark-master:7077 /opt/spark/examples/src/main/python/fib.py
  sleep 10

  # Fingerprint of Web Interface
  updateFingerprint \
    "spark" \
    "${spark_version}" \
    "${FINGERPRINTS_PATH}" \
    "${static_assets}" \
    "http://localhost:4040/"

  # Stop the live instance of Spark.
  stopSpark "${spark_version}"
}
120+
121+
122+
# Convert the existing data file to a human-readable json file.
123+
convertFingerprint \
124+
"${PROJECT_ROOT}/src/main/resources/fingerprinters/web/data/community/spark.binproto" \
125+
"${JSON_DATA}"
126+
127+
# Fetch Spark codebase.
128+
if [[ ! -d "${GIT_REPO}" ]] ; then
129+
git clone https://github.com/apache/spark.git "${GIT_REPO}"
130+
fi
131+
132+
# Update for all the versions listed in versions.txt file.
133+
for spark_version in "${ALL_VERSIONS[@]}"; do
134+
createFingerprintForWebUI "${spark_version}"
135+
done
136+
137+
convertFingerprint "${JSON_DATA}" "${BIN_DATA}"
138+
139+
echo "Fingerprint updated for Spark. Please commit the following file:"
140+
echo " ${BIN_DATA}"
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
v3.1.3
2+
v3.2.1
3+
v3.2.2
4+
v3.2.3
5+
v3.2.4
6+
v3.3.0
7+
3.3.1
8+
v3.3.2
9+
3.3.3
10+
3.4.0
11+
3.4.1
12+
3.4.2
13+
3.4.3
14+
3.4.4
15+
3.5.0
16+
3.5.1
17+
3.5.2
18+
3.5.3
19+
3.5.4

0 commit comments

Comments
 (0)