Copybara import of the project:
--
7d7383e by VickyTheViking <[email protected]>:

Updater script, asset files, and fingerprint added.

--
a87daa7 by VickyTheViking <[email protected]>:

a little fix

--
e79f202 by VickyTheViking <[email protected]>:

user: root

--
edaff69 by VickyTheViking <[email protected]>:

Point spark-master to 127.0.0.1 because the name is not resolvable from outside the Docker network.
Use the python3-included containers where available; otherwise install python3, python3-pip, and the pyspark Python package.

--
075cb5a by VickyTheViking <[email protected]>:

minor

--
58f6602 by VickyTheViking <[email protected]>:

fix bugs

--
9b55a10 by VickyTheViking <[email protected]>:

update binproto

--
bc9b70f by VickyTheViking <[email protected]>:

add new versions

--
a54b736 by VickyTheViking <[email protected]>:

Use docker inspect instead of trying and catching errors, and use apt-get instead of interactive apt.

--
f30bc56 by VickyTheViking <[email protected]>:

replace g with 'docker manifest inspect'

--
22b737e by Vicky <[email protected]>:

add 3.5.4

Co-authored-by: Savio Sisco <[email protected]>
--
5c24856 by VickyTheViking <[email protected]>:

update binproto for version 3.5.4

COPYBARA_INTEGRATE_REVIEW=#448 from VickyTheViking:spark aaeb118
PiperOrigin-RevId: 725765024
Change-Id: I8178e4593764040454c1554121b251e00c6cfa5d
VickyTheViking authored and copybara-github committed Feb 11, 2025
1 parent 9989eeb commit aff7b12
Showing 5 changed files with 13,661 additions and 0 deletions.
@@ -0,0 +1,28 @@
services:
  spark-master:
    image: apache/spark:${SPARK_VERSION}
    container_name: spark-master
    hostname: spark-master
    ports:
      - "8080:8080"
      - "4040:4040"
    volumes:
      - ./examples:/opt/spark/examples/src/main/python
    environment:
      - SPARK_MASTER_HOST=spark-master
      - SPARK_MASTER_PORT=7077
      - SPARK_LOCAL_HOSTNAME=127.0.0.1
      - DEBIAN_FRONTEND=noninteractive
    command: /opt/spark/bin/spark-class org.apache.spark.deploy.master.Master

  spark-worker:
    image: apache/spark:${SPARK_VERSION}
    container_name: spark-worker
    user: root
    ports:
      - "8081:8081"
    volumes:
      - ./examples:/opt/spark/examples/src/main/python
    environment:
      - SPARK_MASTER=spark://spark-master:7077
    command: /opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
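
This compose file is what the updater brings up for each release. A minimal sketch of exercising it by hand, assuming it is run from the updater's app/ directory (where this compose file lives) and that the chosen tag exists on Docker Hub; 3.5.4 here is just an illustrative entry from versions.txt:

# Start one master/worker pair pinned to a single Spark release.
SPARK_VERSION=3.5.4 docker compose up -d
# Master UI is published on 8080, Worker UI on 8081; 4040 appears once a job is submitted.
curl -s -o /dev/null -w '%{http_code}\n' http://localhost:8080/
curl -s -o /dev/null -w '%{http_code}\n' http://localhost:8081/
# Tear everything down again, including volumes.
SPARK_VERSION=3.5.4 docker compose down --volumes --remove-orphans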
@@ -0,0 +1,33 @@
from pyspark.sql import SparkSession
import time

# Initialize Spark session
spark = SparkSession.builder \
    .appName("Fibonacci with Spark") \
    .getOrCreate()

# Generator that yields the first n Fibonacci numbers
def fibonacci(n):
    a, b = 0, 1
    for _ in range(n):
        yield a
        a, b = b, a + b

# Create an RDD with the range of Fibonacci numbers to calculate
n = 10  # Number of Fibonacci numbers to generate
fibonacci_rdd = spark.sparkContext.parallelize(range(n))

# Calculate Fibonacci numbers using a map transformation
fibonacci_result = fibonacci_rdd.map(lambda x: list(fibonacci(x)))

# Spin in an infinite loop with a sleep to keep the Spark Web UI running;
# we don't actually want to calculate the Fibonacci numbers :)
while True:
    time.sleep(1)

# Collect and print the Fibonacci numbers (never reached by design)
for result in fibonacci_result.collect():
    print(result)

# Stop the Spark session
spark.stop()
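
This job never finishes on purpose: it only exists so the Spark application UI on port 4040 stays up long enough to be fingerprinted. A minimal sketch of submitting it against the running compose stack, using the same command the updater script below runs:

# Submit the example in the background inside the master container; the
# infinite loop keeps the application UI reachable on http://localhost:4040/.
docker exec -d spark-master /opt/spark/bin/spark-submit \
  --master spark://spark-master:7077 \
  /opt/spark/examples/src/main/python/fib.py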
140 changes: 140 additions & 0 deletions google/fingerprinters/web/scripts/updater/community/spark/update.sh
@@ -0,0 +1,140 @@
#!/usr/bin/env bash

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

source ../../common.sh

SCRIPT_PATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)"
# Root path to the web fingerprinter plugin.
PROJECT_ROOT="$(cd -- "${SCRIPT_PATH}/../../../.." >/dev/null 2>&1 ; pwd -P)"
# Path to the configurations for starting a live instance of Spark.
SPARK_APP_PATH="${SCRIPT_PATH}/app"
# Path to the temporary data holder.
TMP_DATA="/tmp/SPARK_fingerprints"
# Path to the local git repository for Spark codebase.
GIT_REPO="${TMP_DATA}/repo"
# Path to the directory of all the updated fingerprints data.
FINGERPRINTS_PATH="${TMP_DATA}/fingerprints"
# JSON data of the final result.
JSON_DATA="${FINGERPRINTS_PATH}/fingerprint.json"
# Binary proto data of the final result.
BIN_DATA="${FINGERPRINTS_PATH}/fingerprint.binproto"
# Read all the versions to be fingerprinted.
readarray -t ALL_VERSIONS < "${SCRIPT_PATH}/versions.txt"
mkdir -p "${FINGERPRINTS_PATH}"

startSpark() {
  local version="$1"
  pushd "${SPARK_APP_PATH}" >/dev/null
  # If the <version>-python3 image tag exists, the Spark container already
  # ships python3; otherwise we must install python3 ourselves.
  if docker manifest inspect "apache/spark:${version}-python3" 2>/dev/null ; then
    SPARK_VERSION="${version}-python3" docker compose up -d
    sleep 10
  else
    SPARK_VERSION="${version}" docker compose up -d
    sleep 10
    echo -e "\nInstalling python3 into the spark-master container"
    installPython3InSpark "${version}"
  fi
  popd >/dev/null
}

installPython3InSpark() {
  local version="$1"
  pushd "${SPARK_APP_PATH}" >/dev/null
  docker exec -it -u 0 spark-master apt-get update >/dev/null
  docker exec -it -u 0 spark-master apt-get install python3 python3-pip -y >/dev/null
  docker exec -it -u 0 spark-master pip3 install pyspark=="${version}" >/dev/null
  popd >/dev/null
}

stopSpark() {
  local version="$1"
  pushd "${SPARK_APP_PATH}" >/dev/null
  SPARK_VERSION="${version}" docker compose down --volumes --remove-orphans
  # Also bring down the python3-included variant, whichever one was started.
  SPARK_VERSION="${version}-python3" docker compose down --volumes --remove-orphans
  popd >/dev/null
}

createFingerprintForWebUI() {
  local spark_version="$1"

  echo "Fingerprinting Spark version ${spark_version} ..."
  # Start a live instance of Spark.
  startSpark "${spark_version}"

  # Check out the repository at the matching tag (entries in versions.txt may
  # or may not carry a leading "v").
  if [[ ${spark_version:0:1} == "v" ]]; then
    checkOutRepo "${GIT_REPO}" "${spark_version}"
  else
    checkOutRepo "${GIT_REPO}" "v${spark_version}"
  fi

  # Fingerprint of the Master UI.
  updateFingerprint \
    "spark" \
    "${spark_version}" \
    "${FINGERPRINTS_PATH}" \
    "${GIT_REPO}/core/src/main/resources/org/apache/spark/ui/static" \
    "http://localhost:8080/"

  # Fingerprint of the Worker UI.
  updateFingerprint \
    "spark" \
    "${spark_version}" \
    "${FINGERPRINTS_PATH}" \
    "${GIT_REPO}/core/src/main/resources/org/apache/spark/ui/static" \
    "http://localhost:8081/"

  # Submit a long-running example job so the application UI on port 4040 comes up.
  docker exec -d spark-master /opt/spark/bin/spark-submit --master spark://spark-master:7077 /opt/spark/examples/src/main/python/fib.py
  sleep 10

  # Fingerprint of the application Web UI.
  updateFingerprint \
    "spark" \
    "${spark_version}" \
    "${FINGERPRINTS_PATH}" \
    "${GIT_REPO}/core/src/main/resources/org/apache/spark/ui/static" \
    "http://localhost:4040/"

  # Stop the live instance of Spark.
  stopSpark "${spark_version}"
}


# Convert the existing data file to a human-readable json file.
convertFingerprint \
"${PROJECT_ROOT}/src/main/resources/fingerprinters/web/data/community/spark.binproto" \
"${JSON_DATA}"

# Fetch Spark codebase.
if [[ ! -d "${GIT_REPO}" ]] ; then
  git clone https://github.com/apache/spark.git "${GIT_REPO}"
fi

# Update the fingerprints for all the versions listed in versions.txt.
for spark_version in "${ALL_VERSIONS[@]}"; do
  createFingerprintForWebUI "${spark_version}"
done

convertFingerprint "${JSON_DATA}" "${BIN_DATA}"

echo "Fingerprint updated for Spark. Please commit the following file:"
echo " ${BIN_DATA}"
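
A minimal sketch of running this updater end to end, assuming Docker with the compose plugin is available and that the common.sh helpers (updateFingerprint, convertFingerprint, checkOutRepo) work as referenced above:

# Run from inside the spark updater directory so that "source ../../common.sh"
# resolves (path shown relative to the repository root).
cd google/fingerprinters/web/scripts/updater/community/spark
bash update.sh
# On success the refreshed data is written to
# /tmp/SPARK_fingerprints/fingerprints/fingerprint.binproto, which presumably
# replaces the checked-in
# src/main/resources/fingerprinters/web/data/community/spark.binproto.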
@@ -0,0 +1,19 @@
v3.1.3
v3.2.1
v3.2.2
v3.2.3
v3.2.4
v3.3.0
3.3.1
v3.3.2
3.3.3
3.4.0
3.4.1
3.4.2
3.4.3
3.4.4
3.5.0
3.5.1
3.5.2
3.5.3
3.5.4
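
The mixed entries above (some with a leading "v", some without) both work because the updater normalizes the value before checking out the tag; a minimal sketch of that normalization, mirroring the branch in createFingerprintForWebUI:

# Normalize a versions.txt entry to the tag name used in the Spark repository.
version="3.5.4"   # both "3.5.4" and "v3.5.4" appear in the list above
if [[ ${version:0:1} == "v" ]]; then
  tag="${version}"
else
  tag="v${version}"
fi
echo "${tag}"     # prints v3.5.4 either way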
[The fifth changed file, the updated spark.binproto fingerprint data, is not rendered here.]
