#!/usr/bin/env bash

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

| 17 | +set -e |
| 18 | + |
| 19 | +source ../../common.sh |
| 20 | + |
| 21 | +SCRIPT_PATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)" |
| 22 | +# Root path to the web fingerprinter plugin. |
| 23 | +PROJECT_ROOT="$(cd -- "${SCRIPT_PATH}/../../../.." >/dev/null 2>&1 ; pwd -P)" |
| 24 | +# Path to the configurations for starting a live instance of Spark. |
| 25 | +SPARK_APP_PATH="${SCRIPT_PATH}/app" |
| 26 | +# Path to the temporary data holder. |
| 27 | +TMP_DATA="/tmp/SPARK_fingerprints" |
| 28 | +# Path to the local git repository for Spark codebase. |
| 29 | +GIT_REPO="${TMP_DATA}/repo" |
| 30 | +# Path to the directory of all the updated fingerprints data. |
| 31 | +FINGERPRINTS_PATH="${TMP_DATA}/fingerprints" |
| 32 | +# Json data of the final result. |
| 33 | +JSON_DATA="${FINGERPRINTS_PATH}/fingerprint.json" |
| 34 | +# Binary proto data of the final result. |
| 35 | +BIN_DATA="${FINGERPRINTS_PATH}/fingerprint.binproto" |
| 36 | +# Read all the versions to be fingerprinted. |
| 37 | +readarray -t ALL_VERSIONS < "${SCRIPT_PATH}/versions.txt" |
| 38 | +mkdir -p "${FINGERPRINTS_PATH}" |
| 39 | + |
| 40 | +startSpark() { |
| 41 | + local version="$1" |
| 42 | + pushd "${SPARK_APP_PATH}" >/dev/null |
| 43 | + # if version-python3 exists then we have a spark container with python3 |
| 44 | + # otherwise we must install python3 |
| 45 | + if docker manifest inspect "apache/spark:${version}-python3" 2>/dev/null ; then |
| 46 | + SPARK_VERSION="${version}-python3" docker compose up -d |
| 47 | + sleep 10 |
| 48 | + else |
| 49 | + SPARK_VERSION="${version}" docker compose up -d |
| 50 | + sleep 10 |
| 51 | + echo -e "\nInstalling python3 into worker container" |
| 52 | + installPython3InSpark "${version}" |
| 53 | + fi |
| 54 | + popd >/dev/null |
| 55 | +} |
| 56 | + |
| 57 | +installPython3InSpark() { |
| 58 | + local version="$1" |
| 59 | + pushd "${SPARK_APP_PATH}" >/dev/null |
| 60 | + docker exec -it -u 0 spark-master apt-get update >/dev/null |
| 61 | + docker exec -it -u 0 spark-master apt-get install python3 python3-pip -y >/dev/null |
| 62 | + docker exec -it -u 0 spark-master pip3 install pyspark=="${version}" >/dev/null |
| 63 | + popd >/dev/null |
| 64 | +} |
| 65 | + |
| 66 | +stopSpark() { |
| 67 | + local version="$1" |
| 68 | + pushd "${SPARK_APP_PATH}" >/dev/null |
| 69 | + SPARK_VERSION="${version}" docker compose down --volumes --remove-orphans |
| 70 | + # or stop the python3 contained version |
| 71 | + SPARK_VERSION="${version}-python3" docker compose down --volumes --remove-orphans |
| 72 | + popd >/dev/null |
| 73 | +} |
| 74 | + |
| 75 | +createFingerprintForWebUI() { |
| 76 | + local spark_version="$1" |
| 77 | + |
| 78 | + echo "Fingerprinting Spark version ${spark_version} ..." |
| 79 | + # Start a live instance of Spark. |
| 80 | + startSpark "${spark_version}" |
| 81 | + |
| 82 | + # Checkout the repository to the correct tag. |
| 83 | + if [[ ${spark_version:0:1} == "v" ]]; then |
| 84 | + checkOutRepo "${GIT_REPO}" "${spark_version}" |
| 85 | + else |
| 86 | + checkOutRepo "${GIT_REPO}" "v${spark_version}" |
| 87 | + fi |
| 88 | + |
| 89 | + # Fingerprint of Master UI |
| 90 | + updateFingerprint \ |
| 91 | + "spark" \ |
| 92 | + "${spark_version}" \ |
| 93 | + "${FINGERPRINTS_PATH}" \ |
| 94 | + "${GIT_REPO}/core/src/main/resources/org/apache/spark/ui/static" \ |
| 95 | + "http://localhost:8080/" |
| 96 | + |
| 97 | + # Fingerprint of Worker UI |
| 98 | + updateFingerprint \ |
| 99 | + "spark" \ |
| 100 | + "${spark_version}" \ |
| 101 | + "${FINGERPRINTS_PATH}" \ |
| 102 | + "${GIT_REPO}/core/src/main/resources/org/apache/spark/ui/static" \ |
| 103 | + "http://localhost:8081/" |
| 104 | + |
| 105 | + docker exec -d spark-master /opt/spark/bin/spark-submit --master spark://spark-master:7077 /opt/spark/examples/src/main/python/fib.py |
| 106 | + sleep 10 |
| 107 | + |
| 108 | + # Fingerprint of Web Interface |
| 109 | + updateFingerprint \ |
| 110 | + "spark" \ |
| 111 | + "${spark_version}" \ |
| 112 | + "${FINGERPRINTS_PATH}" \ |
| 113 | + "${GIT_REPO}/core/src/main/resources/org/apache/spark/ui/static" \ |
| 114 | + "http://localhost:4040/" |
| 115 | + |
| 116 | + |
| 117 | + # Stop the live instance of Spark. |
| 118 | + stopSpark "${spark_version}" |
| 119 | +} |
| 120 | + |
| 121 | + |
| 122 | +# Convert the existing data file to a human-readable json file. |
| 123 | +convertFingerprint \ |
| 124 | + "${PROJECT_ROOT}/src/main/resources/fingerprinters/web/data/community/spark.binproto" \ |
| 125 | + "${JSON_DATA}" |
| 126 | + |
| 127 | +# Fetch Spark codebase. |
| 128 | +if [[ ! -d "${GIT_REPO}" ]] ; then |
| 129 | + git clone https://github.com/apache/spark.git "${GIT_REPO}" |
| 130 | +fi |
| 131 | + |
| 132 | +# Update for all the versions listed in versions.txt file. |
| 133 | +for spark_version in "${ALL_VERSIONS[@]}"; do |
| 134 | + createFingerprintForWebUI "${spark_version}" |
| 135 | +done |
| 136 | + |
| 137 | +convertFingerprint "${JSON_DATA}" "${BIN_DATA}" |
| 138 | + |
| 139 | +echo "Fingerprint updated for Spark. Please commit the following file:" |
| 140 | +echo " ${BIN_DATA}" |