|
| 1 | +#!/bin/bash |
| 2 | + |
| 3 | +# Common util **functions** that can be sourced in other scripts. |
| 4 | + |
| 5 | +# note: printf is used instead of echo to avoid backslash |
| 6 | +# processing and to properly handle values that begin with a '-'. |
| 7 | + |
# Print all arguments as a single line ("$*") on stdout.
log() {
  printf '%s\n' "$*"
}

# Print the arguments on stderr, prefixed with "ERROR: ".
error() {
  log "ERROR: $*" >&2
}

# Report the arguments through error() and terminate the shell with status 1.
fatal() {
  error "$@"
  exit 1
}
| 11 | + |
# Run a command, retrying up to three more times on failure with waits of
# 10, 20 and 40 seconds before the respective retries.
#
# The first attempt runs in the current shell; each retry runs in a
# subshell, so state changes made by a retried command (variables, cwd) do
# not reach the caller. The status is that of the last attempt made.
retry() {
  "$@" \
    || (sleep 10 && "$@") \
    || (sleep 20 && "$@") \
    || (sleep 40 && "$@")
}
| 15 | + |
# Compositional trap, taken from https://stackoverflow.com/a/7287873/23845
# Appends a command to one or more traps instead of replacing what is
# already registered for them.
#
# Arguments:
#   $1   - code to add
#   $2.. - names of the traps to modify
#
# Calls fatal() on a usage error or when a trap cannot be updated.
trap_add() {
  # Function-local so that calling trap_add does not create or clobber
  # globals in the script that sourced this file.
  local trap_add_cmd=$1
  shift || fatal "${FUNCNAME[0]} usage error"
  local trap_add_name
  for trap_add_name in "$@"; do
    trap -- "$(
      # helper fn to get existing trap command from output
      # of trap -p
      extract_trap_cmd() { printf '%s\n' "$3"; }
      # print existing trap command with newline
      eval "extract_trap_cmd $(trap -p "${trap_add_name}")"
      # print the new trap command
      printf '%s\n' "${trap_add_cmd}"
    )" "${trap_add_name}" \
      || fatal "unable to add to trap ${trap_add_name}"
  done
}
# Set the trace attribute for the above function. This is required to
# modify DEBUG or RETURN traps because functions don't inherit them unless
# the trace attribute is set.
declare -f -t trap_add
| 41 | + |
# Exit the shell with status 1 if the git checkout in the current directory
# has uncommitted changes, after printing the offending `git status` lines.
# Untracked entries matching '?? third_party' are ignored.
#
# Globals: BUILD_ENVIRONMENT (read; may be unset). The check is skipped
#          when it contains "rocm" or "xla".
function assert_git_not_dirty() {
  # TODO: we should add an option to `build_amd.py` that reverts the repo to
  #       an unmodified state.
  # Default to empty so an unset variable does not abort under `set -u`.
  local build_env=${BUILD_ENVIRONMENT:-}
  if [[ "${build_env}" == *rocm* || "${build_env}" == *xla* ]]; then
    return 0
  fi

  # Local, so the status text does not leak into the sourcing script.
  # `|| true`: grep exits non-zero when nothing is left, i.e. a clean tree.
  local git_status
  git_status=$(git status --porcelain | grep -v '?? third_party' || true)
  if [[ -n "${git_status}" ]]; then
    echo "Build left local git repository checkout dirty"
    echo "git status --porcelain:"
    echo "${git_status}"
    exit 1
  fi
}
| 55 | + |
# Install PyTorch and other locally built wheels without touching the
# network (pip runs with --no-index --no-deps), one wheel per pip call.
#
# Arguments: wheel paths. As a convenience the FIRST argument may instead
#            be a single space-separated list of paths; in that case only
#            that list is installed and any further arguments are ignored.
# Returns:   status of the last pip invocation (0 if nothing was installed).
function pip_install_whl() {
  # Locals, so the sourcing script's own `path`/`paths` are left alone.
  local -a wheels=("$@")
  local wheel

  # A space inside the first argument means it carries several paths.
  if [[ "${1:-}" == *" "* ]]; then
    IFS=' ' read -r -a wheels <<< "$1"
  fi

  for wheel in "${wheels[@]}"; do
    echo "Installing $wheel"
    python3 -mpip install --no-index --no-deps "$wheel"
  done
}
| 80 | + |
| 81 | + |
# Run `pip install` with the given arguments, making up to six attempts:
# three with "--progress-bar off", then three without it, because old
# versions of pip do not know the "--progress-bar" flag.
#
# Returns: 0 as soon as one attempt succeeds, otherwise the status of the
#          last failed attempt.
function pip_install() {
  local _pip_try _pip_rc=0

  for _pip_try in 1 2 3; do
    pip install --progress-bar off "$@" && return 0
    _pip_rc=$?
  done

  for _pip_try in 1 2 3; do
    pip install "$@" && return 0
    _pip_rc=$?
  done

  return "${_pip_rc}"
}
| 88 | + |
# Run `pip uninstall -y` with the given arguments, trying a second time if
# the first attempt fails. Returns the status of the last attempt made.
function pip_uninstall() {
  if ! pip uninstall -y "$@"; then
    pip uninstall -y "$@"
  fi
}
| 93 | + |
# Run a command with errexit temporarily disabled and return its exit
# status, so that a failing command does not abort a `set -e` script.
#
# Arguments: the command and its arguments.
# Globals:   retcode (written) - also receives the status; kept global
#            because sourcing scripts may read it after the call.
# Returns:   the command's exit status.
function get_exit_code() {
  # Remember whether errexit was on. Re-enabling it unconditionally would
  # silently switch it on for callers that never asked for it.
  local errexit_was_on=0
  if [[ $- == *e* ]]; then
    errexit_was_on=1
  fi

  set +e
  "$@"
  retcode=$?
  if (( errexit_was_on )); then
    set -e
  fi
  return $retcode
}
| 101 | + |
# Download the cross-platform, dependency-free Python version of Bazelisk
# to tools/bazel; it fetches the platform specific version of Bazel to use
# from .bazelversion. The download is verified against a pinned SHA-1
# digest before it is made executable.
#
# Returns: non-zero if the download or the checksum verification fails, in
#          which case tools/bazel is NOT made executable.
function get_bazel() {
  retry curl --location --output tools/bazel \
    https://raw.githubusercontent.com/bazelbuild/bazelisk/v1.16.0/bazelisk.py \
    || return

  # Checksum line in the canonical form: digest, two spaces, path.
  # Stop here on a mismatch even when the caller runs without `set -e`.
  shasum --algorithm=1 --check \
    <(echo 'd4369c3d293814d3188019c9f7527a948972d9f8  tools/bazel') \
    || return

  chmod u+x tools/bazel
}
| 112 | + |
# Bazel-specific installation of an sccache wrapper around nvcc. It exists
# because a Bazel bug requires some path massaging as a workaround; see
# https://github.com/bazelbuild/bazel/issues/10167
#
# Moves the real compiler to /usr/local/cuda/bin/nvcc-real and writes a
# wrapper script in its place (all via sudo).
function install_sccache_nvcc_for_bazel() {
  sudo mv /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc-real

  # Write the `/usr/local/cuda/bin/nvcc` wrapper. The heredoc delimiter is
  # quoted so the script text is written literally, without expansion.
  # Unless its parent process is sccache, the wrapper re-executes itself
  # through sccache; otherwise it runs the real compiler.
  sudo tee /usr/local/cuda/bin/nvcc << 'EOF'
#!/bin/sh
if [ $(env -u LD_PRELOAD ps -p $PPID -o comm=) != sccache ]; then
  exec sccache /usr/local/cuda/bin/nvcc "$@"
else
  exec external/local_cuda/cuda/bin/nvcc-real "$@"
fi
EOF

  sudo chmod +x /usr/local/cuda/bin/nvcc
}
| 132 | + |
# Install the MonkeyType package through pip_install.
function install_monkeytype() {
  pip_install MonkeyType
}
| 137 | + |
| 138 | + |
# Print the contents of the pin file .github/ci_commit_pins/<name>.txt,
# resolved relative to the current directory.
#
# Arguments: $1 - pin name, i.e. the file name without ".txt"
function get_pinned_commit() {
  cat ".github/ci_commit_pins/${1}.txt"
}
| 142 | + |
# Build and install torchaudio from GitHub at the commit pinned under the
# name "audio".
#
# Arguments: $1 - "cuda" to build with TORCH_CUDA_ARCH_LIST="8.0;8.6";
#                 any other value (or none) builds without setting it.
function install_torchaudio() {
  local pinned
  pinned=$(get_pinned_commit audio)
  local audio_url="git+https://github.com/pytorch/audio.git@${pinned}"

  if [[ "$1" != "cuda" ]]; then
    pip_install --no-use-pep517 --user "${audio_url}"
    return
  fi

  # TODO: This is better to be passed as a parameter from _linux-test workflow
  # so that it can be consistent with what is set in build
  TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install --no-use-pep517 --user "${audio_url}"
}
| 155 | + |
# Install torchdata followed by torchtext from GitHub, each at the commit
# pinned under the names "data" and "text". Both pins are looked up before
# anything is installed.
function install_torchtext() {
  local pinned_data pinned_text
  pinned_data=$(get_pinned_commit data)
  pinned_text=$(get_pinned_commit text)

  local data_url="git+https://github.com/pytorch/data.git@${pinned_data}"
  local text_url="git+https://github.com/pytorch/text.git@${pinned_text}"

  pip_install --no-use-pep517 --user "${data_url}"
  pip_install --no-use-pep517 --user "${text_url}"
}
| 164 | + |
# Install torchvision from GitHub at the commit pinned under the name
# "vision".
#
# Globals: LD_PRELOAD (read; temporarily modified), HOME (read).
# When LD_PRELOAD is non-empty, a stub library whose dlerror() returns an
# empty string is compiled to ${HOME}/dlerror.so and appended to LD_PRELOAD
# for the duration of the install; the previous value is restored afterwards.
function install_torchvision() {
  local saved_preload vision_pin
  vision_pin=$(get_pinned_commit vision)
  saved_preload=${LD_PRELOAD}

  if [[ -n "${LD_PRELOAD}" ]]; then
    # Silence dlerror to work-around glibc ASAN bug, see
    # https://sourceware.org/bugzilla/show_bug.cgi?id=27653#c9
    local shim="${HOME}/dlerror.so"
    echo 'char* dlerror(void) { return "";}' \
      | gcc -fpic -shared -o "${shim}" -x c -
    LD_PRELOAD="${saved_preload}:${shim}"
  fi

  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${vision_pin}"

  if [[ -n "${LD_PRELOAD}" ]]; then
    LD_PRELOAD=${saved_preload}
  fi
}
| 180 | + |
# Install tlparse 0.3.7 into the user site and prepend the user base's bin
# directory to PATH.
#
# Globals: PATH (modified).
# Returns: the status of the `python -m site --user-base` lookup; PATH is
#          updated regardless of that status.
function install_tlparse() {
  pip_install --user "tlparse==0.3.7"

  local user_bin lookup_status
  user_bin="$(python -m site --user-base)/bin"
  lookup_status=$?
  PATH="${user_bin}:${PATH}"
  return "${lookup_status}"
}
| 185 | + |
# Replace any nightly torchrec / fbgemm-gpu packages with builds from
# GitHub at the commits pinned under the names "torchrec" and "fbgemm".
# FBGEMM is installed first, with CUDA_PATH=/usr/local/cuda-12.1 set for
# that one install, then torchrec.
function install_torchrec_and_fbgemm() {
  local pinned_torchrec pinned_fbgemm
  pinned_torchrec=$(get_pinned_commit torchrec)
  pinned_fbgemm=$(get_pinned_commit fbgemm)

  local nightly_pkg
  for nightly_pkg in torchrec-nightly fbgemm-gpu-nightly; do
    pip_uninstall "${nightly_pkg}"
  done

  pip_install setuptools-git-versioning scikit-build pyre-extensions

  local fbgemm_url="git+https://github.com/pytorch/FBGEMM.git@${pinned_fbgemm}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
  local torchrec_url="git+https://github.com/pytorch/torchrec.git@${pinned_torchrec}"

  # See https://github.com/pytorch/pytorch/issues/106971
  CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "${fbgemm_url}"
  pip_install --no-use-pep517 --user "${torchrec_url}"
}
| 198 | + |
# Clone pytorch/xla into ./xla (unless that directory already exists) and
# check out the commit recorded in .github/ci_commit_pins/xla.txt, then
# sync and update its submodules.
#
# Returns: 0 when ./xla already exists or the checkout was set up; the
#          status of the first failing step otherwise. On failure the
#          caller's working directory is restored.
function clone_pytorch_xla() {
  if [[ -d ./xla ]]; then
    return 0
  fi

  git clone --recursive --quiet https://github.com/pytorch/xla.git || return
  # Never continue if we could not enter the clone: the git commands below
  # would otherwise run against the enclosing repository.
  pushd xla || return

  local status=0
  {
    # pin the xla hash so that we don't get broken by changes to xla
    git checkout "$(cat ../.github/ci_commit_pins/xla.txt)" \
      && git submodule sync \
      && git submodule update --init --recursive
  } || status=$?

  popd || return
  return "${status}"
}
| 210 | + |
# Clone pytorch/benchmark into ./torchbench, check out the commit pinned
# under the name "torchbench", run its install.py and print the resulting
# `pip freeze`.
#
# Arguments: optional model names; when given, only those models are
#            installed, otherwise install.py runs without a model list.
# Returns:   non-zero if the pin lookup, clone, directory change or
#            checkout fails (the working directory is restored). A failing
#            install.py does not by itself change the return status.
function checkout_install_torchbench() {
  local commit
  commit=$(get_pinned_commit torchbench) || return
  git clone https://github.com/pytorch/benchmark torchbench || return
  # Never continue if we could not enter the clone: `git checkout` would
  # otherwise run against the enclosing repository.
  pushd torchbench || return

  local status=0
  git checkout "$commit" || status=$?
  if (( status != 0 )); then
    popd || return
    return "${status}"
  fi

  if [[ -n "${1:-}" ]]; then
    python install.py --continue_on_fail models "$@"
  else
    # Occasionally the installation may fail on one model but it is ok to continue
    # to install and test other models
    python install.py --continue_on_fail
  fi
  echo "Print all dependencies after TorchBench is installed"
  python -mpip freeze
  popd
}
| 229 | + |
# Print sccache statistics to stdout and, when OUR_GITHUB_JOB_ID is set,
# also write the `.stats` portion of the JSON statistics to
# sccache-stats-${BUILD_ENVIRONMENT}-${OUR_GITHUB_JOB_ID}.json in the
# current directory.
#
# Globals: OUR_GITHUB_JOB_ID (read), BUILD_ENVIRONMENT (read).
function print_sccache_stats() {
  echo 'PyTorch Build Statistics'
  sccache --show-stats

  if [[ -z "${OUR_GITHUB_JOB_ID}" ]]; then
    echo "env var OUR_GITHUB_JOB_ID not set, will not write sccache stats to json"
    return
  fi

  local stats_file="sccache-stats-${BUILD_ENVIRONMENT}-${OUR_GITHUB_JOB_ID}.json"
  sccache --show-stats --stats-format json | jq .stats > "${stats_file}"
}
0 commit comments