Skip to content

Commit 52b0749

Browse files
q10 authored and facebook-github-bot committed
Clone of D68511145 (#2697)
Summary: Pull Request resolved: #2697 - [OSS] set LD_LIBRARY_PATH for fbgemm in validate_binaries.sh. Reviewed By: PaulZhang12. Differential Revision: D68516472. fbshipit-source-id: 2ddc42b59704103729ea398f92cb6baa92e284a2
1 parent 526902f commit 52b0749

File tree

1 file changed

+42
-24
lines changed

1 file changed

+42
-24
lines changed

.github/scripts/validate_binaries.sh

+42-24
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,9 @@
77

88

99
export PYTORCH_CUDA_PKG=""
10+
export CONDA_ENV="build_binary"
1011

11-
conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"
12+
conda create -y -n "${CONDA_ENV}" python="${MATRIX_PYTHON_VERSION}"
1213

1314
conda run -n build_binary python --version
1415

@@ -49,41 +50,58 @@ elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then
4950
export PYTORCH_URL="https://download.pytorch.org/whl/${CUDA_VERSION}"
5051
fi
5152

53+
54+
echo "CU_VERSION: ${CUDA_VERSION}"
55+
echo "MATRIX_CHANNEL: ${MATRIX_CHANNEL}"
56+
echo "CONDA_ENV: ${CONDA_ENV}"
57+
58+
# shellcheck disable=SC2155
59+
export CONDA_PREFIX=$(conda run -n "${CONDA_ENV}" printenv CONDA_PREFIX)
60+
61+
62+
# Set LD_LIBRARY_PATH to fix the runtime error with fbgemm_gpu not
63+
# being able to locate libnvrtc.so
64+
# NOTE: The order of the entries in LD_LIBRARY_PATH matters
65+
echo "[NOVA] Setting LD_LIBRARY_PATH ..."
66+
conda env config vars set -n ${CONDA_ENV} \
67+
LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:/usr/local/lib:/usr/lib64:${LD_LIBRARY_PATH}"
68+
69+
5270
# install pytorch
5371
# switch back to conda once torch nightly is fixed
5472
# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
5573
# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
5674
# fi
57-
conda run -n build_binary pip install torch --index-url "$PYTORCH_URL"
75+
conda run -n "${CONDA_ENV}" pip install torch --index-url "$PYTORCH_URL"
5876

5977
# install fbgemm
60-
conda run -n build_binary pip install fbgemm-gpu --index-url "$PYTORCH_URL"
78+
conda run -n "${CONDA_ENV}" pip install fbgemm-gpu --index-url "$PYTORCH_URL"
6179

6280
# install requirements from pypi
63-
conda run -n build_binary pip install torchmetrics==1.0.3
81+
conda run -n "${CONDA_ENV}" pip install torchmetrics==1.0.3
6482

6583
# install torchrec
66-
conda run -n build_binary pip install torchrec --index-url "$PYTORCH_URL"
84+
conda run -n "${CONDA_ENV}" pip install torchrec --index-url "$PYTORCH_URL"
6785

6886
# Run small import test
69-
conda run -n build_binary python -c "import torch; import fbgemm_gpu; import torchrec"
87+
conda run -n "${CONDA_ENV}" python -c "import torch; import fbgemm_gpu; import torchrec"
7088

7189
# check directory
7290
ls -R
7391

7492
# check if cuda available
75-
conda run -n build_binary python -c "import torch; print(torch.cuda.is_available())"
93+
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.cuda.is_available())"
7694

7795
# check cuda version
78-
conda run -n build_binary python -c "import torch; print(torch.version.cuda)"
96+
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.version.cuda)"
7997

8098
# Finally run smoke test
8199
# python 3.11 needs torchx-nightly
82-
conda run -n build_binary pip install torchx-nightly iopath
100+
conda run -n "${CONDA_ENV}" pip install torchx-nightly iopath
83101
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
84-
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
102+
conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
85103
else
86-
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
104+
conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
87105
fi
88106

89107

@@ -93,31 +111,31 @@ if [[ ${MATRIX_CHANNEL} != 'release' ]]; then
93111
exit 0
94112
else
95113
# Check version matches only for release binaries
96-
torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
97-
fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
114+
torchrec_version=$(conda run -n "${CONDA_ENV}" pip show torchrec | grep Version | cut -d' ' -f2)
115+
fbgemm_version=$(conda run -n "${CONDA_ENV}" pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
98116

99117
if [ "$torchrec_version" != "$fbgemm_version" ]; then
100118
echo "Error: TorchRec package version does not match FBGEMM package version"
101119
exit 1
102120
fi
103121
fi
104122

105-
conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"
123+
conda create -y -n "${CONDA_ENV}" python="${MATRIX_PYTHON_VERSION}"
106124

107-
conda run -n build_binary python --version
125+
conda run -n "${CONDA_ENV}" python --version
108126

109127
if [[ ${MATRIX_GPU_ARCH_VERSION} != '12.4' ]]; then
110128
exit 0
111129
fi
112130

113131
echo "checking pypi release"
114-
conda run -n build_binary pip install torch
115-
conda run -n build_binary pip install fbgemm-gpu
116-
conda run -n build_binary pip install torchrec
132+
conda run -n "${CONDA_ENV}" pip install torch
133+
conda run -n "${CONDA_ENV}" pip install fbgemm-gpu
134+
conda run -n "${CONDA_ENV}" pip install torchrec
117135

118136
# Check version matching again for PyPI
119-
torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
120-
fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
137+
torchrec_version=$(conda run -n "${CONDA_ENV}" pip show torchrec | grep Version | cut -d' ' -f2)
138+
fbgemm_version=$(conda run -n "${CONDA_ENV}" pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
121139

122140
if [ "$torchrec_version" != "$fbgemm_version" ]; then
123141
echo "Error: TorchRec package version does not match FBGEMM package version"
@@ -128,13 +146,13 @@ fi
128146
ls -R
129147

130148
# check if cuda available
131-
conda run -n build_binary python -c "import torch; print(torch.cuda.is_available())"
149+
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.cuda.is_available())"
132150

133151
# check cuda version
134-
conda run -n build_binary python -c "import torch; print(torch.version.cuda)"
152+
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.version.cuda)"
135153

136154
# python 3.11 needs torchx-nightly
137-
conda run -n build_binary pip install torchx-nightly iopath
155+
conda run -n "${CONDA_ENV}" pip install torchx-nightly iopath
138156

139157
# Finally run smoke test
140-
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
158+
conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py

0 commit comments

Comments
 (0)