7
7
8
8
9
9
export PYTORCH_CUDA_PKG=""
10
+ export CONDA_ENV="build_binary"
10
11
11
- conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"
12
+ conda create -y -n "${CONDA_ENV}" python="${MATRIX_PYTHON_VERSION}"
12
13
13
14
conda run -n build_binary python --version
14
15
@@ -49,41 +50,60 @@ elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then
49
50
export PYTORCH_URL="https://download.pytorch.org/whl/${CUDA_VERSION}"
50
51
fi
51
52
53
+
54
+ echo "CU_VERSION: ${CUDA_VERSION}"
55
+ echo "MATRIX_CHANNEL: ${MATRIX_CHANNEL}"
56
+ echo "CONDA_ENV: ${CONDA_ENV}"
57
+
58
+ # shellcheck disable=SC2155
59
+ export CONDA_PREFIX=$(conda run -n "${CONDA_ENV}" printenv CONDA_PREFIX)
60
+
61
+ find / -name *cuda*
62
+
63
+ if [[ $CUDA_VERSION = cu* ]]; then
64
+ # Setting LD_LIBRARY_PATH fixes the runtime error with fbgemm_gpu not
65
+ # being able to locate libnvrtc.so
66
+ echo "[NOVA] Setting LD_LIBRARY_PATH ..."
67
+ conda env config vars set -n ${CONDA_ENV} \
68
+ LD_LIBRARY_PATH="/usr/local/lib:/usr/lib64:${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH}"
69
+ fi
70
+
71
+
52
72
# install pytorch
53
73
# switch back to conda once torch nightly is fixed
54
74
# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
55
75
# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
56
76
# fi
57
- conda run -n build_binary pip install torch --index-url "$PYTORCH_URL"
77
+ conda run -n "${CONDA_ENV}" pip install torch --index-url "$PYTORCH_URL"
58
78
59
79
# install fbgemm
60
- conda run -n build_binary pip install fbgemm-gpu --index-url "$PYTORCH_URL"
80
+ conda run -n "${CONDA_ENV}" pip install fbgemm-gpu --index-url "$PYTORCH_URL"
61
81
62
82
# install requirements from pypi
63
- conda run -n build_binary pip install torchmetrics==1.0.3
83
+ conda run -n "${CONDA_ENV}" pip install torchmetrics==1.0.3
64
84
65
85
# install torchrec
66
- conda run -n build_binary pip install torchrec --index-url "$PYTORCH_URL"
86
+ conda run -n "${CONDA_ENV}" pip install torchrec --index-url "$PYTORCH_URL"
67
87
68
88
# Run small import test
69
- conda run -n build_binary python -c "import torch; import fbgemm_gpu; import torchrec"
89
+ conda run -n "${CONDA_ENV}" python -c "import torch; import fbgemm_gpu; import torchrec"
70
90
71
91
# check directory
72
92
ls -R
73
93
74
94
# check if cuda available
75
- conda run -n build_binary python -c "import torch; print(torch.cuda.is_available())"
95
+ conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.cuda.is_available())"
76
96
77
97
# check cuda version
78
- conda run -n build_binary python -c "import torch; print(torch.version.cuda)"
98
+ conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.version.cuda)"
79
99
80
100
# Finally run smoke test
81
101
# python 3.11 needs torchx-nightly
82
- conda run -n build_binary pip install torchx-nightly iopath
102
+ conda run -n "${CONDA_ENV}" pip install torchx-nightly iopath
83
103
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
84
- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
104
+ conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
85
105
else
86
- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
106
+ conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
87
107
fi
88
108
89
109
@@ -93,31 +113,31 @@ if [[ ${MATRIX_CHANNEL} != 'release' ]]; then
93
113
exit 0
94
114
else
95
115
# Check version matches only for release binaries
96
- torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
97
- fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
116
+ torchrec_version=$(conda run -n "${CONDA_ENV}" pip show torchrec | grep Version | cut -d' ' -f2)
117
+ fbgemm_version=$(conda run -n "${CONDA_ENV}" pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
98
118
99
119
if [ "$torchrec_version" != "$fbgemm_version" ]; then
100
120
echo "Error: TorchRec package version does not match FBGEMM package version"
101
121
exit 1
102
122
fi
103
123
fi
104
124
105
- conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"
125
+ conda create -y -n "${CONDA_ENV}" python="${MATRIX_PYTHON_VERSION}"
106
126
107
- conda run -n build_binary python --version
127
+ conda run -n "${CONDA_ENV}" python --version
108
128
109
129
if [[ ${MATRIX_GPU_ARCH_VERSION} != '12.4' ]]; then
110
130
exit 0
111
131
fi
112
132
113
133
echo "checking pypi release"
114
- conda run -n build_binary pip install torch
115
- conda run -n build_binary pip install fbgemm-gpu
116
- conda run -n build_binary pip install torchrec
134
+ conda run -n "${CONDA_ENV}" pip install torch
135
+ conda run -n "${CONDA_ENV}" pip install fbgemm-gpu
136
+ conda run -n "${CONDA_ENV}" pip install torchrec
117
137
118
138
# Check version matching again for PyPI
119
- torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
120
- fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
139
+ torchrec_version=$(conda run -n "${CONDA_ENV}" pip show torchrec | grep Version | cut -d' ' -f2)
140
+ fbgemm_version=$(conda run -n "${CONDA_ENV}" pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
121
141
122
142
if [ "$torchrec_version" != "$fbgemm_version" ]; then
123
143
echo "Error: TorchRec package version does not match FBGEMM package version"
128
148
ls -R
129
149
130
150
# check if cuda available
131
- conda run -n build_binary python -c "import torch; print(torch.cuda.is_available())"
151
+ conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.cuda.is_available())"
132
152
133
153
# check cuda version
134
- conda run -n build_binary python -c "import torch; print(torch.version.cuda)"
154
+ conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.version.cuda)"
135
155
136
156
# python 3.11 needs torchx-nightly
137
- conda run -n build_binary pip install torchx-nightly iopath
157
+ conda run -n "${CONDA_ENV}" pip install torchx-nightly iopath
138
158
139
159
# Finally run smoke test
140
- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
160
+ conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
0 commit comments