7
7
8
8
9
9
export PYTORCH_CUDA_PKG=" "
10
+ export CONDA_ENV=" build_binary"
10
11
11
- conda create -y -n build_binary python=" ${MATRIX_PYTHON_VERSION} "
12
+ conda create -y -n " ${CONDA_ENV} " python=" ${MATRIX_PYTHON_VERSION} "
12
13
13
14
conda run -n build_binary python --version
14
15
@@ -49,41 +50,58 @@ elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then
49
50
export PYTORCH_URL=" https://download.pytorch.org/whl/${CUDA_VERSION} "
50
51
fi
51
52
53
+
54
+ echo " CU_VERSION: ${CUDA_VERSION} "
55
+ echo " MATRIX_CHANNEL: ${MATRIX_CHANNEL} "
56
+ echo " CONDA_ENV: ${CONDA_ENV} "
57
+
58
+ # shellcheck disable=SC2155
59
+ export CONDA_PREFIX=$( conda run -n " ${CONDA_ENV} " printenv CONDA_PREFIX)
60
+
61
+
62
+ # Set LD_LIBRARY_PATH to fix the runtime error with fbgemm_gpu not
63
+ # being able to locate libnvrtc.so
64
+ # NOTE: The order of the entries in LD_LIBRARY_PATH matters
65
+ echo " [NOVA] Setting LD_LIBRARY_PATH ..."
66
+ conda env config vars set -n ${CONDA_ENV} \
67
+ LD_LIBRARY_PATH=" ${CONDA_PREFIX} /lib:/usr/local/lib:/usr/lib64:${LD_LIBRARY_PATH} "
68
+
69
+
52
70
# install pytorch
53
71
# switch back to conda once torch nightly is fixed
54
72
# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
55
73
# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
56
74
# fi
57
- conda run -n build_binary pip install torch --index-url " $PYTORCH_URL "
75
+ conda run -n " ${CONDA_ENV} " pip install torch --index-url " $PYTORCH_URL "
58
76
59
77
# install fbgemm
60
- conda run -n build_binary pip install fbgemm-gpu --index-url " $PYTORCH_URL "
78
+ conda run -n " ${CONDA_ENV} " pip install fbgemm-gpu --index-url " $PYTORCH_URL "
61
79
62
80
# install requirements from pypi
63
- conda run -n build_binary pip install torchmetrics==1.0.3
81
+ conda run -n " ${CONDA_ENV} " pip install torchmetrics==1.0.3
64
82
65
83
# install torchrec
66
- conda run -n build_binary pip install torchrec --index-url " $PYTORCH_URL "
84
+ conda run -n " ${CONDA_ENV} " pip install torchrec --index-url " $PYTORCH_URL "
67
85
68
86
# Run small import test
69
- conda run -n build_binary python -c " import torch; import fbgemm_gpu; import torchrec"
87
+ conda run -n " ${CONDA_ENV} " python -c " import torch; import fbgemm_gpu; import torchrec"
70
88
71
89
# check directory
72
90
ls -R
73
91
74
92
# check if cuda available
75
- conda run -n build_binary python -c " import torch; print(torch.cuda.is_available())"
93
+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.cuda.is_available())"
76
94
77
95
# check cuda version
78
- conda run -n build_binary python -c " import torch; print(torch.version.cuda)"
96
+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.version.cuda)"
79
97
80
98
# Finally run smoke test
81
99
# python 3.11 needs torchx-nightly
82
- conda run -n build_binary pip install torchx-nightly iopath
100
+ conda run -n " ${CONDA_ENV} " pip install torchx-nightly iopath
83
101
if [[ ${MATRIX_GPU_ARCH_TYPE} = ' cuda' ]]; then
84
- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
102
+ conda run -n " ${CONDA_ENV} " torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
85
103
else
86
- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
104
+ conda run -n " ${CONDA_ENV} " torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
87
105
fi
88
106
89
107
@@ -93,31 +111,31 @@ if [[ ${MATRIX_CHANNEL} != 'release' ]]; then
93
111
exit 0
94
112
else
95
113
# Check version matches only for release binaries
96
- torchrec_version=$( conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
97
- fbgemm_version=$( conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
114
+ torchrec_version=$( conda run -n " ${CONDA_ENV} " pip show torchrec | grep Version | cut -d' ' -f2)
115
+ fbgemm_version=$( conda run -n " ${CONDA_ENV} " pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
98
116
99
117
if [ " $torchrec_version " != " $fbgemm_version " ]; then
100
118
echo " Error: TorchRec package version does not match FBGEMM package version"
101
119
exit 1
102
120
fi
103
121
fi
104
122
105
- conda create -y -n build_binary python=" ${MATRIX_PYTHON_VERSION} "
123
+ conda create -y -n " ${CONDA_ENV} " python=" ${MATRIX_PYTHON_VERSION} "
106
124
107
- conda run -n build_binary python --version
125
+ conda run -n " ${CONDA_ENV} " python --version
108
126
109
127
if [[ ${MATRIX_GPU_ARCH_VERSION} != ' 12.4' ]]; then
110
128
exit 0
111
129
fi
112
130
113
131
echo " checking pypi release"
114
- conda run -n build_binary pip install torch
115
- conda run -n build_binary pip install fbgemm-gpu
116
- conda run -n build_binary pip install torchrec
132
+ conda run -n " ${CONDA_ENV} " pip install torch
133
+ conda run -n " ${CONDA_ENV} " pip install fbgemm-gpu
134
+ conda run -n " ${CONDA_ENV} " pip install torchrec
117
135
118
136
# Check version matching again for PyPI
119
- torchrec_version=$( conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
120
- fbgemm_version=$( conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
137
+ torchrec_version=$( conda run -n " ${CONDA_ENV} " pip show torchrec | grep Version | cut -d' ' -f2)
138
+ fbgemm_version=$( conda run -n " ${CONDA_ENV} " pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
121
139
122
140
if [ " $torchrec_version " != " $fbgemm_version " ]; then
123
141
echo " Error: TorchRec package version does not match FBGEMM package version"
128
146
ls -R
129
147
130
148
# check if cuda available
131
- conda run -n build_binary python -c " import torch; print(torch.cuda.is_available())"
149
+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.cuda.is_available())"
132
150
133
151
# check cuda version
134
- conda run -n build_binary python -c " import torch; print(torch.version.cuda)"
152
+ conda run -n " ${CONDA_ENV} " python -c " import torch; print(torch.version.cuda)"
135
153
136
154
# python 3.11 needs torchx-nightly
137
- conda run -n build_binary pip install torchx-nightly iopath
155
+ conda run -n " ${CONDA_ENV} " pip install torchx-nightly iopath
138
156
139
157
# Finally run smoke test
140
- conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
158
+ conda run -n " ${CONDA_ENV} " torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
0 commit comments