@@ -3,12 +3,13 @@ ARG BASE_UBI_IMAGE_TAG=9.3-1552
ARG PROTOC_VERSION=25.2
ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
+ ARG AUTO_GPTQ_VERSION=0.7.1

# match PyTorch version that was used to compile flash-attention v2 pre-built wheels
# e.g. flash-attn v2.5.2 => torch ['1.12.1', '1.13.1', '2.0.1', '2.1.2', '2.2.0', '2.3.0.dev20240126']
# https://github.com/Dao-AILab/flash-attention/blob/v2.5.2/.github/workflows/publish.yml#L47
# use nightly build index for torch .dev pre-release versions
- ARG PYTORCH_VERSION=2.2.0
+ ARG PYTORCH_VERSION=2.2.1

ARG PYTHON_VERSION=3.11

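Note: all of these ARGs can be overridden at build time rather than by editing the Dockerfile. A minimal sketch of such an invocation (the image tag here is hypothetical):

    docker build --build-arg PYTORCH_VERSION=2.2.1 --build-arg AUTO_GPTQ_VERSION=0.7.1 -t tgis:cuda12.1 .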
@@ -35,18 +36,19 @@ ENV LANG=C.UTF-8 \
# # CUDA Base ###################################################################
FROM base as cuda-base

- ENV CUDA_VERSION=11.8.0 \
-     NV_CUDA_LIB_VERSION=11.8.0-1 \
+ # Ref: https://docs.nvidia.com/cuda/archive/12.1.0/cuda-toolkit-release-notes/
+ ENV CUDA_VERSION=12.1.0 \
+     NV_CUDA_LIB_VERSION=12.1.0-1 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
-     NV_CUDA_CUDART_VERSION=11.8.89-1 \
-     NV_CUDA_COMPAT_VERSION=520.61.05-1
+     NV_CUDA_CUDART_VERSION=12.1.55-1 \
+     NV_CUDA_COMPAT_VERSION=530.30.02-1

RUN dnf config-manager \
    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
    && dnf install -y \
-     cuda-cudart-11-8-${NV_CUDA_CUDART_VERSION} \
-     cuda-compat-11-8-${NV_CUDA_COMPAT_VERSION} \
+     cuda-cudart-12-1-${NV_CUDA_CUDART_VERSION} \
+     cuda-compat-12-1-${NV_CUDA_COMPAT_VERSION} \
    && echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \
    && echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf \
    && dnf clean all
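Note: the pinned release suffixes above have to match what NVIDIA currently publishes in the rhel9 repo. Assuming the repo has been added as in the RUN step, one way to list the available pins before bumping them:

    dnf list --showduplicates cuda-cudart-12-1 cuda-compat-12-1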
@@ -59,22 +61,23 @@ ENV CUDA_HOME="/usr/local/cuda" \
# # CUDA Development ############################################################
FROM cuda-base as cuda-devel

- ENV NV_CUDA_CUDART_DEV_VERSION=11.8.89-1 \
-     NV_NVML_DEV_VERSION=11.8.86-1 \
-     NV_LIBCUBLAS_DEV_VERSION=11.11.3.6-1 \
-     NV_LIBNPP_DEV_VERSION=11.8.0.86-1 \
-     NV_LIBNCCL_DEV_PACKAGE_VERSION=2.15.5-1+cuda11.8
+ # Ref: https://developer.nvidia.com/nccl/nccl-legacy-downloads
+ ENV NV_CUDA_CUDART_DEV_VERSION=12.1.55-1 \
+     NV_NVML_DEV_VERSION=12.1.55-1 \
+     NV_LIBCUBLAS_DEV_VERSION=12.1.0.26-1 \
+     NV_LIBNPP_DEV_VERSION=12.0.2.50-1 \
+     NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.3-1+cuda12.1

RUN dnf config-manager \
    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
    && dnf install -y \
-     cuda-command-line-tools-11-8-${NV_CUDA_LIB_VERSION} \
-     cuda-libraries-devel-11-8-${NV_CUDA_LIB_VERSION} \
-     cuda-minimal-build-11-8-${NV_CUDA_LIB_VERSION} \
-     cuda-cudart-devel-11-8-${NV_CUDA_CUDART_DEV_VERSION} \
-     cuda-nvml-devel-11-8-${NV_NVML_DEV_VERSION} \
-     libcublas-devel-11-8-${NV_LIBCUBLAS_DEV_VERSION} \
-     libnpp-devel-11-8-${NV_LIBNPP_DEV_VERSION} \
+     cuda-command-line-tools-12-1-${NV_CUDA_LIB_VERSION} \
+     cuda-libraries-devel-12-1-${NV_CUDA_LIB_VERSION} \
+     cuda-minimal-build-12-1-${NV_CUDA_LIB_VERSION} \
+     cuda-cudart-devel-12-1-${NV_CUDA_CUDART_DEV_VERSION} \
+     cuda-nvml-devel-12-1-${NV_NVML_DEV_VERSION} \
+     libcublas-devel-12-1-${NV_LIBCUBLAS_DEV_VERSION} \
+     libnpp-devel-12-1-${NV_LIBNPP_DEV_VERSION} \
    libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
    && dnf clean all

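Note: cuda-minimal-build-12-1 is what supplies nvcc for the flash-attention build further down. A quick sanity check of this stage (a sketch; assumes the stage names above and that /usr/local/cuda holds the toolkit):

    docker build --target cuda-devel -t tgis-cuda-devel .
    docker run --rm tgis-cuda-devel /usr/local/cuda/bin/nvcc --version   # should report release 12.1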
@@ -199,12 +202,12 @@ ENV PATH=/opt/tgis/bin/:$PATH
# Install specific version of torch
RUN pip install ninja==1.11.1.1 --no-cache-dir
RUN pip install packaging --no-cache-dir
- RUN pip install torch==$PYTORCH_VERSION+cu118 --index-url "${PYTORCH_INDEX}/cu118" --no-cache-dir
+ RUN pip install torch==$PYTORCH_VERSION+cu121 --index-url "${PYTORCH_INDEX}/cu121" --no-cache-dir


# # Build flash attention v2 ####################################################
FROM python-builder as flash-att-v2-builder
- ARG FLASH_ATT_VERSION=v2.5.2
+ ARG FLASH_ATT_VERSION=v2.5.6

WORKDIR /usr/src/flash-attention-v2

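Note: the +cu121 local version tag has to line up with the CUDA 12.1 toolchain above, and flash-attn only publishes pre-built wheels for specific torch/CUDA pairs. A quick way to confirm what actually got installed (a sketch, run in any stage after this install):

    python -c "import torch; print(torch.__version__, torch.version.cuda)"
    # expected output: 2.2.1+cu121 12.1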
@@ -217,14 +220,15 @@ RUN MAX_JOBS=2 pip --verbose wheel --no-deps flash-attn==${FLASH_ATT_VERSION} \


# # Install auto-gptq ###########################################################
- FROM python-builder as auto-gptq-installer
- ARG AUTO_GPTQ_REF=ccb6386ebfde63c17c45807d38779a93cd25846f
-
- WORKDIR /usr/src/auto-gptq-wheel
-
- # numpy is required to run auto-gptq's setup.py
- RUN pip install numpy
- RUN DISABLE_QIGEN=1 pip wheel git+https://github.com/AutoGPTQ/AutoGPTQ@${AUTO_GPTQ_REF} --no-cache-dir --no-deps --verbose
+ # # Uncomment if a custom autogptq build is required
+ # FROM python-builder as auto-gptq-installer
+ # ARG AUTO_GPTQ_REF=896d8204bc89a7cfbda42bf3314e13cf4ce20b02
+ #
+ # WORKDIR /usr/src/auto-gptq-wheel
+ #
+ # # numpy is required to run auto-gptq's setup.py
+ # RUN pip install numpy
+ # RUN DISABLE_QIGEN=1 pip wheel git+https://github.com/AutoGPTQ/AutoGPTQ@${AUTO_GPTQ_REF} --no-cache-dir --no-deps --verbose

# # Build libraries #############################################################
FROM python-builder as build
@@ -241,18 +245,20 @@ FROM base as flash-att-v2-cache
COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2 /usr/src/flash-attention-v2


- # # Auto gptq cached build image
- FROM base as auto-gptq-cache
-
- # Copy just the wheel we built for auto-gptq
- COPY --from=auto-gptq-installer /usr/src/auto-gptq-wheel /usr/src/auto-gptq-wheel
+ # # Auto gptq cached build image ################################################
+ # # Uncomment if a custom autogptq build is required
+ # FROM base as auto-gptq-cache
+ #
+ # # Copy just the wheel we built for auto-gptq
+ # COPY --from=auto-gptq-installer /usr/src/auto-gptq-wheel /usr/src/auto-gptq-wheel


# # Full set of python installations for server release #########################

FROM python-builder as python-installations

ARG PYTHON_VERSION
+ ARG AUTO_GPTQ_VERSION
ARG SITE_PACKAGES=/opt/tgis/lib/python${PYTHON_VERSION}/site-packages

COPY --from=build /opt/tgis /opt/tgis
@@ -265,15 +271,21 @@ RUN --mount=type=bind,from=flash-att-v2-cache,src=/usr/src/flash-attention-v2,ta
    pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir

# Copy over the auto-gptq wheel and install it
- RUN --mount=type=bind,from=auto-gptq-cache,src=/usr/src/auto-gptq-wheel,target=/usr/src/auto-gptq-wheel \
-     pip install /usr/src/auto-gptq-wheel/*.whl --no-cache-dir
+ # RUN --mount=type=bind,from=auto-gptq-cache,src=/usr/src/auto-gptq-wheel,target=/usr/src/auto-gptq-wheel \
+ #     pip install /usr/src/auto-gptq-wheel/*.whl --no-cache-dir
+
+ # We only need to install a custom-built auto-gptq version if we need a pre-release
+ # or are using a PyTorch nightly version
+ RUN pip install auto-gptq=="${AUTO_GPTQ_VERSION}" --no-cache-dir

# Install server
# git is required to pull the fms-extras dependency
RUN dnf install -y git && dnf clean all
COPY proto proto
COPY server server
- RUN cd server && make gen-server && pip install ".[accelerate, ibm-fms, onnx-gpu, quantize]" --no-cache-dir
+ # Extra url is required to install cuda-12 version of onnxruntime-gpu
+ # Ref: https://onnxruntime.ai/docs/install/#install-onnx-runtime-gpu-cuda-12x
+ RUN cd server && make gen-server && pip install ".[accelerate, ibm-fms, onnx-gpu, quantize]" --no-cache-dir --extra-index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/

# Patch codegen model changes into transformers 4.35
RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py
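Note: two quick post-build checks for this change (a sketch; run inside the final image): confirm pip resolved the pinned auto-gptq release rather than a source build, and confirm the CUDA 12 onnxruntime-gpu wheel from the extra index can see the GPU provider:

    pip show auto-gptq onnxruntime-gpu
    python -c "import onnxruntime; print(onnxruntime.get_available_providers())"
    # expect CUDAExecutionProvider in the list on a GPU host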