Skip to content

Commit 40fcdbf

Browse files
committed
Add e2e test for CUDA Minor Version Compatibility
Signed-off-by: Christopher Desiniotis <cdesiniotis@nvidia.com>
1 parent 24bea0e commit 40fcdbf

3 files changed

Lines changed: 52 additions & 3 deletions

File tree

.github/workflows/e2e.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ jobs:
4949
# Production Branch. EOL March 2027
5050
- 595
5151
exclude:
52-
- ispr: true
53-
driver_branch: 580
52+
# Temporary change: stop excluding the ispr/580 combination so that CUDA Minor Version Compatibility can be tested against the 580 driver branch.
53+
#- ispr: true
54+
# driver_branch: 580
5455
- ispr: true
5556
driver_branch: 595
5657
steps:

tests/e2e/infra/driver-branch-580.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,8 @@ spec:
2929
install: true
3030
source: package
3131
package:
32-
branch: "580"
32+
# We pin the version here to allow us to test CUDA Minor Version Compatibility.
33+
# If we deploy a CUDA 13.0.3+ container (associated with NVIDIA Linux Driver
34+
# version 580.126.20), NVIDIA Container Toolkit should use the CUDA compat
35+
# libraries in the container instead of the 580.105.08 libraries on the host.
36+
version: "580.105.08"

tests/e2e/nvidia-container-toolkit_test.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,50 @@ var _ = Describe("docker", Ordered, ContinueOnFailure, func() {
224224
})
225225
})
226226

227+
When("Testing CUDA Minor Version compatibility", Ordered, func() {
228+
BeforeAll(func(ctx context.Context) {
229+
/*
230+
* The CUDA 13.0.3 image is associated with NVIDIA Linux Driver 580.126.20.
231+
*/
232+
_, _, err := runner.Run("docker pull nvcr.io/nvidia/cuda:13.0.3-base-ubi8")
233+
Expect(err).ToNot(HaveOccurred())
234+
235+
compatOutput, _, err := runner.Run("docker run --rm -i -e NVIDIA_VISIBLE_DEVICES=void nvcr.io/nvidia/cuda:13.0.3-base-ubi8 bash -c \"ls /usr/local/cuda/compat/libcuda.*.*\"")
236+
Expect(err).ToNot(HaveOccurred())
237+
Expect(compatOutput).ToNot(BeEmpty())
238+
239+
compatDriverVersion := strings.TrimPrefix(filepath.Base(compatOutput), "libcuda.so.")
240+
compatMajor := strings.SplitN(compatDriverVersion, ".", 2)[0]
241+
242+
if hostDriverMajor != compatMajor {
243+
GinkgoLogr.Info("CUDA Minor Version Compatibility tests require the host driver branch to equal the compat driver branch", "hostDriverVersion", hostDriverVersion, "compatDriverVersion", compatDriverVersion)
244+
Skip("CUDA Minor Version Compatibility tests require the host driver branch to equal the compat driver branch")
245+
}
246+
})
247+
248+
It("should work with the nvidia runtime in legacy mode", func(ctx context.Context) {
249+
ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia --gpus all nvcr.io/nvidia/cuda:13.0.3-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
250+
Expect(err).ToNot(HaveOccurred())
251+
Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda-13.0/compat/"))
252+
})
253+
254+
It("should work with the nvidia runtime in CDI mode", func(ctx context.Context) {
255+
ldconfigOut, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:13.0.3-base-ubi8 bash -c \"ldconfig -p | grep libcuda.so.1\"")
256+
Expect(err).ToNot(HaveOccurred())
257+
Expect(ldconfigOut).To(ContainSubstring("/usr/local/cuda-13.0/compat/"))
258+
})
259+
260+
It("should create a single ld.so.conf.d config file", func(ctx context.Context) {
261+
lsout, _, err := runner.Run("docker run --rm -i -e NVIDIA_DISABLE_REQUIRE=true --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=runtime.nvidia.com/gpu=all nvcr.io/nvidia/cuda:13.0.3-base-ubi8 bash -c \"ls -l /etc/ld.so.conf.d/00-compat-*.conf\"")
262+
Expect(err).ToNot(HaveOccurred())
263+
Expect(lsout).To(WithTransform(
264+
func(s string) []string {
265+
return strings.Split(strings.TrimSpace(s), "\n")
266+
}, HaveLen(1),
267+
))
268+
})
269+
})
270+
227271
When("Disabling device node creation", Ordered, func() {
228272
BeforeAll(func(ctx context.Context) {
229273
_, _, err := runner.Run("docker pull ubuntu")

0 commit comments

Comments
 (0)