Skip to content

Commit

Permalink
Merge pull request #4 from fourhu/master
Browse files: browse the repository at this point in the history
Fixed the issue where the DCU device plugin failed to run on K100
  • Loading branch information
archlitchi authored Dec 18, 2024
2 parents af86d04 + f7408f3 commit 7c0f320
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 43 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ RUN cd /device-plugin && go build -o ./k8s-device-plugin cmd/k8s-device-plugin/m
FROM ubuntu:20.04
ENV TZ=Asia/Dubai
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
- RUN apt-get update && apt-get -y install libhwloc-dev libdrm-dev pciutils
+ RUN apt-get update && apt-get -y install libhwloc-dev libdrm-dev pciutils libelf-dev kmod
ENV LD_LIBRARY_PATH=/opt/hygondriver/hip/lib:/opt/hygondriver/llvm/lib:/opt/hygondriver/lib:/opt/hygondriver/lib64:/opt/hyhal/lib:/opt/hyhal/lib64:/opt/hygondriver/.hyhal/lib:/opt/hygondriver/.hyhal/lib64:
ENV PATH=/opt/hygondriver/bin:/opt/hygondriver/llvm/bin:/opt/hygondriver/hip/bin:/opt/hygondriver/hip/bin/hipify:/opt/hyhal/bin:/opt/hygondriver/.hyhal/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV C_INCLUDE_PATH=/opt/hygondriver/include:/opt/hyhal/include:/opt/hygondriver/llvm/include:/opt/hygondriver/.hyhal/include:
Expand Down
8 changes: 4 additions & 4 deletions internal/pkg/dcu/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (p *Plugin) Start() error {
}
p.count = 0

- cmd := exec.Command("hy-smi", "--showmeminfo", "vram")
+ cmd := exec.Command("/opt/hyhal/bin/hy-smi", "--showmeminfo", "vram")
out, err := cmd.CombinedOutput()
if err != nil {
log.Fatalf("cmd.Run() failed with %s\n", err)
Expand Down Expand Up @@ -124,7 +124,7 @@ func (p *Plugin) Start() error {
p.count++
}

- cmd = exec.Command("hy-smi", "--showproductname")
+ cmd = exec.Command("/opt/hyhal/bin/hy-smi", "--showproductname")
out, err = cmd.CombinedOutput()
if err != nil {
log.Fatalf("cmd.Run() failed with %s\n", err)
Expand All @@ -148,7 +148,7 @@ func (p *Plugin) Start() error {
index++
}

- cmd = exec.Command("hy-smi", "--showbus")
+ cmd = exec.Command("/opt/hyhal/bin/hy-smi", "--showbus")
out, err = cmd.CombinedOutput()
if err != nil {
log.Fatalf("cmd.Run() failed with %s\n", err)
Expand All @@ -167,7 +167,7 @@ func (p *Plugin) Start() error {
}
fmt.Println("collecting pcibus=", p.pcibusid)

- cmd = exec.Command("hy-virtual", "--show-device-info")
+ cmd = exec.Command("/opt/hyhal/bin/hy-virtual", "--show-device-info")
out, err = cmd.CombinedOutput()
if err != nil {
log.Fatalf("cmd.Run() failed with %s\n", err)
Expand Down
86 changes: 48 additions & 38 deletions k8s-dcu-plugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,55 +21,65 @@ spec:
- key: CriticalAddonsOnly
operator: Exists
containers:
- image: projecthami/dcu-vgpu-device-plugin:v1.0.1
- image: projecthami/dcu-vgpu-device-plugin:master
#command: ["/bin/bash","-c","source /opt/hygondriver/env.sh && sleep infinity"]
command: ["/root/k8s-device-plugin"]
command: [ "/root/k8s-device-plugin" ]
name: dcu-dp-cntr
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: HYGONPATH
value: /opt/dtk
- name: BASH_ENV
value: ~/.bashrc
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: HYGONPATH
value: /opt/dtk
- name: BASH_ENV
value: ~/.bashrc
securityContext:
privileged: true
allowPrivilegeEscalation: true
capabilities:
drop: ["ALL"]
add: ["SYS_ADMIN"]
drop: [ "ALL" ]
add: [ "SYS_ADMIN" ]
volumeMounts:
- name: dp
mountPath: /var/lib/kubelet/device-plugins
- name: sys
mountPath: /sys
- name: hwpath
mountPath: /usr/share/hwdata
- name: hygonloc
mountPath: /opt/hygondriver/
- name: lib
mountPath: /usr/local/vgpu
- name: hyhal
mountPath: /opt/hyhal
volumes:
- name: dp
hostPath:
path: /var/lib/kubelet/device-plugins
mountPath: /var/lib/kubelet/device-plugins
- name: sys
hostPath:
path: /sys
mountPath: /sys
- name: dev
mountPath: /dev
- name: vdev
mountPath: /etc/vdev
- name: hwpath
hostPath:
path: /usr/share/hwdata
mountPath: /usr/share/hwdata
- name: hygonloc
hostPath:
path: /opt/dtk
mountPath: /opt/hygondriver/
- name: lib
hostPath:
path: /usr/local/vgpu
mountPath: /usr/local/vgpu
- name: hyhal
hostPath:
path: /opt/hyhal

mountPath: /opt/hyhal
volumes:
- name: dp
hostPath:
path: /var/lib/kubelet/device-plugins
- name: sys
hostPath:
path: /sys
- name: dev
hostPath:
path: /dev
- name: vdev
hostPath:
path: /etc/vdev
type: Directory
- name: hwpath
hostPath:
path: /usr/share/hwdata
- name: hygonloc
hostPath:
path: /opt/dtk
- name: lib
hostPath:
path: /usr/local/vgpu
- name: hyhal
hostPath:
path: /opt/hyhal

0 comments on commit 7c0f320

Please sign in to comment.