update examples and README
Signed-off-by: 逍遥 <[email protected]>
xiaoyao authored and wawa0210 committed Dec 19, 2024
1 parent 3c220fc commit b5a8b27
Showing 16 changed files with 68 additions and 28 deletions.
8 changes: 6 additions & 2 deletions README.md
@@ -51,14 +51,18 @@ A task with the following resources:
 ```
 resources:
   limits:
-    nvidia.com/gpu: 1 # requesting 1 vGPU
-    nvidia.com/gpumem: 3000 # Each vGPU contains 3000m device memory
+    nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
+    nvidia.com/gpumem: 3000 # Each physical GPU allocates 3000M of device memory to the pod
 ```
 
 will see 3G of device memory inside the container
 
 ![img](./imgs/hard_limit.jpg)
 
+> Note:
+1. **After installing HAMi, the value of `nvidia.com/gpu` registered on the node defaults to the "number of vGPUs".**
+2. **When requesting resources in a pod, `nvidia.com/gpu` refers to the "number of physical GPUs required by the current pod".**
+
 ### Supported devices
 
 [![nvidia GPU](https://img.shields.io/badge/Nvidia-GPU-blue)](https://github.com/Project-HAMi/HAMi#preparing-your-gpu-nodes)
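The note above distinguishes the node-level resource (vGPU count) from the pod-level request (physical GPU count). As a minimal sketch of these semantics — the pod and container names are illustrative, not part of the commit:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: gpu-demo              # illustrative name
spec:
  containers:
    - name: cuda-container    # illustrative name
      image: ubuntu:18.04
      command: ["bash", "-c", "sleep 86400"]
      resources:
        limits:
          nvidia.com/gpu: 1       # number of physical GPUs this pod needs
          nvidia.com/gpumem: 3000 # 3000M of device memory allocated to the pod from that GPU
```

Inside such a container, tools like `nvidia-smi` should report roughly 3G of visible device memory, matching the hard limit shown in the figure.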
4 changes: 4 additions & 0 deletions README_cn.md
@@ -55,6 +55,10 @@ HAMi supports hard isolation of device resources
 ![img](./imgs/hard_limit.jpg)
+> Note:
+1. **After installing HAMi, the value of `nvidia.com/gpu` registered on the node defaults to the "number of vGPUs".**
+2. **When requesting resources in a pod, `nvidia.com/gpu` refers to the "number of physical GPUs required by the current pod".**
+
 ### Supported devices
 
 [![nvidia GPU](https://img.shields.io/badge/Nvidia-GPU-blue)](https://github.com/Project-HAMi/HAMi#preparing-your-gpu-nodes)
2 changes: 1 addition & 1 deletion docs/develop/tasklist.md
@@ -113,6 +113,6 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 VGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
 ```

4 changes: 2 additions & 2 deletions example.yaml
@@ -34,8 +34,8 @@ spec:
           - while true; do /cuda-samples/vectorAdd; done
         resources:
           limits:
-            nvidia.com/gpu: 1 # requesting 1 vGPU
-            nvidia.com/gpumem: 3000 # Each vGPU contains 3000m device memory (Optional, Integer)
+            nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
+            nvidia.com/gpumem: 3000 # Each physical GPU allocates 3000M of device memory to the pod (Optional, Integer)
         terminationMessagePath: /dev/termination-log
         terminationMessagePolicy: File
         imagePullPolicy: IfNotPresent
6 changes: 3 additions & 3 deletions examples/nvidia/default_use.yaml
@@ -9,6 +9,6 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
-          nvidia.com/gpumem: 3000 # Each vGPU contains 3000m device memory (Optional, Integer)
-          nvidia.com/gpucores: 30 # Each vGPU uses 30% of the entire GPU (Optional, Integer)
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem: 3000 # Each physical GPU allocates 3000M of device memory to the pod (Optional, Integer)
+          nvidia.com/gpucores: 30 # Each physical GPU allocates 30% of its cores to the pod (Optional, Integer)
2 changes: 1 addition & 1 deletion examples/nvidia/default_use_legacy.yaml
@@ -9,4 +9,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
12 changes: 6 additions & 6 deletions examples/nvidia/example.yaml
@@ -9,10 +9,10 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
-          #nvidia.com/gpumem: 3000 # Each vGPU contains 3000M device memory
-          nvidia.com/gpumem-percentage: 50 # Each vGPU contains 50% device memory of that GPU. Can not be used with nvidia.com/gpumem
-          #nvidia.com/gpucores: 90 # Utilization limit of this vGPU is set to 90% of total GPU utilization
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
+          #nvidia.com/gpumem: 3000 # Each physical GPU allocates 3000M of device memory to the pod
+          nvidia.com/gpumem-percentage: 50 # Each physical GPU allocates 50% of its memory to the pod. Cannot be used together with nvidia.com/gpumem
+          #nvidia.com/gpucores: 90 # Each physical GPU allocates 90% of its cores to the pod
           #nvidia.com/priority: 0 # We only have two priority classes, 0 (high) and 1 (low); default: 1
           # The utilization of a high-priority task won't be limited by resourceCores unless it shares a GPU node with other high-priority tasks.
           # The utilization of a low-priority task won't be limited by resourceCores if no other task shares its GPU.
@@ -24,7 +24,7 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
           nvidia.com/gpumem: 2000
-          #nvidia.com/gpucores: 90
+          #nvidia.com/gpucores: 90 # Each physical GPU allocates 90% of its cores to the pod
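The commented-out `nvidia.com/priority` line above can be sketched as a standalone pod spec — a sketch reusing the resource names from this commit; the pod name and values are illustrative:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: gpu-pod-high-priority # illustrative name
spec:
  containers:
    - name: ubuntu-container
      image: ubuntu:18.04
      command: ["bash", "-c", "sleep 86400"]
      resources:
        limits:
          nvidia.com/gpu: 1
          nvidia.com/gpucores: 50 # Each physical GPU allocates 50% of its cores to the pod
          nvidia.com/priority: 0  # high priority (0); the core limit applies only when sharing the GPU node with other high-priority tasks
```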

2 changes: 1 addition & 1 deletion examples/nvidia/specify_card_type_not_use.yaml
@@ -14,4 +14,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
2 changes: 1 addition & 1 deletion examples/nvidia/specify_card_type_to_use.yaml
@@ -14,4 +14,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
2 changes: 1 addition & 1 deletion examples/nvidia/specify_scheduling_policy.yaml
@@ -12,4 +12,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 1 # requesting 2 vGPUs
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
2 changes: 1 addition & 1 deletion examples/nvidia/specify_uuid_not_use.yaml
@@ -14,4 +14,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
2 changes: 1 addition & 1 deletion examples/nvidia/specify_uuid_to_use.yaml
@@ -14,4 +14,4 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 1 # requesting 1 vGPU
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
4 changes: 2 additions & 2 deletions examples/nvidia/use_as_normal.yaml
@@ -9,7 +9,7 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
 ---
 apiVersion: v1
 kind: Pod
@@ -22,5 +22,5 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
 # gpu-pod1 and gpu-pod2 will NOT share the same GPU
6 changes: 3 additions & 3 deletions examples/nvidia/use_exclusive_card.yaml
@@ -9,6 +9,6 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
-          nvidia.com/gpumem-percentage: 100 # Each vGPU contains 100% of the entire GPU device memory (Optional, Integer)
-          nvidia.com/gpucores: 100 # Each vGPU uses 100% of the entire GPU cores (Optional, Integer)
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem-percentage: 100 # Each physical GPU allocates 100% of its memory to the pod (Optional, Integer)
+          nvidia.com/gpucores: 100 # Each physical GPU allocates 100% of its cores to the pod (Optional, Integer)
6 changes: 3 additions & 3 deletions examples/nvidia/use_memory_fraction.yaml
@@ -9,6 +9,6 @@ spec:
       command: ["bash", "-c", "sleep 86400"]
       resources:
         limits:
-          nvidia.com/gpu: 2 # requesting 2 vGPUs
-          nvidia.com/gpumem-percentage: 50 # Each vGPU contains 50% device memory of that GPU (Optional, Integer)
-          nvidia.com/gpucores: 30 # Each vGPU uses 30% of the entire GPU (Optional, Integer)
+          nvidia.com/gpu: 2 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem-percentage: 50 # Each physical GPU allocates 50% of its memory to the pod (Optional, Integer)
+          nvidia.com/gpucores: 30 # Each physical GPU allocates 30% of its cores to the pod (Optional, Integer)
32 changes: 32 additions & 0 deletions examples/nvidia/use_sharing_card.yaml
@@ -0,0 +1,32 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: gpu-pod1
+spec:
+  containers:
+    - name: ubuntu-container
+      image: ubuntu:18.04
+      command: ["bash", "-c", "sleep 86400"]
+      resources:
+        limits:
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem-percentage: 40 # Each physical GPU allocates 40% of its memory to the pod (Optional, Integer)
+          nvidia.com/gpucores: 60 # Each physical GPU allocates 60% of its cores to the pod (Optional, Integer)
+
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: gpu-pod2
+spec:
+  containers:
+    - name: ubuntu-container
+      image: ubuntu:18.04
+      command: ["bash", "-c", "sleep 86400"]
+      resources:
+        limits:
+          nvidia.com/gpu: 1 # Declare how many physical GPUs the pod needs
+          nvidia.com/gpumem-percentage: 60 # Each physical GPU allocates 60% of its memory to the pod (Optional, Integer)
+          nvidia.com/gpucores: 40 # Each physical GPU allocates 40% of its cores to the pod (Optional, Integer)
+
+# gpu-pod1 and gpu-pod2 could share the same GPU
