Skip to content

Commit 2358039

Browse files
committed
Add GPU/Accelerator support to VMs
1 parent 5b4ab44 commit 2358039

File tree

6 files changed

+136
-0
lines changed

6 files changed

+136
-0
lines changed

api/v1beta1/gcpmachine_types.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,25 @@ type GCPMachineSpec struct {
346346
// RootDiskEncryptionKey defines the KMS key to be used to encrypt the root disk.
347347
// +optional
348348
RootDiskEncryptionKey *CustomerEncryptionKey `json:"rootDiskEncryptionKey,omitempty"`
349+
350+
// GuestAccelerators is a list of the type and count of accelerator cards
351+
// attached to the instance.
352+
// +optional
353+
GuestAccelerators []Accelerator `json:"guestAccelerators,omitempty"`
354+
}
355+
356+
// Accelerator is a specification of the type and number of accelerator
357+
// cards attached to the instance.
//
// Entries are listed in GCPMachineSpec.GuestAccelerators, and
// MachineScope.InstanceGuestAcceleratorsSpec copies each entry's Type and
// Count into a compute.AcceleratorConfig for the instance request.
358+
type Accelerator struct {
359+
// Count is the number of the guest accelerator cards exposed to this
360+
// instance.
//
// Serialized as "count" and omitted from JSON when zero (omitempty).
// NOTE(review): no minimum is enforced on this field here; presumably a
// value of at least 1 is expected - confirm whether a validation marker
// should be added.
361+
Count int64 `json:"count,omitempty"`
362+
// Type is the full or partial URL of the accelerator type resource to
363+
// attach to this instance. For example:
364+
// projects/my-project/zones/us-central1-c/acceleratorTypes/nvidia-tesla-p100
365+
// If you are creating an instance template, specify only the accelerator name.
366+
// See GPUs on Compute Engine for a full list of accelerator types.
//
// Serialized as "type" and omitted from JSON when empty (omitempty).
// NOTE(review): the "instance template" sentence presumably refers to
// Compute Engine instance templates, not to GCPMachineTemplate - confirm
// and reword if so.
367+
Type string `json:"type,omitempty"`
349368
}
350369

351370
// MetadataItem defines a single piece of metadata associated with an instance.

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cloud/scope/machine.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,22 @@ func (m *MachineScope) InstanceAdditionalMetadataSpec() *compute.Metadata {
374374
return metadata
375375
}
376376

377+
// InstanceGuestAcceleratorsSpec returns a slice of Guest Accelerator Config specs.
378+
func (m *MachineScope) InstanceGuestAcceleratorsSpec() []*compute.AcceleratorConfig {
379+
if len(m.GCPMachine.Spec.GuestAccelerators) == 0 {
380+
return nil
381+
}
382+
accelConfigs := make([]*compute.AcceleratorConfig, 0, len(m.GCPMachine.Spec.GuestAccelerators))
383+
for _, accel := range m.GCPMachine.Spec.GuestAccelerators {
384+
accelConfig := &compute.AcceleratorConfig{
385+
AcceleratorType: accel.Type,
386+
AcceleratorCount: accel.Count,
387+
}
388+
accelConfigs = append(accelConfigs, accelConfig)
389+
}
390+
return accelConfigs
391+
}
392+
377393
// InstanceSpec returns instance spec.
378394
func (m *MachineScope) InstanceSpec(log logr.Logger) *compute.Instance {
379395
instance := &compute.Instance{
@@ -457,6 +473,11 @@ func (m *MachineScope) InstanceSpec(log logr.Logger) *compute.Instance {
457473
instance.Metadata = m.InstanceAdditionalMetadataSpec()
458474
instance.ServiceAccounts = append(instance.ServiceAccounts, m.InstanceServiceAccountsSpec())
459475
instance.NetworkInterfaces = append(instance.NetworkInterfaces, m.InstanceNetworkInterfaceSpec())
476+
instance.GuestAccelerators = m.InstanceGuestAcceleratorsSpec()
477+
if len(instance.GuestAccelerators) > 0 {
478+
instance.Scheduling.OnHostMaintenance = "TERMINATE"
479+
}
480+
460481
return instance
461482
}
462483

config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachines.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,31 @@ spec:
200200
- Enabled
201201
- Disabled
202202
type: string
203+
guestAccelerators:
204+
description: |-
205+
GuestAccelerators is a list of the type and count of accelerator cards
206+
attached to the instance.
207+
items:
208+
description: |-
209+
Accelerator is a specification of the type and number of accelerator
210+
cards attached to the instance.
211+
properties:
212+
count:
213+
description: |-
214+
Count is the number of the guest accelerator cards exposed to this
215+
instance.
216+
format: int64
217+
type: integer
218+
type:
219+
description: |-
220+
Type is the full or partial URL of the accelerator type resource to
221+
attach to this instance. For example:
222+
projects/my-project/zones/us-central1-c/acceleratorTypes/nvidia-tesla-p100
223+
If you are creating an instance template, specify only the accelerator name.
224+
See GPUs on Compute Engine for a full list of accelerator types.
225+
type: string
226+
type: object
227+
type: array
203228
image:
204229
description: |-
205230
Image is the full reference to a valid image to be used for this machine.

config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinetemplates.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,31 @@ spec:
215215
- Enabled
216216
- Disabled
217217
type: string
218+
guestAccelerators:
219+
description: |-
220+
GuestAccelerators is a list of the type and count of accelerator cards
221+
attached to the instance.
222+
items:
223+
description: |-
224+
Accelerator is a specification of the type and number of accelerator
225+
cards attached to the instance.
226+
properties:
227+
count:
228+
description: |-
229+
Count is the number of the guest accelerator cards exposed to this
230+
instance.
231+
format: int64
232+
type: integer
233+
type:
234+
description: |-
235+
Type is the full or partial URL of the accelerator type resource to
236+
attach to this instance. For example:
237+
projects/my-project/zones/us-central1-c/acceleratorTypes/nvidia-tesla-p100
238+
If you are creating an instance template, specify only the accelerator name.
239+
See GPUs on Compute Engine for a full list of accelerator types.
240+
type: string
241+
type: object
242+
type: array
218243
image:
219244
description: |-
220245
Image is the full reference to a valid image to be used for this machine.

docs/book/src/topics/gpus.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# GPUs
2+
3+
Add GPUs via the `guestAccelerators` field in `GCPMachineTemplate`.
4+
5+
```yaml
6+
---
7+
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
8+
kind: GCPMachineTemplate
9+
metadata:
10+
name: mygcpmachinetemplate
11+
namespace: mynamespace
12+
spec:
13+
template:
14+
spec:
15+
image: projects/myproject/global/images/myimage
16+
instanceType: n1-standard-2
17+
guestAccelerators:
18+
- type: projects/myproject/zones/us-central1-c/acceleratorTypes/nvidia-tesla-t4
19+
count: 1
20+
```
21+
22+
See [GPUs on Compute Engine](https://cloud.google.com/compute/docs/gpus) for the available accelerator types and the zones that offer them.
23+
24+
NOTE: Instances with accelerators/GPUs do NOT support live migration.
25+
Therefore, when `guestAccelerators` is set, the provider always sets the instance's `onHostMaintenance` scheduling option to `TERMINATE`.
26+
https://cloud.google.com/compute/docs/instances/setting-vm-host-options

0 commit comments

Comments
 (0)