Skip to content

Commit ab63a15

Browse files
committed
[feat][kubectl-plugin] support waiting for RayCluster to be provisioned
in the command `kubectl ray create cluster` via an optional `--wait` flag, which times out after a configurable duration (`--timeout`) that defaults to five minutes. The RayCluster is considered provisioned when it has a status condition with `type=RayClusterProvisioned` and `status=true`; otherwise the command falls back to checking whether its `.status.state` is `ready`. The command returns an error if the timeout is reached. The status condition check relies on ray-operator having its feature gate `RayClusterStatusConditions=true` enabled. Signed-off-by: David Xia <[email protected]>
1 parent 7b13f94 commit ab63a15

File tree

6 files changed

+150
-65
lines changed

6 files changed

+150
-65
lines changed

kubectl-plugin/go.mod

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,19 @@ require (
1919
k8s.io/cli-runtime v0.31.1
2020
k8s.io/client-go v0.31.1
2121
k8s.io/kubectl v0.31.1
22+
k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3
2223
sigs.k8s.io/yaml v1.4.0
2324
)
2425

2526
require (
2627
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
2728
github.com/MakeNowJust/heredoc v1.0.0 // indirect
28-
github.com/beorn7/perks v1.0.1 // indirect
2929
github.com/blang/semver/v4 v4.0.0 // indirect
30-
github.com/cespare/xxhash/v2 v2.3.0 // indirect
3130
github.com/chai2010/gettext-go v1.0.3 // indirect
3231
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
3332
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
34-
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
3533
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
3634
github.com/fatih/camelcase v1.0.0 // indirect
37-
github.com/fsnotify/fsnotify v1.7.0 // indirect
3835
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
3936
github.com/go-errors/errors v1.5.1 // indirect
4037
github.com/go-logr/logr v1.4.2 // indirect
@@ -43,7 +40,6 @@ require (
4340
github.com/go-openapi/swag v0.23.0 // indirect
4441
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
4542
github.com/gogo/protobuf v1.3.2 // indirect
46-
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
4743
github.com/golang/protobuf v1.5.4 // indirect
4844
github.com/google/btree v1.1.3 // indirect
4945
github.com/google/gnostic-models v0.6.8 // indirect
@@ -57,7 +53,6 @@ require (
5753
github.com/inconshreveable/mousetrap v1.1.0 // indirect
5854
github.com/josharian/intern v1.0.0 // indirect
5955
github.com/json-iterator/go v1.1.12 // indirect
60-
github.com/klauspost/compress v1.17.9 // indirect
6156
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
6257
github.com/mailru/easyjson v0.7.7 // indirect
6358
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
@@ -71,15 +66,11 @@ require (
7166
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
7267
github.com/pkg/errors v0.9.1 // indirect
7368
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
74-
github.com/prometheus/client_golang v1.20.4 // indirect
75-
github.com/prometheus/client_model v0.6.1 // indirect
76-
github.com/prometheus/common v0.59.1 // indirect
77-
github.com/prometheus/procfs v0.15.1 // indirect
7869
github.com/russross/blackfriday/v2 v2.1.0 // indirect
70+
github.com/stretchr/objx v0.5.2 // indirect
7971
github.com/x448/float16 v0.8.4 // indirect
8072
github.com/xlab/treeprint v1.2.0 // indirect
8173
go.starlark.net v0.0.0-20240725214946-42030a7cedce // indirect
82-
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect
8374
golang.org/x/net v0.33.0 // indirect
8475
golang.org/x/oauth2 v0.23.0 // indirect
8576
golang.org/x/sync v0.10.0 // indirect
@@ -88,16 +79,13 @@ require (
8879
golang.org/x/text v0.21.0 // indirect
8980
golang.org/x/time v0.6.0 // indirect
9081
golang.org/x/tools v0.25.0 // indirect
91-
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
9282
google.golang.org/protobuf v1.34.2 // indirect
9383
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
9484
gopkg.in/inf.v0 v0.9.1 // indirect
9585
gopkg.in/yaml.v3 v3.0.1 // indirect
96-
k8s.io/apiextensions-apiserver v0.31.1 // indirect
9786
k8s.io/component-base v0.31.1 // indirect
9887
k8s.io/klog/v2 v2.130.1 // indirect
9988
k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 // indirect
100-
k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 // indirect
10189
sigs.k8s.io/controller-runtime v0.19.0 // indirect
10290
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
10391
sigs.k8s.io/kustomize/api v0.17.3 // indirect

kubectl-plugin/go.sum

Lines changed: 0 additions & 36 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

kubectl-plugin/pkg/cmd/create/create_cluster.go

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package create
33
import (
44
"context"
55
"fmt"
6+
"time"
67

78
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
89
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/generation"
@@ -29,18 +30,18 @@ type CreateClusterOptions struct {
2930
workerGPU string
3031
workerReplicas int32
3132
dryRun bool
33+
wait bool
34+
timeout time.Duration
3235
}
3336

3437
var (
35-
createClusterLong = templates.LongDesc(`
36-
Creates Ray Cluster from inputed file or generate one for user.
37-
`)
38+
defaultProvisionedTimeout = 5 * time.Minute
3839

3940
createClusterExample = templates.Examples(fmt.Sprintf(`
40-
# Create a Ray Cluster using default values
41+
# Create a Ray cluster using default values
4142
kubectl ray create cluster sample-cluster
4243
43-
# Creates Ray Cluster from flags input
44+
# Create a Ray cluster from flags input
4445
kubectl ray create cluster sample-cluster --ray-version %s --image %s --head-cpu 1 --head-memory 5Gi --worker-replicas 3 --worker-cpu 1 --worker-memory 5Gi
4546
`, util.RayVersion, util.RayImage))
4647
)
@@ -58,8 +59,7 @@ func NewCreateClusterCommand(streams genericclioptions.IOStreams) *cobra.Command
5859

5960
cmd := &cobra.Command{
6061
Use: "cluster [CLUSTERNAME]",
61-
Short: "Create Ray Cluster resource",
62-
Long: createClusterLong,
62+
Short: "Create Ray cluster",
6363
Example: createClusterExample,
6464
SilenceUsage: true,
6565
RunE: func(cmd *cobra.Command, args []string) error {
@@ -83,6 +83,8 @@ func NewCreateClusterCommand(streams genericclioptions.IOStreams) *cobra.Command
8383
cmd.Flags().StringVar(&options.workerMemory, "worker-memory", "4Gi", "amount of memory in each worker group replica")
8484
cmd.Flags().StringVar(&options.workerGPU, "worker-gpu", "0", "number of GPUs in each worker group replica")
8585
cmd.Flags().BoolVar(&options.dryRun, "dry-run", false, "print the generated YAML instead of creating the cluster")
86+
cmd.Flags().BoolVar(&options.wait, "wait", false, "wait for the cluster to be provisioned before returning. Returns an error if the cluster is not provisioned by the timeout specified")
87+
cmd.Flags().DurationVar(&options.timeout, "timeout", defaultProvisionedTimeout, "the timeout for --wait")
8688

8789
options.configFlags.AddFlags(cmd.Flags())
8890
return cmd
@@ -108,7 +110,7 @@ func (options *CreateClusterOptions) Complete(cmd *cobra.Command, args []string)
108110
func (options *CreateClusterOptions) Validate() error {
109111
config, err := options.configFlags.ToRawKubeConfigLoader().RawConfig()
110112
if err != nil {
111-
return fmt.Errorf("Error retrieving raw config: %w", err)
113+
return fmt.Errorf("error retrieving raw config: %w", err)
112114
}
113115
if !util.HasKubectlContext(config, options.configFlags) {
114116
return fmt.Errorf("no context is currently set, use %q or %q to select a new one", "--context", "kubectl config use-context <context>")
@@ -123,7 +125,6 @@ func (options *CreateClusterOptions) Run(ctx context.Context, factory cmdutil.Fa
123125
return fmt.Errorf("failed to create client: %w", err)
124126
}
125127

126-
// Will generate yaml file
127128
rayClusterObject := generation.RayClusterYamlObject{
128129
Namespace: *options.configFlags.Namespace,
129130
ClusterName: options.clusterName,
@@ -142,23 +143,31 @@ func (options *CreateClusterOptions) Run(ctx context.Context, factory cmdutil.Fa
142143

143144
rayClusterac := rayClusterObject.GenerateRayClusterApplyConfig()
144145

145-
// If dry run is enabled, it will call the yaml converter and print out the yaml
146+
// If dry run is enabled, it will call the YAML converter and print out the YAML
146147
if options.dryRun {
147148
rayClusterYaml, err := generation.ConvertRayClusterApplyConfigToYaml(rayClusterac)
148149
if err != nil {
149-
return fmt.Errorf("Error when converting RayClusterApplyConfig to YAML: %w", err)
150+
return fmt.Errorf("error creating RayCluster YAML: %w", err)
150151
}
151152
fmt.Printf("%s\n", rayClusterYaml)
152153
return nil
153154
}
154155

155-
// TODO: Decide whether to save yaml to file or not.
156+
// TODO: Decide whether to save YAML to file or not.
156157

157-
// Applying the YAML
158158
result, err := k8sClient.RayClient().RayV1().RayClusters(*options.configFlags.Namespace).Apply(ctx, rayClusterac, metav1.ApplyOptions{FieldManager: "kubectl-plugin"})
159159
if err != nil {
160-
return fmt.Errorf("Failed to create Ray cluster with: %w", err)
160+
return fmt.Errorf("failed to create Ray cluster: %w", err)
161161
}
162-
fmt.Printf("Created Ray Cluster: %s\n", result.GetName())
162+
fmt.Printf("Created Ray cluster: %s\n", result.GetName())
163+
164+
if options.wait {
165+
err = k8sClient.WaitRayClusterProvisioned(ctx, *options.configFlags.Namespace, result.GetName(), options.timeout)
166+
if err != nil {
167+
return err
168+
}
169+
fmt.Printf("Ray cluster %s is provisioned\n", result.GetName())
170+
}
171+
163172
return nil
164173
}

kubectl-plugin/pkg/cmd/version/version_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"context"
66
"fmt"
77
"testing"
8+
"time"
89

910
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
1011
"github.com/stretchr/testify/assert"
@@ -97,6 +98,10 @@ func (c fakeClient) GetRayHeadSvcName(_ context.Context, _ string, _ util.Resour
9798
return "", nil
9899
}
99100

101+
func (c fakeClient) WaitRayClusterProvisioned(_ context.Context, _ string, _ string, _ time.Duration) error {
102+
return nil
103+
}
104+
100105
func (c fakeClient) KubernetesClient() kubernetes.Interface {
101106
return nil
102107
}

0 commit comments

Comments
 (0)