Skip to content

Commit

Permalink
[feat][kubectl-plugin] support waiting for RayCluster to be provisioned
Browse files Browse the repository at this point in the history
in the command `kubectl ray create cluster` with an optional flag `--wait` that
times out after a configurable duration that defaults to five minutes.

The RayCluster is provisioned when it has a status condition with
`type=RayClusterProvisioned` and `status=true` and falls back to checking if
its `.status.state` is `ready`. The command returns an error if the timeout is
reached.

The status condition checking behavior relies on ray-operator having its
feature gate `RayClusterStatusConditions=true` enabled.

Signed-off-by: David Xia <[email protected]>
  • Loading branch information
davidxia committed Feb 19, 2025
1 parent 7b13f94 commit ab63a15
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 65 deletions.
16 changes: 2 additions & 14 deletions kubectl-plugin/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,19 @@ require (
k8s.io/cli-runtime v0.31.1
k8s.io/client-go v0.31.1
k8s.io/kubectl v0.31.1
k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3
sigs.k8s.io/yaml v1.4.0
)

require (
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
github.com/MakeNowJust/heredoc v1.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/chai2010/gettext-go v1.0.3 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.12.1 // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
github.com/fatih/camelcase v1.0.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-errors/errors v1.5.1 // indirect
github.com/go-logr/logr v1.4.2 // indirect
Expand All @@ -43,7 +40,6 @@ require (
github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
Expand All @@ -57,7 +53,6 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
Expand All @@ -71,15 +66,11 @@ require (
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.4 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.59.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
go.starlark.net v0.0.0-20240725214946-42030a7cedce // indirect
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sync v0.10.0 // indirect
Expand All @@ -88,16 +79,13 @@ require (
golang.org/x/text v0.21.0 // indirect
golang.org/x/time v0.6.0 // indirect
golang.org/x/tools v0.25.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.31.1 // indirect
k8s.io/component-base v0.31.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 // indirect
k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 // indirect
sigs.k8s.io/controller-runtime v0.19.0 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/kustomize/api v0.17.3 // indirect
Expand Down
36 changes: 0 additions & 36 deletions kubectl-plugin/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 24 additions & 15 deletions kubectl-plugin/pkg/cmd/create/create_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package create
import (
"context"
"fmt"
"time"

"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/generation"
Expand All @@ -29,18 +30,18 @@ type CreateClusterOptions struct {
workerGPU string
workerReplicas int32
dryRun bool
wait bool
timeout time.Duration
}

var (
createClusterLong = templates.LongDesc(`
Creates Ray Cluster from inputed file or generate one for user.
`)
defaultProvisionedTimeout = 5 * time.Minute

createClusterExample = templates.Examples(fmt.Sprintf(`
# Create a Ray Cluster using default values
# Create a Ray cluster using default values
kubectl ray create cluster sample-cluster
# Creates Ray Cluster from flags input
# Create a Ray cluster from flags input
kubectl ray create cluster sample-cluster --ray-version %s --image %s --head-cpu 1 --head-memory 5Gi --worker-replicas 3 --worker-cpu 1 --worker-memory 5Gi
`, util.RayVersion, util.RayImage))
)
Expand All @@ -58,8 +59,7 @@ func NewCreateClusterCommand(streams genericclioptions.IOStreams) *cobra.Command

cmd := &cobra.Command{
Use: "cluster [CLUSTERNAME]",
Short: "Create Ray Cluster resource",
Long: createClusterLong,
Short: "Create Ray cluster",
Example: createClusterExample,
SilenceUsage: true,
RunE: func(cmd *cobra.Command, args []string) error {
Expand All @@ -83,6 +83,8 @@ func NewCreateClusterCommand(streams genericclioptions.IOStreams) *cobra.Command
cmd.Flags().StringVar(&options.workerMemory, "worker-memory", "4Gi", "amount of memory in each worker group replica")
cmd.Flags().StringVar(&options.workerGPU, "worker-gpu", "0", "number of GPUs in each worker group replica")
cmd.Flags().BoolVar(&options.dryRun, "dry-run", false, "print the generated YAML instead of creating the cluster")
cmd.Flags().BoolVar(&options.wait, "wait", false, "wait for the cluster to be provisioned before returning. Returns an error if the cluster is not provisioned by the timeout specified")
cmd.Flags().DurationVar(&options.timeout, "timeout", defaultProvisionedTimeout, "the timeout for --wait")

options.configFlags.AddFlags(cmd.Flags())
return cmd
Expand All @@ -108,7 +110,7 @@ func (options *CreateClusterOptions) Complete(cmd *cobra.Command, args []string)
func (options *CreateClusterOptions) Validate() error {
config, err := options.configFlags.ToRawKubeConfigLoader().RawConfig()
if err != nil {
return fmt.Errorf("Error retrieving raw config: %w", err)
return fmt.Errorf("error retrieving raw config: %w", err)
}
if !util.HasKubectlContext(config, options.configFlags) {
return fmt.Errorf("no context is currently set, use %q or %q to select a new one", "--context", "kubectl config use-context <context>")
Expand All @@ -123,7 +125,6 @@ func (options *CreateClusterOptions) Run(ctx context.Context, factory cmdutil.Fa
return fmt.Errorf("failed to create client: %w", err)
}

// Will generate yaml file
rayClusterObject := generation.RayClusterYamlObject{
Namespace: *options.configFlags.Namespace,
ClusterName: options.clusterName,
Expand All @@ -142,23 +143,31 @@ func (options *CreateClusterOptions) Run(ctx context.Context, factory cmdutil.Fa

rayClusterac := rayClusterObject.GenerateRayClusterApplyConfig()

// If dry run is enabled, it will call the yaml converter and print out the yaml
// If dry run is enabled, it will call the YAML converter and print out the YAML
if options.dryRun {
rayClusterYaml, err := generation.ConvertRayClusterApplyConfigToYaml(rayClusterac)
if err != nil {
return fmt.Errorf("Error when converting RayClusterApplyConfig to YAML: %w", err)
return fmt.Errorf("error creating RayCluster YAML: %w", err)
}
fmt.Printf("%s\n", rayClusterYaml)
return nil
}

// TODO: Decide whether to save yaml to file or not.
// TODO: Decide whether to save YAML to file or not.

// Applying the YAML
result, err := k8sClient.RayClient().RayV1().RayClusters(*options.configFlags.Namespace).Apply(ctx, rayClusterac, metav1.ApplyOptions{FieldManager: "kubectl-plugin"})
if err != nil {
return fmt.Errorf("Failed to create Ray cluster with: %w", err)
return fmt.Errorf("failed to create Ray cluster: %w", err)
}
fmt.Printf("Created Ray Cluster: %s\n", result.GetName())
fmt.Printf("Created Ray cluster: %s\n", result.GetName())

if options.wait {
err = k8sClient.WaitRayClusterProvisioned(ctx, *options.configFlags.Namespace, result.GetName(), options.timeout)
if err != nil {
return err
}
fmt.Printf("Ray cluster %s is provisioned\n", result.GetName())
}

return nil
}
5 changes: 5 additions & 0 deletions kubectl-plugin/pkg/cmd/version/version_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"context"
"fmt"
"testing"
"time"

"github.com/ray-project/kuberay/kubectl-plugin/pkg/util"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -97,6 +98,10 @@ func (c fakeClient) GetRayHeadSvcName(_ context.Context, _ string, _ util.Resour
return "", nil
}

func (c fakeClient) WaitRayClusterProvisioned(_ context.Context, _ string, _ string, _ time.Duration) error {
return nil
}

func (c fakeClient) KubernetesClient() kubernetes.Interface {
return nil
}
Expand Down
Loading

0 comments on commit ab63a15

Please sign in to comment.