@@ -0,0 +1,170 @@
apiVersion: config.karmada.io/v1alpha1
kind: ResourceInterpreterCustomization
metadata:
name: declarative-configuration-raycluster
spec:
target:
apiVersion: ray.io/v1
kind: RayCluster
customizations:
componentResource:
luaScript: |
local kube = require("kube")

local function isempty(s)
return s == nil or s == ''
end

function GetComponents(desiredObj)
local components = {}

-- Head component (always 1 replica)
if desiredObj.spec ~= nil and desiredObj.spec.headGroupSpec ~= nil and desiredObj.spec.headGroupSpec.template ~= nil then
local headRequires = kube.accuratePodRequirements(desiredObj.spec.headGroupSpec.template)
local headComponent = {
name = "ray-head",
replicas = 1,
replicaRequirements = headRequires
}
table.insert(components, headComponent)
end

-- Worker group components
if desiredObj.spec ~= nil and desiredObj.spec.workerGroupSpecs ~= nil then
for i, workerGroup in ipairs(desiredObj.spec.workerGroupSpecs) do
local name = workerGroup.groupName
if isempty(name) then
name = "worker-" .. tostring(i)
end
local replicas = workerGroup.replicas or 0
local requires = nil
if workerGroup.template ~= nil then
requires = kube.accuratePodRequirements(workerGroup.template)
end
local wgComponent = {
name = name,
replicas = replicas,
replicaRequirements = requires
}
table.insert(components, wgComponent)
end
end
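-- With testdata/desired-raycluster.yaml this yields two components:
-- ray-head (replicas = 1, 1 cpu / 2G) and workergroup (replicas = 1, 1 cpu / 1G).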

return components
end
replicaResource:
luaScript: >
local kube = require("kube")

local function isempty(s)
return s == nil or s == ''
end

function GetReplicas(desiredObj)
-- Head pod is always 1 for RayCluster
local totalReplicas = 1
local requires = {
resourceRequest = {},
nodeClaim = {},
}
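-- 'requires' follows the replicaRequirements shape used above: resourceRequest (cpu/memory),
-- nodeClaim (nodeSelector, tolerations) and an optional priorityClassName.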

-- Initialize with head group resources
-- Until multiple podTemplates are supported, the interpreter only takes nodeSelector, toleration, and priorityClassName input from the head group for the common podTemplate
requires.resourceRequest.cpu = desiredObj.spec.headGroupSpec.template.spec.containers[1].resources.limits.cpu
requires.resourceRequest.memory = desiredObj.spec.headGroupSpec.template.spec.containers[1].resources.limits.memory
if not isempty(desiredObj.spec.headGroupSpec.template.spec.nodeSelector) then
requires.nodeClaim.nodeSelector = desiredObj.spec.headGroupSpec.template.spec.nodeSelector
end
if not isempty(desiredObj.spec.headGroupSpec.template.spec.tolerations) then
requires.nodeClaim.tolerations = desiredObj.spec.headGroupSpec.template.spec.tolerations
end
if not isempty(desiredObj.spec.headGroupSpec.template.spec.priorityClassName) then
requires.priorityClassName = desiredObj.spec.headGroupSpec.template.spec.priorityClassName
end

-- Convert to numeric for comparison
local max_memory_value = kube.getResourceQuantity(requires.resourceRequest.memory)

-- Add replicas from all worker group specs
-- Until multiple podTemplates are supported in replicaRequirements, take the maximum cpu and memory values across all groups as the requirement
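-- Worked example with testdata/desired-raycluster.yaml: head at 1 cpu / 2G, one worker group with
-- replicas = 1 at 1 cpu / 1G, so GetReplicas returns 2 replicas with cpu = 1 and memory = 2G.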
if desiredObj.spec.workerGroupSpecs then
for i, workerGroup in ipairs(desiredObj.spec.workerGroupSpecs) do
if workerGroup.replicas then
totalReplicas = totalReplicas + workerGroup.replicas
end
local container = workerGroup.template.spec.containers[1]
-- NOTE: math.max assumes the cpu limit is a plain number (as in the sample manifests), not a quantity string such as "500m"
if container ~= nil and not isempty(container.resources.limits.cpu) then
requires.resourceRequest.cpu = math.max(requires.resourceRequest.cpu, container.resources.limits.cpu)
end
if container ~= nil and not isempty(container.resources.limits.memory) then
local memory_value = kube.getResourceQuantity(container.resources.limits.memory)
if memory_value > max_memory_value then
max_memory_value = memory_value
requires.resourceRequest.memory = container.resources.limits.memory
end
end
end
end

return totalReplicas, requires
end
healthInterpretation:
luaScript: >
function InterpretHealth(observedObj)
if observedObj.status == nil or observedObj.status.state == nil then
return false
end
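-- The observed-raycluster.yaml fixture reports status.state: ready, so this returns true for that sample.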
return observedObj.status.state == 'ready'
end
statusAggregation:
luaScript: >
function AggregateStatus(desiredObj, statusItems)
if desiredObj.status == nil then
desiredObj.status = {}
end
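-- status-file.yaml carries a single item from member1; with several member clusters,
-- later items simply overwrite the fields set below.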

for i = 1, #statusItems do
local currentStatus = statusItems[i].status
if currentStatus ~= nil then
desiredObj.status.state = currentStatus.state
desiredObj.status.stateTransitionTimes = currentStatus.stateTransitionTimes
desiredObj.status.conditions = currentStatus.conditions
desiredObj.status.desiredCPU = currentStatus.desiredCPU
desiredObj.status.desiredGPU = currentStatus.desiredGPU
desiredObj.status.desiredMemory = currentStatus.desiredMemory
desiredObj.status.desiredTPU = currentStatus.desiredTPU
desiredObj.status.desiredWorkerReplicas = currentStatus.desiredWorkerReplicas
desiredObj.status.endpoints = currentStatus.endpoints
desiredObj.status.head = currentStatus.head
desiredObj.status.lastUpdateTime = currentStatus.lastUpdateTime
desiredObj.status.maxWorkerReplicas = currentStatus.maxWorkerReplicas
desiredObj.status.minWorkerReplicas = currentStatus.minWorkerReplicas
desiredObj.status.observedGeneration = currentStatus.observedGeneration
desiredObj.status.readyWorkerReplicas = currentStatus.readyWorkerReplicas
end
end

return desiredObj
end
statusReflection:
luaScript: >
function ReflectStatus(observedObj)
local status = {}
if observedObj == nil or observedObj.status == nil then
return status
end
status.state = observedObj.status.state
status.stateTransitionTimes = observedObj.status.stateTransitionTimes
status.conditions = observedObj.status.conditions
status.desiredCPU = observedObj.status.desiredCPU
status.desiredGPU = observedObj.status.desiredGPU
status.desiredMemory = observedObj.status.desiredMemory
status.desiredTPU = observedObj.status.desiredTPU
status.desiredWorkerReplicas = observedObj.status.desiredWorkerReplicas
status.endpoints = observedObj.status.endpoints
status.head = observedObj.status.head
status.lastUpdateTime = observedObj.status.lastUpdateTime
status.maxWorkerReplicas = observedObj.status.maxWorkerReplicas
status.minWorkerReplicas = observedObj.status.minWorkerReplicas
status.observedGeneration = observedObj.status.observedGeneration
status.readyWorkerReplicas = observedObj.status.readyWorkerReplicas
return status
end
@@ -0,0 +1,12 @@
tests:
- desiredInputPath: testdata/desired-raycluster.yaml
statusInputPath: testdata/status-file.yaml
operation: AggregateStatus
- desiredInputPath: testdata/desired-raycluster.yaml
operation: InterpretReplica
- observedInputPath: testdata/observed-raycluster.yaml
operation: InterpretHealth
- observedInputPath: testdata/observed-raycluster.yaml
operation: InterpretStatus
- observedInputPath: testdata/observed-raycluster.yaml
operation: InterpretComponent
@@ -0,0 +1,44 @@
apiVersion: ray.io/v1
kind: RayCluster
metadata:
name: sample
spec:
rayVersion: '2.46.0'
headGroupSpec:
rayStartParams: {}
template:
spec:
containers:
- name: ray-head
image: rayproject/ray:2.46.0
resources:
limits:
cpu: 1
memory: 2G
requests:
cpu: 1
memory: 2G
ports:
- containerPort: 6379
name: gcs-server
- containerPort: 8265
name: dashboard
- containerPort: 10001
name: client
workerGroupSpecs:
- replicas: 1
minReplicas: 1
maxReplicas: 5
groupName: workergroup
rayStartParams: {}
template:
spec:
containers:
- name: ray-worker
image: rayproject/ray:2.46.0
resources:
limits:
cpu: 1
memory: 1G
requests:
cpu: 1
memory: 1G
@@ -0,0 +1,103 @@
apiVersion: ray.io/v1
kind: RayCluster
metadata:
annotations:
kubectl.kubernetes.io/last-applied-configuration: |
{"apiVersion":"ray.io/v1","kind":"RayCluster","metadata":{"annotations":{},"name":"sample","namespace":"default"},"spec":{"headGroupSpec":{"rayStartParams":{},"template":{"spec":{"containers":[{"image":"rayproject/ray:2.46.0","name":"ray-head","ports":[{"containerPort":6379,"name":"gcs-server"},{"containerPort":8265,"name":"dashboard"},{"containerPort":10001,"name":"client"}],"resources":{"limits":{"cpu":1,"memory":"2G"},"requests":{"cpu":1,"memory":"2G"}}}]}}},"rayVersion":"2.46.0","workerGroupSpecs":[{"groupName":"workergroup","maxReplicas":5,"minReplicas":1,"rayStartParams":{},"replicas":1,"template":{"spec":{"containers":[{"image":"rayproject/ray:2.46.0","name":"ray-worker","resources":{"limits":{"cpu":1,"memory":"1G"},"requests":{"cpu":1,"memory":"1G"}}}]}}}]}}
creationTimestamp: "2025-09-21T03:54:44Z"
generation: 1
name: sample
namespace: default
resourceVersion: "850"
uid: 040acb09-4e53-4a23-a8b2-28b4300af70b
spec:
headGroupSpec:
rayStartParams: {}
template:
spec:
containers:
- image: rayproject/ray:2.46.0
name: ray-head
ports:
- containerPort: 6379
name: gcs-server
protocol: TCP
- containerPort: 8265
name: dashboard
protocol: TCP
- containerPort: 10001
name: client
protocol: TCP
resources:
limits:
cpu: 1
memory: 2G
requests:
cpu: 1
memory: 2G
rayVersion: 2.46.0
workerGroupSpecs:
- groupName: workergroup
maxReplicas: 5
minReplicas: 1
numOfHosts: 1
rayStartParams: {}
replicas: 1
template:
spec:
containers:
- image: rayproject/ray:2.46.0
name: ray-worker
resources:
limits:
cpu: 1
memory: 1G
requests:
cpu: 1
memory: 1G
status:
availableWorkerReplicas: 1
conditions:
- lastTransitionTime: "2025-09-21T03:55:30Z"
message: ""
reason: HeadPodRunningAndReady
status: "True"
type: HeadPodReady
- lastTransitionTime: "2025-09-21T03:55:45Z"
message: All Ray Pods are ready for the first time
reason: AllPodRunningAndReadyFirstTime
status: "True"
type: RayClusterProvisioned
- lastTransitionTime: "2025-09-21T03:54:44Z"
message: ""
reason: RayClusterSuspended
status: "False"
type: RayClusterSuspended
- lastTransitionTime: "2025-09-21T03:54:44Z"
message: ""
reason: RayClusterSuspending
status: "False"
type: RayClusterSuspending
desiredCPU: "2"
desiredGPU: "0"
desiredMemory: 3G
desiredTPU: "0"
desiredWorkerReplicas: 1
endpoints:
client: "10001"
dashboard: "8265"
gcs-server: "6379"
metrics: "8080"
head:
podIP: 10.244.0.6
podName: sample-head-9cvfc
serviceIP: 10.244.0.6
serviceName: sample-head-svc
lastUpdateTime: "2025-09-21T03:55:45Z"
maxWorkerReplicas: 5
minWorkerReplicas: 1
observedGeneration: 1
readyWorkerReplicas: 1
state: ready
stateTransitionTimes:
ready: "2025-09-21T03:55:45Z"
@@ -0,0 +1,48 @@
applied: true
clusterName: member1
status:
availableWorkerReplicas: 1
conditions:
- lastTransitionTime: "2025-09-21T03:55:30Z"
message: ""
reason: HeadPodRunningAndReady
status: "True"
type: HeadPodReady
- lastTransitionTime: "2025-09-21T03:55:45Z"
message: All Ray Pods are ready for the first time
reason: AllPodRunningAndReadyFirstTime
status: "True"
type: RayClusterProvisioned
- lastTransitionTime: "2025-09-21T03:54:44Z"
message: ""
reason: RayClusterSuspended
status: "False"
type: RayClusterSuspended
- lastTransitionTime: "2025-09-21T03:54:44Z"
message: ""
reason: RayClusterSuspending
status: "False"
type: RayClusterSuspending
desiredCPU: "2"
desiredGPU: "0"
desiredMemory: 3G
desiredTPU: "0"
desiredWorkerReplicas: 1
endpoints:
client: "10001"
dashboard: "8265"
gcs-server: "6379"
metrics: "8080"
head:
podIP: 10.244.0.6
podName: sample-head-9cvfc
serviceIP: 10.244.0.6
serviceName: sample-head-svc
lastUpdateTime: "2025-09-21T03:55:45Z"
maxWorkerReplicas: 5
minWorkerReplicas: 1
observedGeneration: 1
readyWorkerReplicas: 1
state: ready
stateTransitionTimes:
ready: "2025-09-21T03:55:45Z"