Fixing HPA spec for NIM Service (#52)

Signed-off-by: Vishesh Tanksale <[email protected]>
NVIDIA · Aug 15, 2024 · 0672d66 · 0672d66
1 parent 6f5e1a9
commit 0672d66
Show file tree

Hide file tree

Showing 9 changed files with 64 additions and 277 deletions.
diff --git a/api/apps/v1alpha1/common_types.go b/api/apps/v1alpha1/common_types.go
@@ -51,8 +51,15 @@ type ServiceMonitor struct {
 
 // Autoscaling defines attributes to automatically scale the service based on metrics
 type Autoscaling struct {
-	Enabled *bool                                     `json:"enabled,omitempty"`
-	HPASpec autoscalingv2.HorizontalPodAutoscalerSpec `json:"hpa,omitempty"`
+	Enabled *bool                       `json:"enabled,omitempty"`
+	HPA     HorizontalPodAutoscalerSpec `json:"hpa,omitempty"`
+}
+
+// HorizontalPodAutoscalerSpec defines the user-configurable subset of the
+// Kubernetes autoscaling/v2 HorizontalPodAutoscaler spec. The scale target
+// reference is not exposed here; for NIMService, GetHPAParams sets it from
+// the resource's own name and deployment kind when building the final spec.
+type HorizontalPodAutoscalerSpec struct {
+	// MinReplicas is the lower limit for the number of replicas to which the
+	// autoscaler can scale down.
+	MinReplicas *int32 `json:"minReplicas,omitempty"`
+
+	// MaxReplicas is the upper limit for the number of replicas to which the
+	// autoscaler can scale up. Kubernetes requires it to be no less than
+	// MinReplicas.
+	MaxReplicas int32 `json:"maxReplicas"`
+
+	// Metrics contains the specifications used to calculate the desired
+	// replica count.
+	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
+
+	// Behavior configures the scaling behavior of the target in both Up and
+	// Down directions (scaleUp and scaleDown fields respectively).
+	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
 }
 
 // Image defines image attributes

diff --git a/api/apps/v1alpha1/nimservice_types.go b/api/apps/v1alpha1/nimservice_types.go
@@ -439,9 +439,9 @@ func (n *NIMService) GetExternalPVC() *PersistentVolumeClaim {
 	return &n.Spec.Storage.PVC
 }
 
-// GetHPASpec returns the HPA spec for the NIMService deployment
-func (n *NIMService) GetHPASpec() autoscalingv2.HorizontalPodAutoscalerSpec {
-	return n.Spec.Scale.HPASpec
+// GetHPA returns the user-supplied HPA settings (the operator's own HorizontalPodAutoscalerSpec) for the NIMService deployment
+func (n *NIMService) GetHPA() HorizontalPodAutoscalerSpec {
+	return n.Spec.Scale.HPA
 }
 
 // GetReplicas returns replicas for the NIMService deployment
@@ -668,10 +668,18 @@ func (n *NIMService) GetHPAParams() *rendertypes.HPAParams {
 	params.Annotations = n.GetServiceAnnotations()
 
 	// Set HPA spec
-	hpaSpec := n.GetHPASpec()
-	hpaSpec.ScaleTargetRef.Kind = n.GetDeploymentKind()
-	hpaSpec.ScaleTargetRef.Name = n.GetName()
-	hpaSpec.ScaleTargetRef.APIVersion = "apps/v1"
+	hpa := n.GetHPA()
+	hpaSpec := autoscalingv2.HorizontalPodAutoscalerSpec{
+		ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
+			Kind:       n.GetDeploymentKind(),
+			Name:       n.GetName(),
+			APIVersion: "apps/v1",
+		},
+		MinReplicas: hpa.MinReplicas,
+		MaxReplicas: hpa.MaxReplicas,
+		Metrics:     hpa.Metrics,
+		Behavior:    hpa.Behavior,
+	}
 	params.HPASpec = hpaSpec
 	return params
 }

diff --git a/api/apps/v1alpha1/zz_generated.deepcopy.go b/api/apps/v1alpha1/zz_generated.deepcopy.go
diff --git a/bundle/manifests/apps.nvidia.com_nimpipelines.yaml b/bundle/manifests/apps.nvidia.com_nimpipelines.yaml
@@ -1299,14 +1299,11 @@ spec:
                             enabled:
                               type: boolean
                             hpa:
-                              description: HorizontalPodAutoscalerSpec describes the
-                                desired functionality of the HorizontalPodAutoscaler.
                               properties:
                                 behavior:
                                   description: |-
-                                    behavior configures the scaling behavior of the target
+                                    HorizontalPodAutoscalerBehavior configures the scaling behavior of the target
                                     in both Up and Down directions (scaleUp and scaleDown fields respectively).
-                                    If not set, the default HPAScalingRules for scale up and scale down are used.
                                   properties:
                                     scaleDown:
                                       description: |-
@@ -1421,21 +1418,9 @@ spec:
                                       type: object
                                   type: object
                                 maxReplicas:
-                                  description: |-
-                                    maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale up.
-                                    It cannot be less that minReplicas.
                                   format: int32
                                   type: integer
                                 metrics:
-                                  description: |-
-                                    metrics contains the specifications for which to use to calculate the
-                                    desired replica count (the maximum replica count across all metrics will
-                                    be used).  The desired replica count is calculated multiplying the
-                                    ratio between the target value and the current value by the current
-                                    number of pods.  Ergo, metrics used must decrease as the pod count is
-                                    increased, and vice-versa.  See the individual metric source types for
-                                    more information about how each type of metric must respond.
-                                    If not set, the default metric will be set to 80% average CPU utilization.
                                   items:
                                     description: |-
                                       MetricSpec specifies how to scale based on a single metric
@@ -1918,40 +1903,11 @@ spec:
                                     - type
                                     type: object
                                   type: array
-                                  x-kubernetes-list-type: atomic
                                 minReplicas:
-                                  description: |-
-                                    minReplicas is the lower limit for the number of replicas to which the autoscaler
-                                    can scale down.  It defaults to 1 pod.  minReplicas is allowed to be 0 if the
-                                    alpha feature gate HPAScaleToZero is enabled and at least one Object or External
-                                    metric is configured.  Scaling is active as long as at least one metric value is
-                                    available.
                                   format: int32
                                   type: integer
-                                scaleTargetRef:
-                                  description: |-
-                                    scaleTargetRef points to the target resource to scale, and is used to the pods for which metrics
-                                    should be collected, as well as to actually change the replica count.
-                                  properties:
-                                    apiVersion:
-                                      description: apiVersion is the API version of
-                                        the referent
-                                      type: string
-                                    kind:
-                                      description: 'kind is the kind of the referent;
-                                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
-                                      type: string
-                                    name:
-                                      description: 'name is the name of the referent;
-                                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
-                                      type: string
-                                  required:
-                                  - kind
-                                  - name
-                                  type: object
                               required:
                               - maxReplicas
-                              - scaleTargetRef
                               type: object
                           type: object
                         startupProbe:

diff --git a/bundle/manifests/apps.nvidia.com_nimservices.yaml b/bundle/manifests/apps.nvidia.com_nimservices.yaml
@@ -1267,14 +1267,11 @@ spec:
                   enabled:
                     type: boolean
                   hpa:
-                    description: HorizontalPodAutoscalerSpec describes the desired
-                      functionality of the HorizontalPodAutoscaler.
                     properties:
                       behavior:
                         description: |-
-                          behavior configures the scaling behavior of the target
+                          HorizontalPodAutoscalerBehavior configures the scaling behavior of the target
                           in both Up and Down directions (scaleUp and scaleDown fields respectively).
-                          If not set, the default HPAScalingRules for scale up and scale down are used.
                         properties:
                           scaleDown:
                             description: |-
@@ -1387,21 +1384,9 @@ spec:
                             type: object
                         type: object
                       maxReplicas:
-                        description: |-
-                          maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale up.
-                          It cannot be less that minReplicas.
                         format: int32
                         type: integer
                       metrics:
-                        description: |-
-                          metrics contains the specifications for which to use to calculate the
-                          desired replica count (the maximum replica count across all metrics will
-                          be used).  The desired replica count is calculated multiplying the
-                          ratio between the target value and the current value by the current
-                          number of pods.  Ergo, metrics used must decrease as the pod count is
-                          increased, and vice-versa.  See the individual metric source types for
-                          more information about how each type of metric must respond.
-                          If not set, the default metric will be set to 80% average CPU utilization.
                         items:
                           description: |-
                             MetricSpec specifies how to scale based on a single metric
@@ -1868,39 +1853,11 @@ spec:
                           - type
                           type: object
                         type: array
-                        x-kubernetes-list-type: atomic
                       minReplicas:
-                        description: |-
-                          minReplicas is the lower limit for the number of replicas to which the autoscaler
-                          can scale down.  It defaults to 1 pod.  minReplicas is allowed to be 0 if the
-                          alpha feature gate HPAScaleToZero is enabled and at least one Object or External
-                          metric is configured.  Scaling is active as long as at least one metric value is
-                          available.
                         format: int32
                         type: integer
-                      scaleTargetRef:
-                        description: |-
-                          scaleTargetRef points to the target resource to scale, and is used to the pods for which metrics
-                          should be collected, as well as to actually change the replica count.
-                        properties:
-                          apiVersion:
-                            description: apiVersion is the API version of the referent
-                            type: string
-                          kind:
-                            description: 'kind is the kind of the referent; More info:
-                              https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
-                            type: string
-                          name:
-                            description: 'name is the name of the referent; More info:
-                              https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
-                            type: string
-                        required:
-                        - kind
-                        - name
-                        type: object
                     required:
                     - maxReplicas
-                    - scaleTargetRef
                     type: object
                 type: object
               startupProbe:

diff --git a/config/crd/bases/apps.nvidia.com_nimpipelines.yaml b/config/crd/bases/apps.nvidia.com_nimpipelines.yaml
@@ -1299,14 +1299,11 @@ spec:
                             enabled:
                               type: boolean
                             hpa:
-                              description: HorizontalPodAutoscalerSpec describes the
-                                desired functionality of the HorizontalPodAutoscaler.
                               properties:
                                 behavior:
                                   description: |-
-                                    behavior configures the scaling behavior of the target
+                                    HorizontalPodAutoscalerBehavior configures the scaling behavior of the target
                                     in both Up and Down directions (scaleUp and scaleDown fields respectively).
-                                    If not set, the default HPAScalingRules for scale up and scale down are used.
                                   properties:
                                     scaleDown:
                                       description: |-
@@ -1421,21 +1418,9 @@ spec:
                                       type: object
                                   type: object
                                 maxReplicas:
-                                  description: |-
-                                    maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale up.
-                                    It cannot be less that minReplicas.
                                   format: int32
                                   type: integer
                                 metrics:
-                                  description: |-
-                                    metrics contains the specifications for which to use to calculate the
-                                    desired replica count (the maximum replica count across all metrics will
-                                    be used).  The desired replica count is calculated multiplying the
-                                    ratio between the target value and the current value by the current
-                                    number of pods.  Ergo, metrics used must decrease as the pod count is
-                                    increased, and vice-versa.  See the individual metric source types for
-                                    more information about how each type of metric must respond.
-                                    If not set, the default metric will be set to 80% average CPU utilization.
                                   items:
                                     description: |-
                                       MetricSpec specifies how to scale based on a single metric
@@ -1918,40 +1903,11 @@ spec:
                                     - type
                                     type: object
                                   type: array
-                                  x-kubernetes-list-type: atomic
                                 minReplicas:
-                                  description: |-
-                                    minReplicas is the lower limit for the number of replicas to which the autoscaler
-                                    can scale down.  It defaults to 1 pod.  minReplicas is allowed to be 0 if the
-                                    alpha feature gate HPAScaleToZero is enabled and at least one Object or External
-                                    metric is configured.  Scaling is active as long as at least one metric value is
-                                    available.
                                   format: int32
                                   type: integer
-                                scaleTargetRef:
-                                  description: |-
-                                    scaleTargetRef points to the target resource to scale, and is used to the pods for which metrics
-                                    should be collected, as well as to actually change the replica count.
-                                  properties:
-                                    apiVersion:
-                                      description: apiVersion is the API version of
-                                        the referent
-                                      type: string
-                                    kind:
-                                      description: 'kind is the kind of the referent;
-                                        More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
-                                      type: string
-                                    name:
-                                      description: 'name is the name of the referent;
-                                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
-                                      type: string
-                                  required:
-                                  - kind
-                                  - name
-                                  type: object
                               required:
                               - maxReplicas
-                              - scaleTargetRef
                               type: object
                           type: object
                         startupProbe: