Skip to content

Commit 0ba0a89

Browse files
committed
introduce tests to check whether workqueue metrics exist
Signed-off-by: chaosi-zju <[email protected]>
1 parent 446dbe9 commit 0ba0a89

File tree

6 files changed

+337
-5
lines changed

6 files changed

+337
-5
lines changed

.github/workflows/installation-cli.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
hack/cli-testing-environment.sh
4545
4646
# run a single e2e
47-
export KUBECONFIG=${HOME}/karmada/karmada-apiserver.config
47+
export KUBECONFIG=${HOME}/.kube/karmada-host.config:${HOME}/karmada/karmada-apiserver.config
4848
GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo
4949
ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/
5050
- name: export logs
@@ -87,7 +87,7 @@ jobs:
8787
hack/cli-testing-init-with-config.sh
8888
8989
# run a single e2e
90-
export KUBECONFIG=${HOME}/karmada/karmada-apiserver.config
90+
export KUBECONFIG=${HOME}/.kube/karmada-host.config:${HOME}/karmada/karmada-apiserver.config
9191
GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo
9292
ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/
9393
- name: export logs for config test

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ require (
1919
github.com/onsi/gomega v1.34.1
2020
github.com/opensearch-project/opensearch-go v1.1.0
2121
github.com/prometheus/client_golang v1.19.1
22+
github.com/prometheus/common v0.55.0
2223
github.com/spf13/cobra v1.8.1
2324
github.com/spf13/pflag v1.0.5
2425
github.com/stretchr/testify v1.9.0
@@ -134,7 +135,6 @@ require (
134135
github.com/pkg/errors v0.9.1 // indirect
135136
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
136137
github.com/prometheus/client_model v0.6.1 // indirect
137-
github.com/prometheus/common v0.55.0 // indirect
138138
github.com/prometheus/procfs v0.15.1 // indirect
139139
github.com/rivo/uniseg v0.4.2 // indirect
140140
github.com/robfig/cron/v3 v3.0.1 // indirect

test/e2e/framework/cluster.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222
"os"
23+
"path/filepath"
2324
"strings"
2425
"time"
2526

@@ -310,7 +311,9 @@ func WaitClusterFitWith(c client.Client, clusterName string, fit func(cluster *c
310311

311312
// LoadRESTClientConfig creates a rest.Config using the passed kubeconfig. If context is empty, current context in kubeconfig will be used.
312313
func LoadRESTClientConfig(kubeconfig string, context string) (*rest.Config, error) {
313-
loader := &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig}
314+
loader := &clientcmd.ClientConfigLoadingRules{Precedence: filepath.SplitList(kubeconfig)}
315+
klog.Infof("use kubeconfig: %+v", loader.Precedence)
316+
314317
loadedConfig, err := loader.Load()
315318
if err != nil {
316319
return nil, err

test/e2e/framework/metrics.go

+193
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
/*
2+
Copyright 2024 The Karmada Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package framework
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"regexp"
23+
"time"
24+
25+
"github.com/prometheus/common/model"
26+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27+
"k8s.io/apimachinery/pkg/util/wait"
28+
clientset "k8s.io/client-go/kubernetes"
29+
"k8s.io/component-base/metrics/testutil"
30+
"k8s.io/klog/v2"
31+
32+
"github.com/karmada-io/karmada/pkg/util/names"
33+
)
34+
35+
// Settings shared by the metrics grabbing helpers in this file.
const (
	// queryTimeout bounds how long a single pod is polled for its metrics.
	queryTimeout = 10 * time.Second
	// metricsBindPort is the pod port on which the metrics endpoint is requested.
	metricsBindPort = 8080
	// karmadaNamespace is the namespace whose pods are listed and scraped.
	karmadaNamespace = "karmada-system"
	// leaderPodMetric is the metric inspected to decide whether a pod is the leader.
	leaderPodMetric = "leader_election_master_status"
)
41+
42+
// The following is adapted from https://github.com/kubernetes/kubernetes/blob/master/test/e2e/framework/metrics/metrics_grabber.go
43+
44+
// Grabber is used to grab metrics from karmada components
45+
type Grabber struct {
46+
hostKubeClient clientset.Interface
47+
controllerManagerPods []string
48+
schedulerPods []string
49+
deschedulerPods []string
50+
metricsAdapterPods []string
51+
schedulerEstimatorPods []string
52+
webhookPods []string
53+
}
54+
55+
// NewMetricsGrabber creates a new metrics grabber
56+
func NewMetricsGrabber(ctx context.Context, c clientset.Interface) (*Grabber, error) {
57+
grabber := Grabber{hostKubeClient: c}
58+
regKarmadaControllerManagerPods := regexp.MustCompile(names.KarmadaControllerManagerComponentName + "-.*")
59+
regKarmadaSchedulerPods := regexp.MustCompile(names.KarmadaSchedulerComponentName + "-.*")
60+
regKarmadaDeschedulerPods := regexp.MustCompile(names.KarmadaDeschedulerComponentName + "-.*")
61+
regKarmadaMetricsAdapterPods := regexp.MustCompile(names.KarmadaMetricsAdapterComponentName + "-.*")
62+
regKarmadaSchedulerEstimatorPods := regexp.MustCompile(names.KarmadaSchedulerEstimatorComponentName + "-" + ClusterNames()[0] + "-.*")
63+
regKarmadaWebhookPods := regexp.MustCompile(names.KarmadaWebhookComponentName + "-.*")
64+
65+
podList, err := c.CoreV1().Pods(karmadaNamespace).List(ctx, metav1.ListOptions{})
66+
if err != nil {
67+
return nil, err
68+
}
69+
if len(podList.Items) < 1 {
70+
klog.Warningf("Can't find any pods in namespace %s to grab metrics from", karmadaNamespace)
71+
}
72+
for _, pod := range podList.Items {
73+
if regKarmadaControllerManagerPods.MatchString(pod.Name) {
74+
grabber.controllerManagerPods = append(grabber.controllerManagerPods, pod.Name)
75+
continue
76+
}
77+
if regKarmadaDeschedulerPods.MatchString(pod.Name) {
78+
grabber.deschedulerPods = append(grabber.deschedulerPods, pod.Name)
79+
continue
80+
}
81+
if regKarmadaMetricsAdapterPods.MatchString(pod.Name) {
82+
grabber.metricsAdapterPods = append(grabber.metricsAdapterPods, pod.Name)
83+
continue
84+
}
85+
if regKarmadaSchedulerEstimatorPods.MatchString(pod.Name) {
86+
grabber.schedulerEstimatorPods = append(grabber.schedulerEstimatorPods, pod.Name)
87+
continue
88+
}
89+
if regKarmadaSchedulerPods.MatchString(pod.Name) {
90+
grabber.schedulerPods = append(grabber.schedulerPods, pod.Name)
91+
continue
92+
}
93+
if regKarmadaWebhookPods.MatchString(pod.Name) {
94+
grabber.webhookPods = append(grabber.webhookPods, pod.Name)
95+
}
96+
}
97+
return &grabber, nil
98+
}
99+
100+
// GrabMetricsFromComponent fetch metrics from the leader of a specified Karmada component
101+
func (g *Grabber) GrabMetricsFromComponent(ctx context.Context, component string) (map[string]testutil.Metrics, error) {
102+
pods, fromLeader := make([]string, 0), false
103+
switch component {
104+
case names.KarmadaControllerManagerComponentName:
105+
pods, fromLeader = g.controllerManagerPods, true
106+
case names.KarmadaSchedulerComponentName:
107+
pods, fromLeader = g.schedulerPods, true
108+
case names.KarmadaDeschedulerComponentName:
109+
pods, fromLeader = g.deschedulerPods, true
110+
case names.KarmadaMetricsAdapterComponentName:
111+
pods = g.metricsAdapterPods
112+
case names.KarmadaSchedulerEstimatorComponentName:
113+
pods = g.schedulerEstimatorPods
114+
case names.KarmadaWebhookComponentName:
115+
pods = g.webhookPods
116+
}
117+
return g.grabMetricsFromPod(ctx, component, pods, fromLeader)
118+
}
119+
120+
// grabMetricsFromPod fetch metrics from the leader pod
121+
func (g *Grabber) grabMetricsFromPod(ctx context.Context, component string, pods []string, fromLeader bool) (map[string]testutil.Metrics, error) {
122+
var output string
123+
var lastMetricsFetchErr error
124+
125+
result := make(map[string]testutil.Metrics)
126+
for _, podName := range pods {
127+
if metricsWaitErr := wait.PollUntilContextTimeout(ctx, time.Second, queryTimeout, true, func(ctx context.Context) (bool, error) {
128+
output, lastMetricsFetchErr = GetMetricsFromPod(ctx, g.hostKubeClient, podName, karmadaNamespace, metricsBindPort)
129+
return lastMetricsFetchErr == nil, nil
130+
}); metricsWaitErr != nil {
131+
klog.Errorf("error waiting for %s to expose metrics: %v; %v", podName, metricsWaitErr, lastMetricsFetchErr)
132+
continue
133+
}
134+
135+
podMetrics := testutil.Metrics{}
136+
metricsParseErr := testutil.ParseMetrics(output, &podMetrics)
137+
if metricsParseErr != nil {
138+
klog.Errorf("failed to parse metrics for %s: %v", podName, metricsParseErr)
139+
continue
140+
}
141+
142+
// judge which pod is the leader pod
143+
if fromLeader && !isLeaderPod(podMetrics[leaderPodMetric]) {
144+
klog.Infof("skip fetch %s since it is not the leader pod", podName)
145+
continue
146+
}
147+
148+
result[podName] = podMetrics
149+
klog.Infof("successfully grabbed metrics of %s", podName)
150+
}
151+
152+
if len(result) == 0 {
153+
return nil, fmt.Errorf("failed to fetch metrics from the pod of %s", component)
154+
}
155+
return result, nil
156+
}
157+
158+
// GetMetricsFromPod retrieves metrics data.
159+
func GetMetricsFromPod(ctx context.Context, client clientset.Interface, podName string, namespace string, port int) (string, error) {
160+
rawOutput, err := client.CoreV1().RESTClient().Get().
161+
Namespace(namespace).
162+
Resource("pods").
163+
SubResource("proxy").
164+
Name(fmt.Sprintf("%s:%d", podName, port)).
165+
Suffix("metrics").
166+
Do(ctx).Raw()
167+
if err != nil {
168+
return "", err
169+
}
170+
return string(rawOutput), nil
171+
}
172+
173+
func isLeaderPod(samples model.Samples) bool {
174+
for _, sample := range samples {
175+
if sample.Value > 0 {
176+
return true
177+
}
178+
}
179+
return false
180+
}
181+
182+
// PrintMetricSample prints the metric sample
183+
func PrintMetricSample(podName string, sample model.Samples) {
184+
if sample.Len() == 0 {
185+
return
186+
}
187+
if podName != "" {
188+
klog.Infof("metrics from pod: %s", podName)
189+
}
190+
for _, s := range sample {
191+
klog.Infof("metric: %v, value: %v, timestamp: %v", s.Metric, s.Value, s.Timestamp)
192+
}
193+
}

test/e2e/metrics_test.go

+126
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/*
2+
Copyright 2023 The Karmada Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package e2e
18+
19+
import (
20+
"context"
21+
22+
"github.com/onsi/ginkgo/v2"
23+
"github.com/onsi/gomega"
24+
appsv1 "k8s.io/api/apps/v1"
25+
"k8s.io/apimachinery/pkg/util/rand"
26+
"k8s.io/klog/v2"
27+
28+
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
29+
"github.com/karmada-io/karmada/pkg/util/names"
30+
"github.com/karmada-io/karmada/test/e2e/framework"
31+
testhelper "github.com/karmada-io/karmada/test/helper"
32+
)
33+
34+
var _ = ginkgo.Describe("metrics testing", func() {
35+
var grabber *framework.Grabber
36+
37+
var componentMetrics = map[string][]string{
38+
names.KarmadaControllerManagerComponentName: {
39+
"workqueue_queue_duration_seconds_sum", // workqueue metrics
40+
"cluster_ready_state", // custom ClusterCollectors metrics
41+
"work_sync_workload_duration_seconds_sum", // custom ResourceCollectors metrics
42+
},
43+
names.KarmadaSchedulerComponentName: {
44+
"workqueue_queue_duration_seconds_sum", // workqueue metrics
45+
"karmada_scheduler_schedule_attempts_total", // scheduler custom metrics
46+
},
47+
names.KarmadaDeschedulerComponentName: {
48+
"workqueue_queue_duration_seconds_sum", // workqueue metrics
49+
},
50+
names.KarmadaMetricsAdapterComponentName: {
51+
"workqueue_queue_duration_seconds_sum", // workqueue metrics
52+
},
53+
names.KarmadaSchedulerEstimatorComponentName: {
54+
"karmada_scheduler_estimator_estimating_request_total", // scheduler estimator custom metrics
55+
},
56+
names.KarmadaWebhookComponentName: {
57+
"controller_runtime_webhook_requests_total", // controller runtime hook server metrics
58+
},
59+
}
60+
61+
ginkgo.BeforeEach(func() {
62+
var err error
63+
grabber, err = framework.NewMetricsGrabber(context.TODO(), hostKubeClient)
64+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
65+
})
66+
67+
ginkgo.Context("metrics presence testing", func() {
68+
ginkgo.It("metrics presence testing for each component", func() {
69+
ginkgo.By("do a simple scheduling to ensure above metrics exist", func() {
70+
name := deploymentNamePrefix + rand.String(RandomStrLength)
71+
deployment := testhelper.NewDeployment(testNamespace, name)
72+
policy := testhelper.NewPropagationPolicy(testNamespace, name, []policyv1alpha1.ResourceSelector{
73+
{
74+
APIVersion: deployment.APIVersion,
75+
Kind: deployment.Kind,
76+
Name: deployment.Name,
77+
},
78+
}, policyv1alpha1.Placement{
79+
ClusterAffinity: &policyv1alpha1.ClusterAffinity{
80+
ClusterNames: framework.ClusterNames(),
81+
},
82+
})
83+
framework.CreateDeployment(kubeClient, deployment)
84+
framework.CreatePropagationPolicy(karmadaClient, policy)
85+
ginkgo.DeferCleanup(func() {
86+
framework.RemoveDeployment(kubeClient, deployment.Namespace, deployment.Name)
87+
framework.RemovePropagationPolicy(karmadaClient, policy.Namespace, policy.Name)
88+
})
89+
framework.WaitDeploymentPresentOnClustersFitWith(framework.ClusterNames(), deployment.Namespace, deployment.Name, func(_ *appsv1.Deployment) bool { return true })
90+
})
91+
92+
for component, metricNameList := range componentMetrics {
93+
ginkgo.By("judge metrics presence of component: "+component, func() {
94+
podsMetrics, err := grabber.GrabMetricsFromComponent(context.TODO(), component)
95+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
96+
97+
for _, metricName := range metricNameList {
98+
metricExist := false
99+
for podName, metrics := range podsMetrics {
100+
// the output format of `metrics` is like:
101+
// {
102+
// "workqueue_queue_duration_seconds_sum": [{
103+
// "metric": {
104+
// "__name__": "workqueue_queue_duration_seconds_sum",
105+
// "controller": "work-status-controller",
106+
// "name": "work-status-controller"
107+
// },
108+
// "value": [0, "0.12403110800000001"]
109+
// }]
110+
// }
111+
framework.PrintMetricSample(podName, metrics[metricName])
112+
if metrics[metricName].Len() > 0 {
113+
metricExist = true
114+
break
115+
}
116+
}
117+
if !metricExist {
118+
klog.Errorf("metric %s not found in component %s", metricName, component)
119+
gomega.Expect(metricExist).ShouldNot(gomega.BeFalse())
120+
}
121+
}
122+
})
123+
}
124+
})
125+
})
126+
})

0 commit comments

Comments
 (0)