diff --git a/pkg/cache/internal/informers.go b/pkg/cache/internal/informers.go index b21a9c6345..5d8605c760 100644 --- a/pkg/cache/internal/informers.go +++ b/pkg/cache/internal/informers.go @@ -392,6 +392,12 @@ func (ip *Informers) addInformerToMap(gvk schema.GroupVersionKind, obj runtime.O return nil, false, err } + // Add metric event handler to track cache resources count + metricsHandler := NewMetricsResourceEventHandler(gvk, sharedIndexInformer) + if _, err := sharedIndexInformer.AddEventHandler(metricsHandler); err != nil { + return nil, false, err + } + mapping, err := ip.mapper.RESTMapping(gvk.GroupKind(), gvk.Version) if err != nil { return nil, false, err @@ -614,3 +620,22 @@ func restrictNamespaceBySelector(namespaceOpt string, s Selector) string { } return "" } + +// VisitInformers calls the given function for each informer in the cache +func (ip *Informers) VisitInformers(visitor func(gvk schema.GroupVersionKind, informer cache.SharedIndexInformer)) { + ip.mu.RLock() + defer ip.mu.RUnlock() + + // Visit each tracked informer + for gvk, _cache := range ip.tracker.Structured { + visitor(gvk, _cache.Informer) + } + + for gvk, _cache := range ip.tracker.Unstructured { + visitor(gvk, _cache.Informer) + } + + for gvk, _cache := range ip.tracker.Metadata { + visitor(gvk, _cache.Informer) + } +} diff --git a/pkg/cache/internal/metrics_handler.go b/pkg/cache/internal/metrics_handler.go new file mode 100644 index 0000000000..843fa9da0d --- /dev/null +++ b/pkg/cache/internal/metrics_handler.go @@ -0,0 +1,66 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package internal + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/tools/cache" + + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +// NewMetricsResourceEventHandler creates a new metrics-collecting event handler for an informer. +// It counts resource additions, updates, and deletions and records them in metrics. +func NewMetricsResourceEventHandler(gvk schema.GroupVersionKind, informer cache.SharedIndexInformer) cache.ResourceEventHandler { + handler := &metricsResourceEventHandler{ + gvk: gvk, + informer: informer, + } + + // Initialize the initial count + handler.updateCount() + + return handler +} + +// metricsResourceEventHandler implements cache.ResourceEventHandler interface +// to collect metrics about resources in the cache +type metricsResourceEventHandler struct { + gvk schema.GroupVersionKind + informer cache.SharedIndexInformer +} + +// OnAdd is called when an object is added. +func (h *metricsResourceEventHandler) OnAdd(obj interface{}, isInInitialList bool) { + h.updateCount() +} + +// OnUpdate is called when an object is modified. +func (h *metricsResourceEventHandler) OnUpdate(oldObj, newObj interface{}) { + // No need to update counts on update as the total count hasn't changed +} + +// OnDelete is called when an object is deleted. +func (h *metricsResourceEventHandler) OnDelete(obj interface{}) { + h.updateCount() +} + +// updateCount updates the metrics with the current count of resources. +func (h *metricsResourceEventHandler) updateCount() { + count := len(h.informer.GetIndexer().ListKeys()) + metrics.RecordCacheResourceCount(h.gvk, count) +} diff --git a/pkg/cache/internal/metrics_handler_test.go b/pkg/cache/internal/metrics_handler_test.go new file mode 100644 index 0000000000..0a629b0723 --- /dev/null +++ b/pkg/cache/internal/metrics_handler_test.go @@ -0,0 +1,253 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package internal + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "k8s.io/apimachinery/pkg/runtime/schema" + toolscache "k8s.io/client-go/tools/cache" + + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +var _ = Describe("Metrics Handler", func() { + + Describe("RecordCacheResourceCount", func() { + var ( + podGVK schema.GroupVersionKind + ) + + BeforeEach(func() { + podGVK = schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"} + }) + + DescribeTable("recording different resource counts", + func(count int) { + // Directly call RecordCacheResourceCount to record metrics + metrics.RecordCacheResourceCount(podGVK, count) + // Since we cannot directly verify prometheus metric values in tests + // we can only ensure the function doesn't panic + Expect(true).To(BeTrue()) // Simple assertion to show test passed + }, + Entry("empty", 0), + Entry("one pod", 1), + Entry("multiple pods", 5), + ) + }) + + Describe("MetricsResourceEventHandler", func() { + var ( + podGVK schema.GroupVersionKind + objects []interface{} + indexer *mockIndexer + informer *mockSharedIndexInformer + handler *metricsResourceEventHandler + metricRegistry *prometheus.Registry + ) + + BeforeEach(func() { + podGVK = schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"} + objects = []interface{}{} + indexer = &mockIndexer{getListFunc: func() []interface{} { return objects }} + informer = &mockSharedIndexInformer{indexer: indexer} + + // Reset metrics Registry + metricRegistry = prometheus.NewRegistry() + metrics.Registry = metricRegistry + metrics.Registry.MustRegister(metrics.CacheResourceCount) + + handler = NewMetricsResourceEventHandler(podGVK, informer) + }) + + verifyMetricValue := func(gvk schema.GroupVersionKind, expectedValue float64) { + gauge := metrics.CacheResourceCount.WithLabelValues(gvk.Group, gvk.Version, gvk.Kind) + var metric dto.Metric + err := gauge.Write(&metric) + Expect(err).NotTo(HaveOccurred(), "Failed to write metric") + + actualValue := metric.GetGauge().GetValue() + Expect(actualValue).To(Equal(expectedValue), "Metric value does not match expected") + } + + It("should update metrics on events", func() { + // Verify initial state - empty list, count should be 0 + verifyMetricValue(podGVK, 0) + + // Test OnAdd - adding a pod should update the count + objects = append(objects, "pod-1") + handler.OnAdd("pod-1", false) + verifyMetricValue(podGVK, 1) + + // Test OnUpdate - should not change the count since total object count hasn't changed + handler.OnUpdate("pod-1", "pod-1-updated") + verifyMetricValue(podGVK, 1) + + // Add another pod + objects = append(objects, "pod-2") + handler.OnAdd("pod-2", false) + verifyMetricValue(podGVK, 2) + + // Test OnDelete - deleting a pod should update the count + objects = objects[:1] // Only keep the first pod + handler.OnDelete("pod-2") + verifyMetricValue(podGVK, 1) + + // Delete all pods + objects = []interface{}{} + handler.OnDelete("pod-1") + verifyMetricValue(podGVK, 0) + }) + }) +}) + +// mockIndexer is a simple Indexer implementation for testing +type mockIndexer struct { + getListFunc func() []interface{} +} + +func (m *mockIndexer) Add(obj interface{}) error { + return nil +} + +func (m *mockIndexer) Update(obj interface{}) error { + return nil +} + +func (m *mockIndexer) Delete(obj interface{}) error { + return nil +} + +func (m *mockIndexer) List() []interface{} { + return m.getListFunc() +} + +func (m *mockIndexer) ListKeys() []string { + return nil +} + +func (m *mockIndexer) Get(obj interface{}) (item interface{}, exists bool, err error) { + return nil, false, nil +} + +func (m *mockIndexer) GetByKey(key string) (item interface{}, exists bool, err error) { + return nil, false, nil +} + +func (m *mockIndexer) Replace(list []interface{}, resourceVersion string) error { + return nil +} + +func (m *mockIndexer) Resync() error { + return nil +} + +func (m *mockIndexer) Index(indexName string, obj interface{}) ([]interface{}, error) { + return nil, nil +} + +func (m *mockIndexer) IndexKeys(indexName, indexedValue string) ([]string, error) { + return nil, nil +} + +func (m *mockIndexer) ListIndexFuncValues(indexName string) []string { + return nil +} + +func (m *mockIndexer) ByIndex(indexName, indexedValue string) ([]interface{}, error) { + return nil, nil +} + +func (m *mockIndexer) GetIndexers() toolscache.Indexers { + return nil +} + +func (m *mockIndexer) AddIndexers(newIndexers toolscache.Indexers) error { + return nil +} + +// mockSharedIndexInformer is a simple SharedIndexInformer implementation for testing +type mockSharedIndexInformer struct { + indexer toolscache.Indexer +} + +func (m *mockSharedIndexInformer) AddEventHandler(handler toolscache.ResourceEventHandler) (toolscache.ResourceEventHandlerRegistration, error) { + return nil, nil +} + +func (m *mockSharedIndexInformer) AddEventHandlerWithResyncPeriod(handler toolscache.ResourceEventHandler, resyncPeriod time.Duration) (toolscache.ResourceEventHandlerRegistration, error) { + return nil, nil +} + +func (m *mockSharedIndexInformer) AddEventHandlerWithOptions(handler toolscache.ResourceEventHandler, options toolscache.HandlerOptions) (toolscache.ResourceEventHandlerRegistration, error) { + return nil, nil +} + +func (m *mockSharedIndexInformer) RemoveEventHandler(registration toolscache.ResourceEventHandlerRegistration) error { + return nil +} + +func (m *mockSharedIndexInformer) GetStore() toolscache.Store { + return m.indexer +} + +func (m *mockSharedIndexInformer) GetController() toolscache.Controller { + return nil +} + +func (m *mockSharedIndexInformer) Run(stopCh <-chan struct{}) { +} + +func (m *mockSharedIndexInformer) RunWithContext(ctx context.Context) { +} + +func (m *mockSharedIndexInformer) HasSynced() bool { + return true +} + +func (m *mockSharedIndexInformer) LastSyncResourceVersion() string { + return "" +} + +func (m *mockSharedIndexInformer) SetWatchErrorHandler(handler toolscache.WatchErrorHandler) error { + return nil +} + +func (m *mockSharedIndexInformer) SetWatchErrorHandlerWithContext(handler toolscache.WatchErrorHandlerWithContext) error { + return nil +} + +func (m *mockSharedIndexInformer) SetTransform(transformer toolscache.TransformFunc) error { + return nil +} + +func (m *mockSharedIndexInformer) GetIndexer() toolscache.Indexer { + return m.indexer +} + +func (m *mockSharedIndexInformer) AddIndexers(indexers toolscache.Indexers) error { + return nil +} + +func (m *mockSharedIndexInformer) IsStopped() bool { + return false +} diff --git a/pkg/cache/internal/suite_test.go b/pkg/cache/internal/suite_test.go new file mode 100644 index 0000000000..fe4eec2629 --- /dev/null +++ b/pkg/cache/internal/suite_test.go @@ -0,0 +1,29 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package internal + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestCacheInternal(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Cache Internal Suite") +} diff --git a/pkg/cache/metrics.go b/pkg/cache/metrics.go new file mode 100644 index 0000000000..88bf67e34f --- /dev/null +++ b/pkg/cache/metrics.go @@ -0,0 +1,94 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cache + +import ( + "context" + "fmt" + + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/tools/cache" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +// informersMap defines an interface that allows access to the internal informers of a cache +type informersMap interface { + // VisitInformers iterates through all informers and calls the visitor function + VisitInformers(visitor func(gvk schema.GroupVersionKind, informer cache.SharedIndexInformer)) +} + +// hasInformers defines an interface that a cache must implement to use the DumpCacheResourceMetrics function +type hasInformers interface { + // Informers returns an object that implements the informersMap interface + Informers() interface{} +} + +// DumpCacheResourceMetrics manually updates metrics for all resources +// currently in the cache. This can be useful for initialization or +// to force a refresh of the metrics. +func DumpCacheResourceMetrics(ctx context.Context, c Cache) error { + // First check if the cache implements the hasInformers interface + cacheWithInformers, ok := c.(hasInformers) + if !ok { + return fmt.Errorf("cache does not implement necessary interface to access informers") + } + + // Get the informers + informers := cacheWithInformers.Informers() + + // Try to convert it to the informersMap interface + informersMap, ok := informers.(informersMap) + if !ok { + return fmt.Errorf("cache.Informers() does not return a valid informers map") + } + + // Visit all informers and update metrics + informersMap.VisitInformers(func(gvk schema.GroupVersionKind, informer cache.SharedIndexInformer) { + count := len(informer.GetIndexer().List()) + metrics.RecordCacheResourceCount(gvk, count) + }) + + return nil +} + +// SetCacheResourceLimit configures a limit on the number of resources +// cached for a specific GVK. When implemented, this will prevent the cache +// from growing beyond the specified limit. +// Note: This is a placeholder for future implementation. +func SetCacheResourceLimit(gvk schema.GroupVersionKind, limit int) error { + // This is a placeholder for future implementation + // Eventually this could set limits that would be enforced + // by the cache implementation + return nil +} + +// GetCachedResourceCount returns the current count of resources in the cache for a specific GVK. +func GetCachedResourceCount(ctx context.Context, c Cache, obj client.Object) (int, error) { + informer, err := c.GetInformer(ctx, obj) + if err != nil { + return 0, err + } + + // Use type assertion to get SharedIndexInformer to access the GetIndexer method + if sii, ok := informer.(cache.SharedIndexInformer); ok { + return len(sii.GetIndexer().List()), nil + } + + return 0, fmt.Errorf("informer does not implement SharedIndexInformer") +} diff --git a/pkg/cache/metrics_test.go b/pkg/cache/metrics_test.go new file mode 100644 index 0000000000..370599faad --- /dev/null +++ b/pkg/cache/metrics_test.go @@ -0,0 +1,475 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cache + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/kubernetes/scheme" + toolscache "k8s.io/client-go/tools/cache" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" +) + +var _ = Describe("Cache Metrics", func() { + + Describe("GetCachedResourceCount", func() { + var ( + ctx context.Context + podGVK schema.GroupVersionKind + informerCache *fakeInformerCache + pod *unstructured.Unstructured + ) + + BeforeEach(func() { + ctx = context.Background() + podGVK = schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"} + s := scheme.Scheme + informerCache = &fakeInformerCache{ + informersByGVK: map[schema.GroupVersionKind]toolscache.SharedIndexInformer{ + podGVK: &fakeSharedIndexInformer{ + objects: []runtime.Object{ + &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "v1", + "kind": "Pod", + "metadata": map[string]interface{}{ + "name": "pod-1", + "namespace": "default", + }, + }, + }, + &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "v1", + "kind": "Pod", + "metadata": map[string]interface{}{ + "name": "pod-2", + "namespace": "default", + }, + }, + }, + }, + }, + }, + scheme: s, + } + + pod = &unstructured.Unstructured{} + pod.SetGroupVersionKind(podGVK) + }) + + It("should return the correct resource count", func() { + count, err := GetCachedResourceCount(ctx, informerCache, pod) + Expect(err).NotTo(HaveOccurred()) + Expect(count).To(Equal(2)) + }) + }) + + Describe("DumpCacheResourceMetrics", func() { + var ( + ctx context.Context + podGVK schema.GroupVersionKind + deploymentGVK schema.GroupVersionKind + cacheImpl *testInformerCache + ) + + BeforeEach(func() { + ctx = context.Background() + podGVK = schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"} + deploymentGVK = schema.GroupVersionKind{Group: "apps", Version: "v1", Kind: "Deployment"} + + // Prepare test data + informersByGVK := map[schema.GroupVersionKind]toolscache.SharedIndexInformer{ + podGVK: &fakeSharedIndexInformer{ + objects: []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{ + "name": "pod-1", "namespace": "default", + }, + }}, + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "v1", "kind": "Pod", + "metadata": map[string]interface{}{ + "name": "pod-2", "namespace": "default", + }, + }}, + }, + }, + deploymentGVK: &fakeSharedIndexInformer{ + objects: []runtime.Object{ + &unstructured.Unstructured{Object: map[string]interface{}{ + "apiVersion": "apps/v1", "kind": "Deployment", + "metadata": map[string]interface{}{ + "name": "deployment-1", "namespace": "default", + }, + }}, + }, + }, + } + + // Create an object that conforms to the informerCache type + cacheImpl = &testInformerCache{ + informers: informersByGVK, + } + }) + + It("should collect metrics without error", func() { + err := DumpCacheResourceMetrics(ctx, cacheImpl) + Expect(err).NotTo(HaveOccurred()) + // Due to test framework limitations, we can only verify the function didn't throw an error + }) + }) +}) + +// A simplified version of the real informerCache, sufficient for type checking +type testInformerCache struct { + informers map[schema.GroupVersionKind]toolscache.SharedIndexInformer +} + +// Allow DumpCacheResourceMetrics to access internal informers +func (c *testInformerCache) Informers() interface{} { + return &testInformersMap{ + informers: c.informers, + } +} + +// Implement the required methods of the Cache interface +func (c *testInformerCache) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + return nil +} + +func (c *testInformerCache) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error { + return nil +} + +func (c *testInformerCache) GetInformer(ctx context.Context, obj client.Object, opts ...InformerGetOption) (Informer, error) { + return nil, nil +} + +func (c *testInformerCache) GetInformerForKind(ctx context.Context, gvk schema.GroupVersionKind, opts ...InformerGetOption) (Informer, error) { + return nil, nil +} + +func (c *testInformerCache) Start(ctx context.Context) error { + return nil +} + +func (c *testInformerCache) WaitForCacheSync(ctx context.Context) bool { + return true +} + +func (c *testInformerCache) IndexField(ctx context.Context, obj client.Object, field string, extractValue client.IndexerFunc) error { + return nil +} + +func (c *testInformerCache) RemoveInformer(ctx context.Context, obj client.Object) error { + return nil +} + +// Implement the VisitInformers method interface to allow DumpCacheResourceMetrics to access informers +type testInformersMap struct { + informers map[schema.GroupVersionKind]toolscache.SharedIndexInformer +} + +func (m *testInformersMap) VisitInformers(visitor func(gvk schema.GroupVersionKind, informer toolscache.SharedIndexInformer)) { + for gvk, informer := range m.informers { + visitor(gvk, informer) + } +} + +// Fake implementation for testing +type fakeInformerCache struct { + scheme *runtime.Scheme + informersByGVK map[schema.GroupVersionKind]toolscache.SharedIndexInformer +} + +func (c *fakeInformerCache) GetInformer(ctx context.Context, obj client.Object, _ ...InformerGetOption) (Informer, error) { + gvk, err := apiutil.GVKForObject(obj, c.scheme) + if err != nil { + return nil, err + } + + if informer, ok := c.informersByGVK[gvk]; ok { + return &fakeInformer{SharedIndexInformer: informer}, nil + } + + return nil, nil +} + +func (c *fakeInformerCache) GetInformerForKind(ctx context.Context, gvk schema.GroupVersionKind, _ ...InformerGetOption) (Informer, error) { + if informer, ok := c.informersByGVK[gvk]; ok { + return &fakeInformer{SharedIndexInformer: informer}, nil + } + return nil, nil +} + +func (c *fakeInformerCache) Start(ctx context.Context) error { + return nil +} + +func (c *fakeInformerCache) WaitForCacheSync(ctx context.Context) bool { + return true +} + +func (c *fakeInformerCache) IndexField(ctx context.Context, obj client.Object, field string, extractValue client.IndexerFunc) error { + return nil +} + +func (c *fakeInformerCache) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + return nil +} + +func (c *fakeInformerCache) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error { + return nil +} + +func (c *fakeInformerCache) RemoveInformer(ctx context.Context, obj client.Object) error { + return nil +} + +func (c *fakeInformerCache) NeedLeaderElection() bool { + return false +} + +// Informer implementation for testing +type fakeInformer struct { + toolscache.SharedIndexInformer +} + +func (f *fakeInformer) AddEventHandler(handler toolscache.ResourceEventHandler) (toolscache.ResourceEventHandlerRegistration, error) { + return nil, nil +} + +func (f *fakeInformer) AddEventHandlerWithResyncPeriod(handler toolscache.ResourceEventHandler, resyncPeriod time.Duration) (toolscache.ResourceEventHandlerRegistration, error) { + return nil, nil +} + +func (f *fakeInformer) AddEventHandlerWithOptions(handler toolscache.ResourceEventHandler, options toolscache.HandlerOptions) (toolscache.ResourceEventHandlerRegistration, error) { + return nil, nil +} + +func (f *fakeInformer) RemoveEventHandler(registration toolscache.ResourceEventHandlerRegistration) error { + return nil +} + +func (f *fakeInformer) HasSynced() bool { + return true +} + +func (f *fakeInformer) AddIndexers(indexers toolscache.Indexers) error { + return nil +} + +// SharedIndexInformer implementation for testing +type fakeSharedIndexInformer struct { + objects []runtime.Object +} + +func (f *fakeSharedIndexInformer) AddEventHandler(handler toolscache.ResourceEventHandler) (toolscache.ResourceEventHandlerRegistration, error) { + return nil, nil +} + +func (f *fakeSharedIndexInformer) AddEventHandlerWithResyncPeriod(handler toolscache.ResourceEventHandler, resyncPeriod time.Duration) (toolscache.ResourceEventHandlerRegistration, error) { + return nil, nil +} + +func (f *fakeSharedIndexInformer) AddEventHandlerWithOptions(handler toolscache.ResourceEventHandler, options toolscache.HandlerOptions) (toolscache.ResourceEventHandlerRegistration, error) { + return nil, nil +} + +func (f *fakeSharedIndexInformer) RemoveEventHandler(registration toolscache.ResourceEventHandlerRegistration) error { + return nil +} + +func (f *fakeSharedIndexInformer) GetStore() toolscache.Store { + return &fakeStore{objects: f.objects} +} + +func (f *fakeSharedIndexInformer) GetController() toolscache.Controller { + return nil +} + +func (f *fakeSharedIndexInformer) Run(stopCh <-chan struct{}) { +} + +func (f *fakeSharedIndexInformer) RunWithContext(context.Context) { +} + +func (f *fakeSharedIndexInformer) HasSynced() bool { + return true +} + +func (f *fakeSharedIndexInformer) LastSyncResourceVersion() string { + return "" +} + +func (f *fakeSharedIndexInformer) SetWatchErrorHandler(toolscache.WatchErrorHandler) error { + return nil +} + +func (f *fakeSharedIndexInformer) SetWatchErrorHandlerWithContext(toolscache.WatchErrorHandlerWithContext) error { + return nil +} + +func (f *fakeSharedIndexInformer) SetTransform(toolscache.TransformFunc) error { + return nil +} + +func (f *fakeSharedIndexInformer) GetIndexer() toolscache.Indexer { + return &fakeIndexer{objects: f.objects} +} + +func (f *fakeSharedIndexInformer) AddIndexers(toolscache.Indexers) error { + return nil +} + +func (f *fakeSharedIndexInformer) IsStopped() bool { + return false +} + +// Store implementation for testing +type fakeStore struct { + objects []runtime.Object +} + +func (f *fakeStore) Add(obj interface{}) error { + f.objects = append(f.objects, obj.(runtime.Object)) + return nil +} + +func (f *fakeStore) Update(obj interface{}) error { + return nil +} + +func (f *fakeStore) Delete(obj interface{}) error { + return nil +} + +func (f *fakeStore) List() []interface{} { + result := make([]interface{}, len(f.objects)) + for i, obj := range f.objects { + result[i] = obj + } + return result +} + +func (f *fakeStore) ListKeys() []string { + return nil +} + +func (f *fakeStore) Get(obj interface{}) (item interface{}, exists bool, err error) { + return nil, false, nil +} + +func (f *fakeStore) GetByKey(key string) (item interface{}, exists bool, err error) { + return nil, false, nil +} + +func (f *fakeStore) Replace(list []interface{}, resourceVersion string) error { + return nil +} + +func (f *fakeStore) Resync() error { + return nil +} + +// Indexer implementation for testing +type fakeIndexer struct { + objects []runtime.Object +} + +func (f *fakeIndexer) Add(obj interface{}) error { + f.objects = append(f.objects, obj.(runtime.Object)) + return nil +} + +func (f *fakeIndexer) Update(obj interface{}) error { + return nil +} + +func (f *fakeIndexer) Delete(obj interface{}) error { + return nil +} + +func (f *fakeIndexer) List() []interface{} { + result := make([]interface{}, len(f.objects)) + for i, obj := range f.objects { + result[i] = obj + } + return result +} + +func (f *fakeIndexer) ListKeys() []string { + return nil +} + +func (f *fakeIndexer) Get(obj interface{}) (item interface{}, exists bool, err error) { + return nil, false, nil +} + +func (f *fakeIndexer) GetByKey(key string) (item interface{}, exists bool, err error) { + return nil, false, nil +} + +func (f *fakeIndexer) Replace(list []interface{}, resourceVersion string) error { + return nil +} + +func (f *fakeIndexer) Resync() error { + return nil +} + +func (f *fakeIndexer) Index(indexName string, obj interface{}) ([]interface{}, error) { + return nil, nil +} + +func (f *fakeIndexer) IndexKeys(indexName, indexedValue string) ([]string, error) { + return nil, nil +} + +func (f *fakeIndexer) ListIndexFuncValues(indexName string) []string { + return nil +} + +func (f *fakeIndexer) ByIndex(indexName, indexedValue string) ([]interface{}, error) { + return nil, nil +} + +func (f *fakeIndexer) GetIndexers() toolscache.Indexers { + return nil +} + +func (f *fakeIndexer) AddIndexers(newIndexers toolscache.Indexers) error { + return nil +} + +// Implement the Informers interface method +func (c *fakeInformerCache) Informers() interface{} { + return &testInformersMap{ + informers: c.informersByGVK, + } +} diff --git a/pkg/cache/suite_test.go b/pkg/cache/suite_test.go new file mode 100644 index 0000000000..1b5788e4d6 --- /dev/null +++ b/pkg/cache/suite_test.go @@ -0,0 +1,29 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cache + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestCacheMetrics(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Cache Metrics Suite") +} diff --git a/pkg/metrics/cache.go b/pkg/metrics/cache.go new file mode 100644 index 0000000000..ea38dd685b --- /dev/null +++ b/pkg/metrics/cache.go @@ -0,0 +1,41 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + // CacheResourceCount is a prometheus metric which counts the number of resources + // cached in the local controller-runtime cache, broken down by resource GVK. + CacheResourceCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "controller_runtime_cache_resources", + Help: "Number of resources cached in the controller-runtime local cache, broken down by group, version, kind", + }, []string{"group", "version", "kind"}) +) + +func init() { + Registry.MustRegister(CacheResourceCount) +} + +// RecordCacheResourceCount records the count of a specific resource type in the cache +func RecordCacheResourceCount(gvk schema.GroupVersionKind, count int) { + CacheResourceCount.WithLabelValues(gvk.Group, gvk.Version, gvk.Kind).Set(float64(count)) +} diff --git a/pkg/metrics/cache_test.go b/pkg/metrics/cache_test.go new file mode 100644 index 0000000000..637fd67862 --- /dev/null +++ b/pkg/metrics/cache_test.go @@ -0,0 +1,208 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "fmt" + "strings" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + dto "github.com/prometheus/client_model/go" + + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var _ = Describe("Cache Metrics", func() { + + Describe("RecordCacheResourceCount", func() { + BeforeEach(func() { + // Reset Registry to ensure tests don't affect each other + Registry = prometheus.NewRegistry() + Registry.MustRegister(CacheResourceCount) + }) + + DescribeTable("recording resource counts", + func(gvk schema.GroupVersionKind, count int, wantCount float64) { + // Call the function being tested + RecordCacheResourceCount(gvk, count) + + // Build metric validation function + gauge := CacheResourceCount.WithLabelValues(gvk.Group, gvk.Version, gvk.Kind) + var metric dto.Metric + err := gauge.Write(&metric) + Expect(err).NotTo(HaveOccurred(), "Failed to write metric") + + // Verify the metric value matches the expected value + actualValue := metric.GetGauge().GetValue() + Expect(actualValue).To(Equal(wantCount)) + }, + Entry("record pod count", + schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"}, + 10, + float64(10)), + Entry("record deployment count", + schema.GroupVersionKind{Group: "apps", Version: "v1", Kind: "Deployment"}, + 5, + float64(5)), + ) + + It("should update existing metric values", func() { + gvk := schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"} + + // First record + RecordCacheResourceCount(gvk, 10) + gauge := CacheResourceCount.WithLabelValues(gvk.Group, gvk.Version, gvk.Kind) + var metric dto.Metric + err := gauge.Write(&metric) + Expect(err).NotTo(HaveOccurred()) + Expect(metric.GetGauge().GetValue()).To(Equal(float64(10))) + + // Update + RecordCacheResourceCount(gvk, 15) + err = gauge.Write(&metric) + Expect(err).NotTo(HaveOccurred()) + Expect(metric.GetGauge().GetValue()).To(Equal(float64(15))) + }) + }) + + Describe("CacheResourceCount metric configuration", func() { + It("should have the correct configuration", func() { + // Create a new registry to avoid effects from previous tests + Registry = prometheus.NewRegistry() + + // Register our metric + CacheResourceCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "controller_runtime_cache_resources", + Help: "Number of resources cached in the controller-runtime local cache, broken down by group, version, kind", + }, []string{"group", "version", "kind"}) + + Registry.MustRegister(CacheResourceCount) + + expected := ` + # HELP controller_runtime_cache_resources Number of resources cached in the controller-runtime local cache, broken down by group, version, kind + # TYPE controller_runtime_cache_resources gauge + ` + + err := testutil.GatherAndCompare(Registry, strings.NewReader(expected), + "controller_runtime_cache_resources") + Expect(err).NotTo(HaveOccurred(), "Metrics configuration is incorrect") + }) + }) + + Describe("Multiple Resource Counts", func() { + var gvks []struct { + gvk schema.GroupVersionKind + count int + } + + BeforeEach(func() { + Registry = prometheus.NewRegistry() + Registry.MustRegister(CacheResourceCount) + + // Define test data for multiple GVKs + gvks = []struct { + gvk schema.GroupVersionKind + count int + }{ + { + gvk: schema.GroupVersionKind{ + Group: "", + Version: "v1", + Kind: "Pod", + }, + count: 10, + }, + { + gvk: schema.GroupVersionKind{ + Group: "apps", + Version: "v1", + Kind: "Deployment", + }, + count: 5, + }, + { + gvk: schema.GroupVersionKind{ + Group: "networking.k8s.io", + Version: "v1", + Kind: "Ingress", + }, + count: 3, + }, + } + + // Record all metrics + for _, g := range gvks { + RecordCacheResourceCount(g.gvk, g.count) + } + }) + + It("should store multiple GVK resource counts correctly", func() { + // Collect and validate all metrics + metrics, err := Registry.Gather() + Expect(err).NotTo(HaveOccurred(), "Failed to gather metrics") + + // There should be only one metric family (controller_runtime_cache_resources) + Expect(metrics).To(HaveLen(1), "Expected 1 metric family") + + // Verify counter count matches the number of GVKs we set + metricFamily := metrics[0] + Expect(metricFamily.Metric).To(HaveLen(len(gvks)), "Expected metrics count to match GVK count") + + // Create a map of expected values for easier lookup and verification + expected := make(map[string]int) + for _, g := range gvks { + key := fmt.Sprintf("%s/%s/%s", g.gvk.Group, g.gvk.Version, g.gvk.Kind) + if g.gvk.Group == "" { + key = fmt.Sprintf("/%s/%s", g.gvk.Version, g.gvk.Kind) + } + expected[key] = g.count + } + + // Verify each metric value + for _, m := range metricFamily.Metric { + // Build key from labels + var group, version, kind string + for _, l := range m.Label { + switch l.GetName() { + case "group": + group = l.GetValue() + case "version": + version = l.GetValue() + case "kind": + kind = l.GetValue() + } + } + + key := fmt.Sprintf("%s/%s/%s", group, version, kind) + if group == "" { + key = fmt.Sprintf("/%s/%s", version, kind) + } + + // Verify value matches expected + expectedCount, ok := expected[key] + Expect(ok).To(BeTrue(), "Unexpected metric with labels %s", key) + + actualValue := m.GetGauge().GetValue() + Expect(actualValue).To(Equal(float64(expectedCount)), + "For %s: expected value %d but got %f", key, expectedCount, actualValue) + } + }) + }) +}) diff --git a/pkg/metrics/suite_test.go b/pkg/metrics/suite_test.go new file mode 100644 index 0000000000..9dacba51d9 --- /dev/null +++ b/pkg/metrics/suite_test.go @@ -0,0 +1,29 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestMetrics(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Metrics Suite") +}