Skip to content

Commit f1d425b

Browse files
authored
reduce log level in metrics logger not to trash the log (#708)
* reduce log level in metrics logger not to trash the log

Signed-off-by: Nir Rozenbaum <[email protected]>

* rename flush metrics to refresh metrics

Signed-off-by: Nir Rozenbaum <[email protected]>

* revert log level

Signed-off-by: Nir Rozenbaum <[email protected]>

---------

Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent 7d238dd commit f1d425b

File tree

4 files changed

+16
-22
lines changed

4 files changed

+16
-22
lines changed

cmd/epp/main.go

+4-5
Original file line number | Diff line number | Diff line change
@@ -142,17 +142,15 @@ func run() error {
142142
}
143143

144144
poolNamespacedName := types.NamespacedName{
145-
Namespace: *poolNamespace,
146145
Name: *poolName,
146+
Namespace: *poolNamespace,
147147
}
148148
mgr, err := runserver.NewDefaultManager(poolNamespacedName, cfg)
149149
if err != nil {
150150
setupLog.Error(err, "Failed to create controller manager")
151151
return err
152152
}
153153

154-
ctx := ctrl.SetupSignalHandler()
155-
156154
// Set up mapper for metric scraping.
157155
mapping, err := backendmetrics.NewMetricMapping(
158156
*totalQueuedRequestsMetric,
@@ -167,14 +165,15 @@ func run() error {
167165

168166
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{MetricMapping: mapping}, *refreshMetricsInterval)
169167
// Setup runner.
168+
ctx := ctrl.SetupSignalHandler()
169+
170170
datastore := datastore.NewDatastore(ctx, pmf)
171171

172172
serverRunner := &runserver.ExtProcServerRunner{
173173
GrpcPort: *grpcPort,
174174
DestinationEndpointHintMetadataNamespace: *destinationEndpointHintMetadataNamespace,
175175
DestinationEndpointHintKey: *destinationEndpointHintKey,
176-
PoolName: *poolName,
177-
PoolNamespace: *poolNamespace,
176+
PoolNamespacedName: poolNamespacedName,
178177
Datastore: datastore,
179178
SecureServing: *secureServing,
180179
CertPath: *certPath,

pkg/epp/backend/metrics/logger.go

+5-5
Original file line number | Diff line number | Diff line change
@@ -55,8 +55,8 @@ func StartMetricsLogger(ctx context.Context, datastore Datastore, refreshPrometh
5555
case <-ctx.Done():
5656
logger.V(logutil.DEFAULT).Info("Shutting down prometheus metrics thread")
5757
return
58-
case <-ticker.C: // Periodically flush prometheus metrics for inference pool
59-
flushPrometheusMetricsOnce(logger, datastore)
58+
case <-ticker.C: // Periodically refresh prometheus metrics for inference pool
59+
refreshPrometheusMetrics(logger, datastore)
6060
}
6161
}
6262
}()
@@ -86,19 +86,19 @@ func StartMetricsLogger(ctx context.Context, datastore Datastore, refreshPrometh
8686
}
8787
}
8888

89-
func flushPrometheusMetricsOnce(logger logr.Logger, datastore Datastore) {
89+
func refreshPrometheusMetrics(logger logr.Logger, datastore Datastore) {
9090
pool, err := datastore.PoolGet()
9191
if err != nil {
9292
// No inference pool exists, or it has not been initialized.
93-
logger.V(logutil.DEFAULT).Info("pool is not initialized, skipping flushing metrics")
93+
logger.V(logutil.DEFAULT).Info("Pool is not initialized, skipping refreshing metrics")
9494
return
9595
}
9696

9797
var kvCacheTotal float64
9898
var queueTotal int
9999

100100
podMetrics := datastore.PodGetAll()
101-
logger.V(logutil.VERBOSE).Info("Flushing Prometheus Metrics", "ReadyPods", len(podMetrics))
101+
logger.V(logutil.TRACE).Info("Refreshing Prometheus Metrics", "ReadyPods", len(podMetrics))
102102
if len(podMetrics) == 0 {
103103
return
104104
}

pkg/epp/server/runserver.go

+6-11
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,7 @@ type ExtProcServerRunner struct {
4343
GrpcPort int
4444
DestinationEndpointHintMetadataNamespace string
4545
DestinationEndpointHintKey string
46-
PoolName string
47-
PoolNamespace string
46+
PoolNamespacedName types.NamespacedName
4847
Datastore datastore.Datastore
4948
SecureServing bool
5049
CertPath string
@@ -73,8 +72,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
7372
GrpcPort: DefaultGrpcPort,
7473
DestinationEndpointHintKey: DefaultDestinationEndpointHintKey,
7574
DestinationEndpointHintMetadataNamespace: DefaultDestinationEndpointHintMetadataNamespace,
76-
PoolName: DefaultPoolName,
77-
PoolNamespace: DefaultPoolNamespace,
75+
PoolNamespacedName: types.NamespacedName{Name: DefaultPoolName, Namespace: DefaultPoolNamespace},
7876
SecureServing: DefaultSecureServing,
7977
RefreshPrometheusMetricsInterval: DefaultRefreshPrometheusMetricsInterval,
8078
// Datastore can be assigned later.
@@ -93,13 +91,10 @@ func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Man
9391
}
9492

9593
if err := (&controller.InferenceModelReconciler{
96-
Datastore: r.Datastore,
97-
Client: mgr.GetClient(),
98-
PoolNamespacedName: types.NamespacedName{
99-
Name: r.PoolName,
100-
Namespace: r.PoolNamespace,
101-
},
102-
Record: mgr.GetEventRecorderFor("InferenceModel"),
94+
Datastore: r.Datastore,
95+
Client: mgr.GetClient(),
96+
PoolNamespacedName: r.PoolNamespacedName,
97+
Record: mgr.GetEventRecorderFor("InferenceModel"),
10398
}).SetupWithManager(ctx, mgr); err != nil {
10499
return fmt.Errorf("failed setting up InferenceModelReconciler: %w", err)
105100
}

test/integration/epp/hermetic_test.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -1348,7 +1348,7 @@ func BeforeSuite() func() {
13481348
serverRunner.TestPodMetricsClient = &backendmetrics.FakePodMetricsClient{}
13491349
pmf := backendmetrics.NewPodMetricsFactory(serverRunner.TestPodMetricsClient, 10*time.Millisecond)
13501350
// Adjust from defaults
1351-
serverRunner.PoolName = "vllm-llama3-8b-instruct-pool"
1351+
serverRunner.PoolNamespacedName = types.NamespacedName{Name: "vllm-llama3-8b-instruct-pool", Namespace: "default"}
13521352
serverRunner.Datastore = datastore.NewDatastore(context.Background(), pmf)
13531353
serverRunner.SecureServing = false
13541354

0 commit comments

Comments
 (0)