Skip to content

Commit 95cd467

Browse files
committed
[test][venice-common][router]: Add diff-coverage tests for new close paths
Cover the new close() entry points exercised by the prior commit so CI's diffCoverage gate (45% branch threshold) is satisfied: - RetryManagerTest.testCloseIsSafeWhenDisabled — exercises RetryManager.close() when no RetryManagerStats was wired in; verifies idempotency. - RouterStatsTest (new) — covers RouterStats.close() for both Closeable and non-Closeable STAT_TYPE parameterisations, idempotency, and the per-request-type getter. - TestVeniceDelegateMode.testCloseIsIdempotent — exercises VeniceDelegateMode.close().
1 parent 67c6e5a commit 95cd467

28 files changed

Lines changed: 290 additions & 299 deletions

clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggVersionedDIVStats.java

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,9 @@ public class AggVersionedDIVStats extends AbstractVeniceAggVersionedStats<DIVSta
5050
private final Map<VeniceMetricsDimensions, String> baseDimensionsMap;
5151

5252
/**
53-
* Per-store entry map. Each entry bundles the per-store {@link AbstractStatsCloseable#resources}
54-
* with lazily-populated wrappers for the four per-store OTel metrics. A single {@code remove()} in
55-
* {@link #handleStoreDeleted(String)} closes all wrappers atomically — there is no
56-
* cross-map race window where a concurrent record could resurrect parallel entries.
57-
*
58-
* <p>Map grows lazily via {@code computeIfAbsent} and is bounded by the number of stores the
59-
* server is actively ingesting. Each map is OTel-only; Tehuti recording is handled by the parent
60-
* class via {@code recordVersionedAndTotalStat}.
53+
* Per-store entry bundling the four per-store OTel wrappers; one {@code remove()} in
54+
* {@link #handleStoreDeleted} closes them atomically. Bounded by the number of stores ingested.
55+
* Tehuti recording stays on the parent via {@code recordVersionedAndTotalStat}.
6156
*/
6257
private final Map<String, PerStoreEntry> perStore = new VeniceConcurrentHashMap<>();
6358

@@ -100,10 +95,7 @@ private static final class PerStoreEntry extends AbstractStatsCloseable {
10095

10196
/**
10297
* Per-store version info for classifying versions as CURRENT, FUTURE, or BACKUP.
103-
* Updated via {@link #onVersionInfoUpdated(String, int, int)}. This map is intentionally
104-
* separate from {@link #perStore}: it stores plain data (no {@link java.io.Closeable}), so it
105-
* is not vulnerable to the delete-vs-record resurrection race that motivated bundling the
106-
* per-store closeable wrappers into a single entry holder.
98+
* Updated via {@link #onVersionInfoUpdated(String, int, int)}.
10799
*/
108100
private final Map<String, OtelVersionedStatsUtils.VersionInfo> versionInfoMap = new VeniceConcurrentHashMap<>();
109101

clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggVersionedDaVinciRecordTransformerStats.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,7 @@ public class AggVersionedDaVinciRecordTransformerStats
3030
private final Map<VeniceMetricsDimensions, String> baseDimensionsMap;
3131
private final boolean emitOtelMetrics;
3232

33-
/**
34-
* Per-store entry map. Each entry bundles the per-store {@link AbstractStatsCloseable#resources}
35-
* with the latency and error-count wrappers. A single {@code remove()} in
36-
* {@link #handleStoreDeleted(String)} closes both wrappers atomically — there is no
37-
* cross-map race window where a concurrent record could resurrect parallel entries.
38-
* Bounded by the number of stores on this host.
39-
*/
33+
/** Per-store entry bundling latency + error-count wrappers; one {@code remove()} in {@link #handleStoreDeleted} closes them. */
4034
private final Map<String, PerStoreEntry> perStore = new VeniceConcurrentHashMap<>();
4135

4236
/** Per-store state held by {@link #perStore}. */

clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/NativeMetadataRepositoryStats.java

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,9 @@ public class NativeMetadataRepositoryStats extends AbstractVeniceStats {
3434
private final Map<String, Long> metadataCacheTimestampMapInMs = new VeniceConcurrentHashMap<>();
3535
private final Clock clock;
3636

37-
// OTel: per-store ASYNC_DOUBLE_GAUGE for staleness. Bounded by the number of stores currently subscribed.
38-
// Per-store entries are removed and closed in {@link #handleStoreDeleted}, deregistering the callback
39-
// from the SDK so it stops polling the gauge.
4037
private final VeniceOpenTelemetryMetricsRepository otelRepository;
4138
private final Map<VeniceMetricsDimensions, String> baseDimensionsMap;
42-
/**
43-
* Per-store entry map. Each entry bundles the per-store registry with the async-gauge wrapper
44-
* (registered eagerly in the {@link PerStoreEntry} constructor), so a single {@code remove()} in
45-
* {@link #handleStoreDeleted(String)} is race-free w.r.t. concurrent recordings.
46-
*/
39+
/** Per-store ASYNC_DOUBLE_GAUGE; entries are closed in {@link #handleStoreDeleted} so the SDK stops polling. */
4740
private final Map<String, PerStoreEntry> perStore = new VeniceConcurrentHashMap<>();
4841

4942
/** Per-store state held by {@link #perStore}. */
@@ -105,11 +98,7 @@ public void updateCacheTimestamp(String storeName, String clusterName, long cach
10598
registerOtelGaugeIfAbsent(storeName, clusterName);
10699
}
107100

108-
/**
109-
* Removes the Tehuti cache-timestamp entry and closes the per-store OTel ASYNC gauge wrapper,
110-
* deregistering the SDK callback so it stops polling. Called by the owning
111-
* {@code NativeMetadataRepository} from its store-removal path.
112-
*/
101+
/** Removes the Tehuti cache-timestamp entry and closes the per-store OTel async-gauge wrapper. */
113102
public void handleStoreDeleted(String storeName) {
114103
metadataCacheTimestampMapInMs.remove(storeName);
115104
MetricEntityStateUtils.closeQuietly(perStore.remove(storeName));

clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ServerMetadataServiceStats.java

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,11 @@ public class ServerMetadataServiceStats extends AbstractVeniceStats {
4141
private final VeniceOpenTelemetryMetricsRepository otelRepository;
4242
private final Map<VeniceMetricsDimensions, String> baseDimensionsMap;
4343
/**
44-
* Per-store entry map. Entries live for the process lifetime and are drained on {@link #close()}.
45-
*
46-
* <p>When the requested store does not exist (e.g., {@link VeniceNoStoreException}),
47-
* {@link OpenTelemetryMetricsSetup#UNKNOWN_STORE_NAME} is used as a sentinel so that all
48-
* unknown-store failures share one entry. Cardinality is bounded by stores deployed to this
49-
* server + the sentinel.
44+
* Per-store entry map; process-lifetime, drained on {@link #close()}. Unknown stores share the
45+
* {@link OpenTelemetryMetricsSetup#UNKNOWN_STORE_NAME} sentinel.
5046
*/
5147
private final Map<String, PerStoreEntry> perStore = new VeniceConcurrentHashMap<>();
5248

53-
/** Per-store state held by {@link #perStore}. */
5449
private static final class PerStoreEntry extends AbstractStatsCloseable {
5550
final MetricEntityStateOneEnum<VeniceResponseStatusCategory> wrapper;
5651

internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,7 @@ public T getTotalStats() {
7373
return totalStats;
7474
}
7575

76-
/**
77-
* Closes {@link #totalStats} and every per-store entry in {@link #storeStats}. Subclasses with
78-
* additional cleanup should override and call {@code super.close()}.
79-
*/
76+
/** Closes {@link #totalStats} and every per-store entry in {@link #storeStats}. */
8077
@Override
8178
public void close() {
8279
MetricEntityStateUtils.closeQuietly(totalStats);

internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceStats.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -325,10 +325,7 @@ protected final MeasurableStat[] avgAndTotal() {
325325
return new MeasurableStat[] { new Avg(), new Total() };
326326
}
327327

328-
/**
329-
* Drains every {@link Closeable} registered into {@link #resources}. Idempotent. Subclasses with
330-
* additional cleanup should override and call {@code super.close()}.
331-
*/
328+
/** Drains every {@link Closeable} registered into {@link #resources}. Idempotent. */
332329
@Override
333330
public void close() {
334331
resources.close();

internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AsyncMetricEntityState.java

Lines changed: 7 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,7 @@ public abstract class AsyncMetricEntityState implements Closeable {
5050
private final Map<VeniceMetricsDimensions, String> baseDimensionsMap;
5151
protected final MetricEntity metricEntity;
5252

53-
/**
54-
* The OTel SDK instrument handle. Volatile so that {@link #close()} setting it to {@code null} is
55-
* promptly visible to recording threads, and so that recording threads see a consistent snapshot
56-
* (read once into a local) when the cast happens.
57-
*/
53+
/** OTel SDK instrument handle. Volatile so {@link #close()} nulling it is promptly visible to recording threads. */
5854
protected volatile Object otelMetric = null;
5955
/** The Tehuti sensor. Volatile for the same reason as {@link #otelMetric}. */
6056
protected volatile Sensor tehutiSensor = null;
@@ -405,33 +401,15 @@ public Object getOtelMetric() {
405401
}
406402

407403
/**
408-
* Releases this wrapper's OTel resources. <b>For async wrappers</b> (the OTel instrument is an
409-
* {@link AutoCloseable} {@code Observable*Gauge} / {@code Observable*Counter}), this deregisters
410-
* the SDK callback so it stops being polled. <b>For sync wrappers</b> ({@code LongCounter},
411-
* {@code DoubleHistogram}, {@code LongUpDownCounter}, sync {@code LongGauge}) the SDK-side
412-
* instrument and aggregator persist until the MeterProvider is closed; this method only releases
413-
* the wrapper-side reference to the SDK instrument and the Tehuti sensor reference. Idempotent.
414-
* Best-effort: SDK close exceptions are logged at WARN and swallowed so a misbehaving close
415-
* cannot break shutdown.
416-
*
417-
* <p><b>Behaviour after close:</b> {@code record()} is a silent no-op (both Tehuti and OTel
418-
* sides). For {@code ASYNC_COUNTER_FOR_HIGH_PERF_CASES}, any pending un-collected values held in
419-
* the per-attribute {@code LongAdder}s are dropped. The Tehuti {@code Sensor} stays registered in
420-
* the underlying {@code MetricsRepository}; only the wrapper's reference is released. For sync
421-
* counters/histograms, recreating a wrapper for the same {@link MetricEntity} binds to the SDK's
422-
* existing aggregator, so values continue to accumulate across close/recreate.
423-
*
424-
* <p><b>Caller contract:</b> not concurrent-safe with {@code record()} — callers must coordinate
425-
* so that close happens after this wrapper is no longer in use. Direct subclasses
426-
* (e.g., {@link AsyncMetricEntityStateBase}) may override to also clear cached attribute maps
427-
* and other wrapper-side state, calling {@code super.close()} first.
404+
* Releases this wrapper's OTel resources. For async wrappers this deregisters the SDK callback so
405+
* it stops being polled; for sync wrappers it only releases the wrapper-side reference (the SDK
406+
* aggregator persists until MeterProvider close). Idempotent and best-effort — SDK close
407+
* exceptions are logged at WARN and swallowed. Post-close {@code record()} is a silent no-op.
408+
* Not concurrent-safe with {@code record()}; callers must coordinate.
428409
*/
429410
@Override
430411
public void close() {
431-
// Snapshot the volatile field before the helper call so a second concurrent close() cannot
432-
// observe the field non-null in instanceof and then invoke close() on a now-null reference
433-
// and emit a misleading "OTel SDK close threw" WARN. Idempotency is preserved: the second
434-
// close sees null here and skips the SDK call.
412+
// Snapshot before the helper call so a concurrent second close() sees null here and skips.
435413
Object localInstrument = otelMetric;
436414
otelMetric = null;
437415
tehutiSensor = null;

internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AsyncMetricEntityStateBase.java

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,7 @@ private void validateBaseAttributes(
7777

7878
// --- LongSupplier factory methods (for ASYNC_GAUGE) ---
7979

80-
/**
81-
* Factory method for OTel-only ASYNC_GAUGE with LongSupplier callback.
82-
*
83-
* @param registry the {@link CompositeCloseable} that closes the returned wrapper at shutdown.
84-
* Pass {@link CompositeCloseable#NONE} at test or ad-hoc callsites without lifecycle.
85-
*/
80+
/** Factory method for OTel-only ASYNC_GAUGE with LongSupplier callback. */
8681
public static AsyncMetricEntityStateBase create(
8782
MetricEntity metricEntity,
8883
VeniceOpenTelemetryMetricsRepository otelRepository,
@@ -102,12 +97,7 @@ public static AsyncMetricEntityStateBase create(
10297
asyncCallback));
10398
}
10499

105-
/**
106-
* Factory method for joint Tehuti+OTel ASYNC_GAUGE with LongSupplier callback.
107-
*
108-
* @param registry the {@link CompositeCloseable} that closes the returned wrapper at shutdown.
109-
* Pass {@link CompositeCloseable#NONE} at test or ad-hoc callsites without lifecycle.
110-
*/
100+
/** Factory method for joint Tehuti+OTel ASYNC_GAUGE with LongSupplier callback. */
111101
public static AsyncMetricEntityStateBase create(
112102
MetricEntity metricEntity,
113103
VeniceOpenTelemetryMetricsRepository otelRepository,
@@ -132,12 +122,7 @@ public static AsyncMetricEntityStateBase create(
132122

133123
// --- DoubleSupplier factory methods (for ASYNC_DOUBLE_GAUGE) ---
134124

135-
/**
136-
* Factory method for OTel-only ASYNC_DOUBLE_GAUGE with DoubleSupplier callback.
137-
*
138-
* @param registry the {@link CompositeCloseable} that closes the returned wrapper at shutdown.
139-
* Pass {@link CompositeCloseable#NONE} at test or ad-hoc callsites without lifecycle.
140-
*/
125+
/** Factory method for OTel-only ASYNC_DOUBLE_GAUGE with DoubleSupplier callback. */
141126
public static AsyncMetricEntityStateBase create(
142127
MetricEntity metricEntity,
143128
VeniceOpenTelemetryMetricsRepository otelRepository,
@@ -157,12 +142,7 @@ public static AsyncMetricEntityStateBase create(
157142
asyncDoubleCallback));
158143
}
159144

160-
/**
161-
* Factory method for joint Tehuti+OTel ASYNC_DOUBLE_GAUGE with DoubleSupplier callback.
162-
*
163-
* @param registry the {@link CompositeCloseable} that closes the returned wrapper at shutdown.
164-
* Pass {@link CompositeCloseable#NONE} at test or ad-hoc callsites without lifecycle.
165-
*/
145+
/** Factory method for joint Tehuti+OTel ASYNC_DOUBLE_GAUGE with DoubleSupplier callback. */
166146
public static AsyncMetricEntityStateBase create(
167147
MetricEntity metricEntity,
168148
VeniceOpenTelemetryMetricsRepository otelRepository,

internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AsyncMetricEntityStateOneEnum.java

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,7 @@ private AsyncMetricEntityStateOneEnum(
7676
*
7777
* @param <S> the state type returned by {@code liveStateResolver}. Can be any reference type
7878
* (wrapper, task, counter, etc.) — the infra never inspects it beyond null-check.
79-
* @param registry the {@link CompositeCloseable} that closes the returned wrapper at shutdown.
80-
* Pass {@link CompositeCloseable#NONE} at test or ad-hoc callsites without lifecycle.
79+
* @param registry closes the returned wrapper at shutdown; pass {@link CompositeCloseable#NONE} for tests.
8180
*/
8281
public static <E extends Enum<E> & VeniceDimensionInterface, S> AsyncMetricEntityStateOneEnum<E> create(
8382
MetricEntity metricEntity,
@@ -192,17 +191,9 @@ public Object getInstrument() {
192191
}
193192

194193
/**
195-
* Deregisters the underlying SDK observable gauge (if registered) and releases the cached
196-
* per-enum {@link Attributes} so the wrapper can be GC'd. Idempotent. Best-effort: SDK close
197-
* exceptions are logged at WARN and swallowed.
198-
*
199-
* <p><b>Caller contract:</b> closing this wrapper deregisters the callback for ALL enum values
200-
* (the entire multi-emit instrument is retired). Use {@code liveStateResolver} returning
201-
* {@code null} for per-combo dormancy; reserve {@code close()} for full retirement of the
202-
* wrapper (e.g., process shutdown or removal of the owning per-store/per-version stats class).
203-
* Not concurrent-safe with the SDK collection callback in flight; the SDK's {@code close()}
204-
* deregisters the callback but does not block in-flight invocations — those use the local
205-
* {@code attributesByEnum} captured at registration time, so the field-nulling here is safe.
194+
* Deregisters the underlying SDK observable gauge and releases the cached per-enum {@link Attributes}.
195+
* Closes the callback for ALL enum values — use {@code liveStateResolver} returning {@code null}
196+
* for per-combo dormancy. Idempotent and best-effort.
206197
*/
207198
@Override
208199
public void close() {

internal/venice-client-common/src/main/java/com/linkedin/venice/stats/metrics/AsyncMetricEntityStateTwoEnums.java

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,7 @@ private AsyncMetricEntityStateTwoEnums(
7676
*
7777
* @param <S> the state type returned by {@code liveStateResolver}. Any reference type — the
7878
* infra never inspects it beyond null-check.
79-
* @param registry the {@link CompositeCloseable} that closes the returned wrapper at shutdown.
80-
* Pass {@link CompositeCloseable#NONE} at test or ad-hoc callsites without lifecycle.
79+
* @param registry closes the returned wrapper at shutdown; pass {@link CompositeCloseable#NONE} for tests.
8180
*/
8281
public static <E1 extends Enum<E1> & VeniceDimensionInterface, E2 extends Enum<E2> & VeniceDimensionInterface, S> AsyncMetricEntityStateTwoEnums<E1, E2> create(
8382
MetricEntity metricEntity,
@@ -203,20 +202,12 @@ public Object getInstrument() {
203202
}
204203

205204
/**
206-
* Deregisters the underlying SDK observable gauge (if registered) and releases the cached
207-
* per-pair {@link Attributes} so the wrapper can be GC'd. Idempotent. Best-effort: SDK close
208-
* exceptions are logged at WARN and swallowed.
209-
*
210-
* <p><b>Caller contract:</b> closing this wrapper deregisters the callback for ALL enum-pair
211-
* combinations. Use {@code liveStateResolver} returning {@code null} for per-combo dormancy;
212-
* reserve {@code close()} for full retirement of the wrapper.
205+
* Deregisters the underlying SDK observable gauge and releases the cached per-pair {@link Attributes}.
206+
* Closes the callback for ALL enum-pair combinations. Idempotent and best-effort.
213207
*/
214208
@Override
215209
public void close() {
216-
// Snapshot the volatile field before the helper call so a second concurrent close() cannot
217-
// observe the field non-null in instanceof and then invoke close() on a now-null reference
218-
// and emit a misleading "OTel SDK close threw" WARN. Idempotency is preserved: the second
219-
// close sees null here and skips the SDK call.
210+
// Snapshot before the helper call so a concurrent second close() sees null here and skips.
220211
Object localInstrument = instrument;
221212
instrument = null;
222213
attributesByEnum = null;

0 commit comments

Comments
 (0)