You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: metrics/src/main/java/ai/vespa/metrics/ClusterControllerMetrics.java
+2-2Lines changed: 2 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -24,8 +24,8 @@ public enum ClusterControllerMetrics implements VespaMetrics {
24
24
// DO NOT RELY ON THIS METRIC YET.
25
25
NODE_EVENT_COUNT("cluster-controller.node-event.count", Unit.OPERATION, "Number of node events"),
26
26
RESOURCE_USAGE_NODES_ABOVE_LIMIT("cluster-controller.resource_usage.nodes_above_limit", Unit.NODE, "The number of content nodes above resource limit, blocking feed"),
27
-
RESOURCE_USAGE_MAX_MEMORY_UTILIZATION("cluster-controller.resource_usage.max_memory_utilization", Unit.FRACTION, "Current memory utilisation, for content node with highest value"),
28
-
RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, for content node with highest value"),
27
+
RESOURCE_USAGE_MAX_MEMORY_UTILIZATION("cluster-controller.resource_usage.max_memory_utilization", Unit.FRACTION, "Current memory utilisation, for content node with the highest value"),
28
+
RESOURCE_USAGE_MAX_DISK_UTILIZATION("cluster-controller.resource_usage.max_disk_utilization", Unit.FRACTION, "Current disk space utilisation, for content node with the highest value"),
29
29
RESOURCE_USAGE_MEMORY_LIMIT("cluster-controller.resource_usage.memory_limit", Unit.FRACTION, "Memory space limit as a fraction of available memory"),
30
30
RESOURCE_USAGE_DISK_LIMIT("cluster-controller.resource_usage.disk_limit", Unit.FRACTION, "Disk space limit as a fraction of available disk space"),
Copy file name to clipboardExpand all lines: metrics/src/main/java/ai/vespa/metrics/ConfigServerMetrics.java
+3-3Lines changed: 3 additions & 3 deletions
Original file line number
Diff line number
Diff line change
@@ -98,7 +98,7 @@ public enum ConfigServerMetrics implements VespaMetrics {
98
98
NUMBER_OF_SERVICES_NOT_CHECKED("numberOfServicesNotChecked", Unit.INSTANCE, "The number of services supposed to run on a node, that has not checked"),
99
99
NUMBER_OF_SERVICES_DOWN("numberOfServicesDown", Unit.INSTANCE, "The number of services confirmed to not be running on a node"),
100
100
SOME_SERVICES_DOWN("someServicesDown", Unit.BINARY, "One if one or more services has been confirmed to not run on a node, zero if not"),
101
-
NUMBER_OF_SERVICES_UNKNOWN("numberOfServicesUnknown", Unit.INSTANCE, "The number of services the config server does not know if is running on a node"),
101
+
NUMBER_OF_SERVICES_UNKNOWN("numberOfServicesUnknown", Unit.INSTANCE, "The number of services the config server does not know is running on a node"),
102
102
NODE_FAILER_BAD_NODE("nodeFailerBadNode", Unit.BINARY, "One if the node is failed due to being bad, zero if not"),
103
103
DOWN_IN_NODE_REPO("downInNodeRepo", Unit.BINARY, "One if the node is registered as being down in the node repository, zero if not"),
104
104
NUMBER_OF_SERVICES("numberOfServices", Unit.INSTANCE, "Number of services supposed to run on a node"),
@@ -108,8 +108,8 @@ public enum ConfigServerMetrics implements VespaMetrics {
108
108
LOCK_ATTEMPT_LOCKED_LATENCY("lockAttempt.lockedLatency", Unit.SECOND, "Longest lock duration in the snapshot period"),
109
109
LOCK_ATTEMPT_LOCKED_LOAD("lockAttempt.lockedLoad", Unit.OPERATION, "Average number of locks held concurrently during the snapshot period"),
110
110
LOCK_ATTEMPT_ACQUIRE_TIMED_OUT("lockAttempt.acquireTimedOut", Unit.OPERATION, " Number of locking attempts that timed out during the snapshot period"),
111
-
LOCK_ATTEMPT_DEADLOCK("lockAttempt.deadlock", Unit.OPERATION, "Number of lock grab deadlocks detected during the snapshont period"),
112
-
LOCK_ATTEMPT_ERRORS("lockAttempt.errors", Unit.OPERATION, "Number of other lock related errors detected during the snapshont period"),
111
+
LOCK_ATTEMPT_DEADLOCK("lockAttempt.deadlock", Unit.OPERATION, "Number of lock grab deadlocks detected during the snapshot period"),
112
+
LOCK_ATTEMPT_ERRORS("lockAttempt.errors", Unit.OPERATION, "Number of other lock related errors detected during the snapshot period"),
113
113
114
114
HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_CPU("hostedVespa.docker.totalCapacityCpu", Unit.VCPU, "Total number of VCPUs on tenant hosts managed by hosted Vespa in a zone"),
115
115
HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_MEM("hostedVespa.docker.totalCapacityMem", Unit.GIGABYTE, "Total amount of memory on tenant hosts managed by hosted Vespa in a zone"),
Copy file name to clipboardExpand all lines: metrics/src/main/java/ai/vespa/metrics/SearchNodeMetrics.java
+7-7Lines changed: 7 additions & 7 deletions
Original file line number
Diff line number
Diff line change
@@ -19,7 +19,7 @@ public enum SearchNodeMetrics implements VespaMetrics {
19
19
CONTENT_PROTON_DOCUMENTDB_MEMORY_USAGE_DEAD_BYTES("content.proton.documentdb.memory_usage.dead_bytes", Unit.BYTE, "The number of dead bytes (<= used_bytes)"),
20
20
CONTENT_PROTON_DOCUMENTDB_MEMORY_USAGE_ONHOLD_BYTES("content.proton.documentdb.memory_usage.onhold_bytes", Unit.BYTE, "The number of bytes on hold"),
21
21
CONTENT_PROTON_DOCUMENTDB_MEMORY_USAGE_USED_BYTES("content.proton.documentdb.memory_usage.used_bytes", Unit.BYTE, "The number of used bytes (<= allocated_bytes)"),
22
-
CONTENT_PROTON_DOCUMENTDB_HEART_BEAT_AGE("content.proton.documentdb.heart_beat_age", Unit.SECOND, "How long ago (in seconds) heart beat maintenace job was run"),
22
+
CONTENT_PROTON_DOCUMENTDB_HEART_BEAT_AGE("content.proton.documentdb.heart_beat_age", Unit.SECOND, "How long ago (in seconds) heart beat maintenance job was run"),
@@ -36,27 +36,27 @@ public enum SearchNodeMetrics implements VespaMetrics {
36
36
// Executors shared between all document dbs
37
37
CONTENT_PROTON_EXECUTOR_PROTON_QUEUESIZE("content.proton.executor.proton.queuesize", Unit.TASK, "Size of executor proton task queue"),
38
38
CONTENT_PROTON_EXECUTOR_PROTON_ACCEPTED("content.proton.executor.proton.accepted", Unit.TASK, "Number of executor proton accepted tasks"),
39
-
CONTENT_PROTON_EXECUTOR_PROTON_WAKEUPS("content.proton.executor.proton.wakeups", Unit.WAKEUP, "Number of times a executor proton worker thread has been woken up"),
39
+
CONTENT_PROTON_EXECUTOR_PROTON_WAKEUPS("content.proton.executor.proton.wakeups", Unit.WAKEUP, "Number of times an executor proton worker thread has been woken up"),
40
40
CONTENT_PROTON_EXECUTOR_PROTON_UTILIZATION("content.proton.executor.proton.utilization", Unit.FRACTION, "Ratio of time the executor proton worker threads has been active"),
41
41
CONTENT_PROTON_EXECUTOR_PROTON_REJECTED("content.proton.executor.proton.rejected", Unit.TASK, "Number of rejected tasks"),
42
42
CONTENT_PROTON_EXECUTOR_FLUSH_QUEUESIZE("content.proton.executor.flush.queuesize", Unit.TASK, "Size of executor flush task queue"),
43
43
CONTENT_PROTON_EXECUTOR_FLUSH_ACCEPTED("content.proton.executor.flush.accepted", Unit.TASK, "Number of accepted executor flush tasks"),
44
-
CONTENT_PROTON_EXECUTOR_FLUSH_WAKEUPS("content.proton.executor.flush.wakeups", Unit.WAKEUP, "Number of times a executor flush worker thread has been woken up"),
44
+
CONTENT_PROTON_EXECUTOR_FLUSH_WAKEUPS("content.proton.executor.flush.wakeups", Unit.WAKEUP, "Number of times an executor flush worker thread has been woken up"),
45
45
CONTENT_PROTON_EXECUTOR_FLUSH_UTILIZATION("content.proton.executor.flush.utilization", Unit.FRACTION, "Ratio of time the executor flush worker threads has been active"),
46
46
CONTENT_PROTON_EXECUTOR_FLUSH_REJECTED("content.proton.executor.flush.rejected", Unit.TASK, "Number of rejected tasks"),
47
47
CONTENT_PROTON_EXECUTOR_MATCH_QUEUESIZE("content.proton.executor.match.queuesize", Unit.TASK, "Size of executor match task queue"),
48
48
CONTENT_PROTON_EXECUTOR_MATCH_ACCEPTED("content.proton.executor.match.accepted", Unit.TASK, "Number of accepted executor match tasks"),
49
-
CONTENT_PROTON_EXECUTOR_MATCH_WAKEUPS("content.proton.executor.match.wakeups", Unit.WAKEUP, "Number of times a executor match worker thread has been woken up"),
49
+
CONTENT_PROTON_EXECUTOR_MATCH_WAKEUPS("content.proton.executor.match.wakeups", Unit.WAKEUP, "Number of times an executor match worker thread has been woken up"),
50
50
CONTENT_PROTON_EXECUTOR_MATCH_UTILIZATION("content.proton.executor.match.utilization", Unit.FRACTION, "Ratio of time the executor match worker threads has been active"),
51
51
CONTENT_PROTON_EXECUTOR_MATCH_REJECTED("content.proton.executor.match.rejected", Unit.TASK, "Number of rejected tasks"),
52
52
CONTENT_PROTON_EXECUTOR_DOCSUM_QUEUESIZE("content.proton.executor.docsum.queuesize", Unit.TASK, "Size of executor docsum task queue"),
53
53
CONTENT_PROTON_EXECUTOR_DOCSUM_ACCEPTED("content.proton.executor.docsum.accepted", Unit.TASK, "Number of executor accepted docsum tasks"),
54
-
CONTENT_PROTON_EXECUTOR_DOCSUM_WAKEUPS("content.proton.executor.docsum.wakeups", Unit.WAKEUP, "Number of times a executor docsum worker thread has been woken up"),
54
+
CONTENT_PROTON_EXECUTOR_DOCSUM_WAKEUPS("content.proton.executor.docsum.wakeups", Unit.WAKEUP, "Number of times an executor docsum worker thread has been woken up"),
55
55
CONTENT_PROTON_EXECUTOR_DOCSUM_UTILIZATION("content.proton.executor.docsum.utilization", Unit.FRACTION, "Ratio of time the executor docsum worker threads has been active"),
56
56
CONTENT_PROTON_EXECUTOR_DOCSUM_REJECTED("content.proton.executor.docsum.rejected", Unit.TASK, "Number of rejected tasks"),
57
57
CONTENT_PROTON_EXECUTOR_SHARED_QUEUESIZE("content.proton.executor.shared.queuesize", Unit.TASK, "Size of executor shared task queue"),
58
58
CONTENT_PROTON_EXECUTOR_SHARED_ACCEPTED("content.proton.executor.shared.accepted", Unit.TASK, "Number of executor shared accepted tasks"),
59
-
CONTENT_PROTON_EXECUTOR_SHARED_WAKEUPS("content.proton.executor.shared.wakeups", Unit.WAKEUP, "Number of times a executor shared worker thread has been woken up"),
59
+
CONTENT_PROTON_EXECUTOR_SHARED_WAKEUPS("content.proton.executor.shared.wakeups", Unit.WAKEUP, "Number of times an executor shared worker thread has been woken up"),
60
60
CONTENT_PROTON_EXECUTOR_SHARED_UTILIZATION("content.proton.executor.shared.utilization", Unit.FRACTION, "Ratio of time the executor shared worker threads has been active"),
61
61
CONTENT_PROTON_EXECUTOR_SHARED_REJECTED("content.proton.executor.shared.rejected", Unit.TASK, "Number of rejected tasks"),
62
62
CONTENT_PROTON_EXECUTOR_WARMUP_QUEUESIZE("content.proton.executor.warmup.queuesize", Unit.TASK, "Size of executor warmup task queue"),
@@ -66,7 +66,7 @@ public enum SearchNodeMetrics implements VespaMetrics {
66
66
CONTENT_PROTON_EXECUTOR_WARMUP_REJECTED("content.proton.executor.warmup.rejected", Unit.TASK, "Number of rejected tasks"),
67
67
CONTENT_PROTON_EXECUTOR_FIELD_WRITER_QUEUESIZE("content.proton.executor.field_writer.queuesize", Unit.TASK, "Size of executor field writer task queue"),
68
68
CONTENT_PROTON_EXECUTOR_FIELD_WRITER_ACCEPTED("content.proton.executor.field_writer.accepted", Unit.TASK, "Number of accepted executor field writer tasks"),
69
-
CONTENT_PROTON_EXECUTOR_FIELD_WRITER_WAKEUPS("content.proton.executor.field_writer.wakeups", Unit.WAKEUP, "Number of times a executor field writer worker thread has been woken up"),
69
+
CONTENT_PROTON_EXECUTOR_FIELD_WRITER_WAKEUPS("content.proton.executor.field_writer.wakeups", Unit.WAKEUP, "Number of times an executor field writer worker thread has been woken up"),
70
70
CONTENT_PROTON_EXECUTOR_FIELD_WRITER_UTILIZATION("content.proton.executor.field_writer.utilization", Unit.FRACTION, "Ratio of time the executor fieldwriter worker threads has been active"),
71
71
CONTENT_PROTON_EXECUTOR_FIELD_WRITER_SATURATION("content.proton.executor.field_writer.saturation", Unit.FRACTION, "Ratio indicating the max saturation of underlying worker threads. A higher saturation than utilization indicates a bottleneck in one of the worker threads."),
72
72
CONTENT_PROTON_EXECUTOR_FIELD_WRITER_REJECTED("content.proton.executor.field_writer.rejected", Unit.TASK, "Number of rejected tasks"),
Copy file name to clipboardExpand all lines: metrics/src/main/java/ai/vespa/metrics/StorageMetrics.java
+3-3Lines changed: 3 additions & 3 deletions
Original file line number
Diff line number
Diff line change
@@ -222,13 +222,13 @@ public enum StorageMetrics implements VespaMetrics {
222
222
VDS_VISITOR_CV_QUEUEEVICTEDWAITTIME("vds.visitor.cv_queueevictedwaittime", Unit.MILLISECOND, "Milliseconds waiting in create visitor queue, for visitors that was evicted from queue due to higher priority visitors coming"),
223
223
VDS_VISITOR_CV_QUEUEFULL("vds.visitor.cv_queuefull", Unit.OPERATION, "Number of create visitor messages failed as queue is full"),
224
224
VDS_VISITOR_CV_QUEUESIZE("vds.visitor.cv_queuesize", Unit.ITEM, "Size of create visitor queue"),
225
-
VDS_VISITOR_CV_QUEUETIMEOUTWAITTIME("vds.visitor.cv_queuetimeoutwaittime", Unit.MILLISECOND, "Milliseconds waiting in create visitor queue, for visitors that timed out while in the visitor quueue"),
225
+
VDS_VISITOR_CV_QUEUETIMEOUTWAITTIME("vds.visitor.cv_queuetimeoutwaittime", Unit.MILLISECOND, "Milliseconds waiting in create visitor queue, for visitors that timed out while in the visitor queue"),
226
226
VDS_VISITOR_CV_QUEUEWAITTIME("vds.visitor.cv_queuewaittime", Unit.MILLISECOND, "Milliseconds waiting in create visitor queue, for visitors that was added to visitor queue but scheduled later"),
227
227
VDS_VISITOR_CV_SKIPQUEUE("vds.visitor.cv_skipqueue", Unit.OPERATION, "Number of times we could skip queue as we had free visitor spots"),
228
228
229
229
// C++ capability metrics
230
-
VDS_SERVER_NETWORK_RPC_CAPABILITY_CHECKS_FAILED("vds.server.network.rpc-capability-checks-failed", Unit.FAILURE, "Number of RPC operations that failed to due one or more missing capabilities"),
231
-
VDS_SERVER_NETWORK_STATUS_CAPABILITY_CHECKS_FAILED("vds.server.network.status-capability-checks-failed", Unit.FAILURE, "Number of status page operations that failed to due one or more missing capabilities"),
230
+
VDS_SERVER_NETWORK_RPC_CAPABILITY_CHECKS_FAILED("vds.server.network.rpc-capability-checks-failed", Unit.FAILURE, "Number of RPC operations that failed due to one or more missing capabilities"),
231
+
VDS_SERVER_NETWORK_STATUS_CAPABILITY_CHECKS_FAILED("vds.server.network.status-capability-checks-failed", Unit.FAILURE, "Number of status page operations that failed due to one or more missing capabilities"),
232
232
233
233
// C++ Fnet metrics
234
234
VDS_SERVER_FNET_NUM_CONNECTIONS("vds.server.fnet.num-connections", Unit.CONNECTION, "Total number of connection objects");
Copy file name to clipboardExpand all lines: metrics/src/main/java/ai/vespa/metrics/Unit.java
+4-4Lines changed: 4 additions & 4 deletions
Original file line number
Diff line number
Diff line change
@@ -18,21 +18,21 @@ public enum Unit {
18
18
DOLLAR_PER_HOUR(BaseUnit.DOLLAR, BaseUnit.HOUR, "Total current cost of the cluster in $/hr"),
19
19
FAILURE(BaseUnit.FAILURE, "Failures, typically for requests, operations or nodes"),
20
20
FILE(BaseUnit.FILE, "Data file stored on the disk on a node"),
21
-
FRACTION(BaseUnit.FRACTION, "A value in the range [0..1]. Higher values can occur for some metrics, but would indicate the value is outside of the allowed range."),
22
-
GENERATION(BaseUnit.GENERATION,"Typically generation of configuration or application package"),
21
+
FRACTION(BaseUnit.FRACTION, "A value in the range [0..1]. Higher values can occur for some metrics, but would indicate the value is outside the allowed range."),
22
+
GENERATION(BaseUnit.GENERATION,"Typically, generation of configuration or application package"),
23
23
GIGABYTE(BaseUnit.GIGABYTE,"One billion bytes"),
24
24
HIT(BaseUnit.HIT, "Document that meets the filtering/restriction criteria specified by a given query"),
25
25
HIT_PER_QUERY(BaseUnit.HIT, BaseUnit.QUERY, "Number of hits per query over a period of time"),
26
26
HOST(BaseUnit.HOST, "Bare metal computer that contain nodes"),
27
-
INSTANCE(BaseUnit.INSTANCE, "Typically tenant or application"),
27
+
INSTANCE(BaseUnit.INSTANCE, "Typically, tenant or application"),
28
28
ITEM(BaseUnit.ITEM, "Object or unit maintained in e.g. a queue"),
29
29
MILLISECOND(BaseUnit.MILLISECOND, "Millisecond, 1/1000 of a second"),
30
30
NANOSECOND(BaseUnit.NANOSECOND, "Nanosecond, 1/1000.000.000 of a second"),
31
31
NODE(BaseUnit.NODE, "(Virtual) computer that is part of a Vespa cluster"),
32
32
PACKET(BaseUnit.PACKET, "Collection of data transmitted over the network as a single unit"),
33
33
OPERATION(BaseUnit.OPERATION, "A clearly defined task"),
34
34
OPERATION_PER_SECOND(BaseUnit.OPERATION, BaseUnit.SECOND, "Number of operations per second"),
35
-
PERCENTAGE(BaseUnit.PERCENTAGE, "A number expressed as a fraction of 100. Typically in the range [0..100]."),
35
+
PERCENTAGE(BaseUnit.PERCENTAGE, "A number expressed as a fraction of 100, normally in the range [0..100]."),
36
36
QUERY(BaseUnit.QUERY, "A request for matching, grouping and/or scoring documents stored in Vespa"),
37
37
QUERY_PER_SECOND(BaseUnit.QUERY, BaseUnit.SECOND, "Number of queries per second."),
38
38
RECORD(BaseUnit.RECORD, "A collection of information, typically a set of key/value, e.g. stored in a transaction log"),
0 commit comments