cockroachdb
diff --git a/build/tools/gen-cockroachdb-metrics/main.go b/build/tools/gen-cockroachdb-metrics/main.go
Lines changed: 3 additions & 3 deletions
diff --git a/docs/generated/metrics/metrics.yaml b/docs/generated/metrics/metrics.yaml
Lines changed: 126 additions & 126 deletions
diff --git a/pkg/backup/schedule_exec.go b/pkg/backup/schedule_exec.go
Lines changed: 1 addition & 1 deletion
diff --git a/pkg/base/license.go b/pkg/base/license.go
Lines changed: 2 additions & 2 deletions
diff --git a/pkg/ccl/changefeedccl/metrics.go b/pkg/ccl/changefeedccl/metrics.go
Lines changed: 6 additions & 6 deletions
diff --git a/pkg/cli/gen.go b/pkg/cli/gen.go
Lines changed: 7 additions & 2 deletions
diff --git a/pkg/crosscluster/logical/metrics.go b/pkg/crosscluster/logical/metrics.go
Lines changed: 5 additions & 5 deletions
diff --git a/pkg/crosscluster/physical/metrics.go b/pkg/crosscluster/physical/metrics.go
Lines changed: 2 additions & 2 deletions
diff --git a/pkg/jobs/metrics.go b/pkg/jobs/metrics.go
Lines changed: 11 additions & 11 deletions
diff --git a/pkg/jobs/schedule_metrics.go b/pkg/jobs/schedule_metrics.go
Lines changed: 2 additions & 2 deletions
@@ -59,7 +59,7 @@ type MetricInfo struct {
 	Aggregation  string `yaml:"aggregation"`
 	Derivative   string `yaml:"derivative"`
 	HowToUse     string `yaml:"how_to_use,omitempty"`
-	Essential    bool   `yaml:"essential,omitempty"`
+	Visibility   string `yaml:"visibility,omitempty"`
 }
 
 // Category represents a category of metrics
@@ -170,9 +170,9 @@ func parseDatadogMappings(r io.Reader) (map[string]string, error) {
 
 	// CRDB-Datadog mappings are stored as python dictionaries in the following file:
 	// https://github.com/DataDog/integrations-core/blob/master/cockroachdb/datadog_checks/cockroachdb/metrics.py
-	//   - METRIC_MAP: represents the raw CRDB-Datadog metric name mapping. 
+	//   - METRIC_MAP: represents the raw CRDB-Datadog metric name mapping.
 	//   - OMV2_METRIC_MAP: represents the metric in OpenMetrics V2 format.
-	// E.g. 
+	// E.g.
 	// 'admission_errored_sql_kv_response': 'admission.errored.sql_kv.response'
 	// here the key is the CRDB metric name in prometheus format, and the value is the corresponding metric name visible in Datadog.
 	// Both maps are mutually exclusive. Parse both dictionaries to get the complete mapping.
 
@@ -604,7 +604,7 @@ func init() {
 						`),
 						Measurement: "Jobs",
 						Unit:        metric.Unit_TIMESTAMP_SEC,
-						Essential:   true,
+						Visibility:  metric.Metadata_ESSENTIAL,
 						Category:    metric.Metadata_SQL,
 						HowToUse: crstrings.UnwrapText(`
 							Monitor this metric to ensure that backups are meeting the
 
@@ -41,7 +41,7 @@ var LicenseTTLMetadata = metric.Metadata{
 	Help:        "Seconds until license expiry (0 if no license present)",
 	Measurement: "Seconds",
 	Unit:        metric.Unit_SECONDS,
-	Essential:   true,
+	Visibility:  metric.Metadata_ESSENTIAL,
 	Category:    metric.Metadata_EXPIRATIONS,
 	HowToUse:    "See Description.",
 }
@@ -51,7 +51,7 @@ var AdditionalLicenseTTLMetadata = metric.Metadata{
 	Help:        "Seconds until license expiry (0 if no license present)",
 	Measurement: "Seconds",
 	Unit:        metric.Unit_SECONDS,
-	Essential:   true,
+	Visibility:  metric.Metadata_ESSENTIAL,
 	Category:    metric.Metadata_EXPIRATIONS,
 	HowToUse:    "See Description.",
 }
 
@@ -765,7 +765,7 @@ var (
 		Help:        "Total retryable errors encountered by all changefeeds",
 		Measurement: "Errors",
 		Unit:        metric.Unit_COUNT,
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_CHANGEFEEDS,
 		HowToUse: crstrings.UnwrapText(`
 			This metric tracks transient changefeed errors. Alert on "too many"
@@ -781,7 +781,7 @@ var (
 		Help:        "Total number of changefeed jobs which have failed",
 		Measurement: "Errors",
 		Unit:        metric.Unit_COUNT,
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_CHANGEFEEDS,
 		HowToUse: crstrings.UnwrapText(`
 			This metric tracks the permanent changefeed job failures that the jobs
@@ -866,7 +866,7 @@ func newAggregateMetrics(histogramWindow time.Duration, lookup *cidr.Lookup) *Ag
 		Help:        "Messages emitted by all feeds",
 		Measurement: "Messages",
 		Unit:        metric.Unit_COUNT,
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_CHANGEFEEDS,
 		HowToUse: crstrings.UnwrapText(`
 			This metric provides a useful context when assessing the state of
@@ -894,7 +894,7 @@ func newAggregateMetrics(histogramWindow time.Duration, lookup *cidr.Lookup) *Ag
 		Help:        "Bytes emitted by all feeds",
 		Measurement: "Bytes",
 		Unit:        metric.Unit_BYTES,
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_CHANGEFEEDS,
 		HowToUse: crstrings.UnwrapText(`
 			This metric provides a useful context when assessing the state of
@@ -945,7 +945,7 @@ func newAggregateMetrics(histogramWindow time.Duration, lookup *cidr.Lookup) *Ag
 		`),
 		Measurement: "Nanoseconds",
 		Unit:        metric.Unit_NANOSECONDS,
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_CHANGEFEEDS,
 		HowToUse: crstrings.UnwrapText(`
 			This metric provides a useful context when assessing the state of
@@ -982,7 +982,7 @@ func newAggregateMetrics(histogramWindow time.Duration, lookup *cidr.Lookup) *Ag
 		Help:        "Number of currently running changefeeds, including sinkless",
 		Measurement: "Changefeeds",
 		Unit:        metric.Unit_COUNT,
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_CHANGEFEEDS,
 		HowToUse:    `This metric tracks the total number of all running changefeeds.`,
 	}
 
@@ -47,7 +47,7 @@ type MetricInfo struct {
 	Aggregation  string `yaml:"aggregation"`
 	Derivative   string `yaml:"derivative"`
 	HowToUse     string `yaml:"how_to_use,omitempty"`
-	Essential    bool   `yaml:"essential,omitempty"`
+	Visibility   string `yaml:"visibility,omitempty"`
 }
 
 type Category struct {
@@ -477,6 +477,11 @@ func generateMetricList(ctx context.Context, skipFiltering bool) (map[string]*La
 
 		for _, chart := range section.Charts {
 			// There are many charts, but only 1 metric per chart.
+			visibility := chart.Metrics[0].Visibility
+			// Only include visibility if it's not the default INTERNAL value
+			if visibility == "INTERNAL" {
+				visibility = ""
+			}
 			metric := MetricInfo{
 				Name:         chart.Metrics[0].Name,
 				ExportedName: chart.Metrics[0].ExportedName,
@@ -488,7 +493,7 @@ func generateMetricList(ctx context.Context, skipFiltering bool) (map[string]*La
 				Aggregation:  chart.Aggregator.String(),
 				Derivative:   chart.Derivative.String(),
 				HowToUse:     strings.TrimSpace(chart.Metrics[0].HowToUse),
-				Essential:    chart.Metrics[0].Essential,
+				Visibility:   visibility,
 			}
 			category.Metrics = append(category.Metrics, metric)
 		}
 
@@ -18,7 +18,7 @@ var (
 		Name:        "logical_replication.events_ingested",
 		Help:        "Events ingested by all replication jobs",
 		Measurement: "Events",
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_LOGICAL_DATA_REPLICATION,
 		Unit:        metric.Unit_COUNT,
 		HowToUse:    "track events (e.g. updates, deletes, inserts) ingested",
@@ -27,15 +27,15 @@ var (
 		Name:        "logical_replication.events_dlqed",
 		Help:        "Row update events sent to DLQ",
 		Measurement: "Failures",
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_LOGICAL_DATA_REPLICATION,
 		Unit:        metric.Unit_COUNT,
 		HowToUse:    "track events sent to the dead letter queue",
 	}
 	metaReceivedLogicalBytes = metric.Metadata{
 		Name:        "logical_replication.logical_bytes",
 		Help:        "Logical bytes (sum of keys + values) received by all replication jobs",
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_LOGICAL_DATA_REPLICATION,
 		Measurement: "Bytes",
 		Unit:        metric.Unit_BYTES,
@@ -49,7 +49,7 @@ var (
 			between the oldest event in the batch and flush is recorded
 		`),
 		Measurement: "Nanoseconds",
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_LOGICAL_DATA_REPLICATION,
 		Unit:        metric.Unit_NANOSECONDS,
 		HowToUse:    "track the latency of of applying events from source to destination",
@@ -58,7 +58,7 @@ var (
 		Name:        "logical_replication.replicated_time_seconds",
 		Help:        "The replicated time of the logical replication stream in seconds since the unix epoch.",
 		Measurement: "Seconds",
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_LOGICAL_DATA_REPLICATION,
 		Unit:        metric.Unit_SECONDS,
 		HowToUse:    "Track replication lag via current time - logical_replication.replicated_time_seconds",
 
@@ -35,7 +35,7 @@ var (
 		Name:        "physical_replication.logical_bytes",
 		Help:        "Logical bytes (sum of keys + values) ingested by all replication jobs",
 		Measurement: "Bytes",
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_CROSS_CLUSTER_REPLICATION,
 		Unit:        metric.Unit_BYTES,
 		HowToUse:    "Track PCR throughput",
@@ -79,7 +79,7 @@ var (
 		Name:        "physical_replication.replicated_time_seconds",
 		Help:        "The replicated time of the physical replication stream in seconds since the unix epoch.",
 		Measurement: "Seconds",
-		Essential:   true,
+		Visibility:  metric.Metadata_ESSENTIAL,
 		Category:    metric.Metadata_CROSS_CLUSTER_REPLICATION,
 		Unit:        metric.Unit_SECONDS,
 		HowToUse:    "Track replication lag via current time - physical_replication.replicated_time_seconds",
 
@@ -104,7 +104,7 @@ func makeMetaCurrentlyRunning(jt jobspb.Type) metric.Metadata {
 
 	switch jt {
 	case jobspb.TypeCreateStats, jobspb.TypeAutoCreateStats, jobspb.TypeAutoCreatePartialStats:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_SQL
 		var detail string
 		if jt == jobspb.TypeCreateStats {
@@ -116,11 +116,11 @@ func makeMetaCurrentlyRunning(jt jobspb.Type) metric.Metadata {
 		}
 		m.HowToUse = fmt.Sprintf(`This metric tracks the number of active %s statistics jobs that could also be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics.`, detail)
 	case jobspb.TypeBackup:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_SQL
 		m.HowToUse = `See Description.`
 	case jobspb.TypeRowLevelTTL:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_TTL
 		m.HowToUse = `Monitor this metric to ensure there are not too many Row Level TTL jobs running at the same time. Generally, this metric should be in the low single digits.`
 	}
@@ -162,23 +162,23 @@ func makeMetaCurrentlyPaused(jt jobspb.Type) metric.Metadata {
 	}
 	switch jt {
 	case jobspb.TypeAutoCreateStats, jobspb.TypeAutoCreatePartialStats:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_SQL
 		var partialDetail string
 		if jt == jobspb.TypeAutoCreatePartialStats {
 			partialDetail = "partial "
 		}
 		m.HowToUse = fmt.Sprintf(`This metric is a high-level indicator that automatically generated %sstatistics jobs are paused which can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance.`, partialDetail)
 	case jobspb.TypeBackup:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_SQL
 		m.HowToUse = `Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a backup job in a paused state for an extended period of time. In functional areas, a paused job can hold resources or have concurrency impact or some other negative consequence. Paused backup may break the recovery point objective (RPO).`
 	case jobspb.TypeChangefeed:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_CHANGEFEEDS
 		m.HowToUse = `Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a changefeed job in a paused state for an extended period of time. Changefeed jobs should not be paused for a long time because the protected timestamp prevents garbage collection.`
 	case jobspb.TypeRowLevelTTL:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_TTL
 		m.HowToUse = `Monitor this metric to ensure the Row Level TTL job does not remain paused inadvertently for an extended period.`
 	}
@@ -203,7 +203,7 @@ func makeMetaResumeCompeted(jt jobspb.Type) metric.Metadata {
 
 	switch jt {
 	case jobspb.TypeRowLevelTTL:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_TTL
 		m.HowToUse = `If Row Level TTL is enabled, this metric should be nonzero and correspond to the ttl_cron setting that was chosen. If this metric is zero, it means the job is not running`
 	}
@@ -245,15 +245,15 @@ func makeMetaResumeFailed(jt jobspb.Type) metric.Metadata {
 
 	switch jt {
 	case jobspb.TypeAutoCreateStats, jobspb.TypeAutoCreatePartialStats:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_SQL
 		var partialDetail string
 		if jt == jobspb.TypeAutoCreatePartialStats {
 			partialDetail = "partial "
 		}
 		m.HowToUse = fmt.Sprintf(`This metric is a high-level indicator that automatically generated %stable statistics is failing. Failed statistic creation can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance.`, partialDetail)
 	case jobspb.TypeRowLevelTTL:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_TTL
 		m.HowToUse = `This metric should remain at zero. Repeated errors means the Row Level TTL job is not deleting data.`
 	}
@@ -327,7 +327,7 @@ func makeMetaProtectedAge(jt jobspb.Type) metric.Metadata {
 
 	switch jt {
 	case jobspb.TypeChangefeed:
-		m.Essential = true
+		m.Visibility = metric.Metadata_ESSENTIAL
 		m.Category = metric.Metadata_CHANGEFEEDS
 		m.HowToUse = `Changefeeds use protected timestamps to protect the data from being garbage collected. Ensure the protected timestamp age does not significantly exceed the GC TTL zone configuration. Alert on this metric if the protected timestamp age is greater than 3 times the GC TTL.`
 	}
 
@@ -144,13 +144,13 @@ func MakeExecutorMetrics(name string) ExecutorMetrics {
 	}
 
 	if name == tree.ScheduledBackupExecutor.UserName() {
-		m.NumFailed.Essential = true
+		m.NumFailed.Visibility = metric.Metadata_ESSENTIAL
 		m.NumFailed.Category = metric.Metadata_SQL
 		m.NumFailed.HowToUse = `Monitor this metric and investigate backup job failures.`
 	}
 
 	if name == tree.ScheduledRowLevelTTLExecutor.InternalName() {
-		m.NumFailed.Essential = true
+		m.NumFailed.Visibility = metric.Metadata_ESSENTIAL
 		m.NumFailed.Category = metric.Metadata_TTL
 		m.NumFailed.HowToUse = `Monitor this metric to ensure the Row Level TTL job is running. If it is non-zero, it means the job could not be created.`
 	}
Original file line number	Diff line number	Diff line change
`@@ -144,13 +144,13 @@ func MakeExecutorMetrics(name string) ExecutorMetrics {`
`144`	`144`	`}`
`145`	`145`
`146`	`146`	`if name == tree.ScheduledBackupExecutor.UserName() {`
`147`		`- m.NumFailed.Essential = true`
	`147`	`+ m.NumFailed.Visibility = metric.Metadata_ESSENTIAL`
`148`	`148`	`m.NumFailed.Category = metric.Metadata_SQL`
`149`	`149`	m.NumFailed.HowToUse = `Monitor this metric and investigate backup job failures.`
`150`	`150`	`}`
`151`	`151`
`152`	`152`	`if name == tree.ScheduledRowLevelTTLExecutor.InternalName() {`
`153`		`- m.NumFailed.Essential = true`
	`153`	`+ m.NumFailed.Visibility = metric.Metadata_ESSENTIAL`
`154`	`154`	`m.NumFailed.Category = metric.Metadata_TTL`
`155`	`155`	m.NumFailed.HowToUse = `Monitor this metric to ensure the Row Level TTL job is running. If it is non-zero, it means the job could not be created.`
`156`	`156`	`}`