diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..aa09b6e --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +*.iml +*.zip +*.tgz +*.tar.gz +.DS_Store +.idea/ +build/ +.gradle/ +out/ +bin/ +.settings diff --git a/1-custom-resource-definition/alertmanager-crd.yaml b/1-custom-resource-definition/alertmanager-crd.yaml new file mode 100644 index 0000000..7cf1246 --- /dev/null +++ b/1-custom-resource-definition/alertmanager-crd.yaml @@ -0,0 +1,2600 @@ +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + creationTimestamp: null + name: alertmanagers.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + kind: Alertmanager + plural: alertmanagers + scope: Namespaced + validation: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + spec: + description: 'AlertmanagerSpec is a specification of the desired behavior + of the Alertmanager cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + properties: + additionalPeers: + description: AdditionalPeers allows injecting a set of additional Alertmanagers + to peer with to form a highly available cluster. + items: + type: string + type: array + affinity: + description: Affinity is a group of affinity scheduling rules. + properties: + nodeAffinity: + description: Node affinity is a group of node affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the affinity expressions specified by this field, + but it may choose a node that violates one or more of the + expressions. The node that is most preferred is the one with + the greatest sum of weights, i.e. for each node that meets + all of the scheduling requirements (resource request, requiredDuringScheduling + affinity expressions, etc.), compute a sum by iterating through + the elements of this field and adding "weight" to the sum + if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: An empty preferred scheduling term matches all + objects with implicit weight 0 (i.e. it's a no-op). A null + preferred scheduling term matches no objects (i.e. is also + a no-op). + properties: + preference: + description: A null or empty node selector term matches + no objects. The requirements of them are ANDed. The + TopologySelectorTerm type implements a subset of the + NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + weight: + description: Weight associated with matching the corresponding + nodeSelectorTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - preference + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: A node selector represents the union of the results + of one or more label queries over a set of nodes; that is, + it represents the OR of the selectors represented by the node + selector terms. + properties: + nodeSelectorTerms: + description: Required. A list of node selector terms. The + terms are ORed. + items: + description: A null or empty node selector term matches + no objects. The requirements of them are ANDed. The + TopologySelectorTerm type implements a subset of the + NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + type: array + required: + - nodeSelectorTerms + type: object + type: object + podAffinity: + description: Pod affinity is a group of inter pod affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the affinity expressions specified by this field, + but it may choose a node that violates one or more of the + expressions. The node that is most preferred is the one with + the greatest sum of weights, i.e. for each node that meets + all of the scheduling requirements (resource request, requiredDuringScheduling + affinity expressions, etc.), compute a sum by iterating through + the elements of this field and adding "weight" to the sum + if the node has pods which matches the corresponding podAffinityTerm; + the node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) + that this pod should be co-located (affinity) or not + co-located (anti-affinity) with, where co-located is + defined as running on a node whose value of the label + with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: A label selector is a label query over + a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector + matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: A label selector requirement is + a selector that contains values, a key, and + an operator that relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: operator represents a key's + relationship to a set of values. Valid + operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string + values. If the operator is In or NotIn, + the values array must be non-empty. If + the operator is Exists or DoesNotExist, + the values array must be empty. This array + is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator is "In", + and the values array contains only "value". + The requirements are ANDed. + type: object + type: object + namespaces: + description: namespaces specifies which namespaces + the labelSelector applies to (matches against); + null or empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods + matching the labelSelector in the specified namespaces, + where co-located is defined as running on a node + whose value of the label with key topologyKey matches + that of any node on which any of the selected pods + is running. Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: weight associated with matching the corresponding + podAffinityTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - podAffinityTerm + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: If the affinity requirements specified by this + field are not met at scheduling time, the pod will not be + scheduled onto the node. If the affinity requirements specified + by this field cease to be met at some point during pod execution + (e.g. due to a pod label update), the system may or may not + try to eventually evict the pod from its node. When there + are multiple elements, the lists of nodes corresponding to + each podAffinityTerm are intersected, i.e. all terms must + be satisfied. + items: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) that + this pod should be co-located (affinity) or not co-located + (anti-affinity) with, where co-located is defined as running + on a node whose value of the label with key + matches that of any node on which a pod of the set of pods + is running + properties: + labelSelector: + description: A label selector is a label query over a + set of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values + array must be non-empty. If the operator is + Exists or DoesNotExist, the values array must + be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + namespaces: + description: namespaces specifies which namespaces the + labelSelector applies to (matches against); null or + empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where + co-located is defined as running on a node whose value + of the label with key topologyKey matches that of any + node on which any of the selected pods is running. Empty + topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + type: object + podAntiAffinity: + description: Pod anti affinity is a group of inter pod anti affinity + scheduling rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the anti-affinity expressions specified by this + field, but it may choose a node that violates one or more + of the expressions. The node that is most preferred is the + one with the greatest sum of weights, i.e. for each node that + meets all of the scheduling requirements (resource request, + requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field + and adding "weight" to the sum if the node has pods which + matches the corresponding podAffinityTerm; the node(s) with + the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) + that this pod should be co-located (affinity) or not + co-located (anti-affinity) with, where co-located is + defined as running on a node whose value of the label + with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: A label selector is a label query over + a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector + matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: A label selector requirement is + a selector that contains values, a key, and + an operator that relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: operator represents a key's + relationship to a set of values. Valid + operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string + values. If the operator is In or NotIn, + the values array must be non-empty. If + the operator is Exists or DoesNotExist, + the values array must be empty. This array + is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator is "In", + and the values array contains only "value". + The requirements are ANDed. + type: object + type: object + namespaces: + description: namespaces specifies which namespaces + the labelSelector applies to (matches against); + null or empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods + matching the labelSelector in the specified namespaces, + where co-located is defined as running on a node + whose value of the label with key topologyKey matches + that of any node on which any of the selected pods + is running. Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: weight associated with matching the corresponding + podAffinityTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - podAffinityTerm + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: If the anti-affinity requirements specified by + this field are not met at scheduling time, the pod will not + be scheduled onto the node. If the anti-affinity requirements + specified by this field cease to be met at some point during + pod execution (e.g. due to a pod label update), the system + may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding + to each podAffinityTerm are intersected, i.e. all terms must + be satisfied. + items: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) that + this pod should be co-located (affinity) or not co-located + (anti-affinity) with, where co-located is defined as running + on a node whose value of the label with key + matches that of any node on which a pod of the set of pods + is running + properties: + labelSelector: + description: A label selector is a label query over a + set of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values + array must be non-empty. If the operator is + Exists or DoesNotExist, the values array must + be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + namespaces: + description: namespaces specifies which namespaces the + labelSelector applies to (matches against); null or + empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where + co-located is defined as running on a node whose value + of the label with key topologyKey matches that of any + node on which any of the selected pods is running. Empty + topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + type: object + type: object + baseImage: + description: Base image that is used to deploy pods, without tag. + type: string + configMaps: + description: ConfigMaps is a list of ConfigMaps in the same namespace + as the Alertmanager object, which shall be mounted into the Alertmanager + Pods. The ConfigMaps are mounted into /etc/alertmanager/configmaps/. + items: + type: string + type: array + containers: + description: Containers allows injecting additional containers. This + is meant to allow adding an authentication proxy to an Alertmanager + pod. + items: + description: A single application container that you want to run within + a pod. + properties: + args: + description: 'Arguments to the entrypoint. The docker image''s + CMD is used if this is not provided. Variable references $(VAR_NAME) + are expanded using the container''s environment. If a variable + cannot be resolved, the reference in the input string will be + unchanged. The $(VAR_NAME) syntax can be escaped with a double + $$, ie: $$(VAR_NAME). Escaped references will never be expanded, + regardless of whether the variable exists or not. Cannot be + updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + command: + description: 'Entrypoint array. Not executed within a shell. The + docker image''s ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container''s + environment. If a variable cannot be resolved, the reference + in the input string will be unchanged. The $(VAR_NAME) syntax + can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references + will never be expanded, regardless of whether the variable exists + or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + env: + description: List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must be a + C_IDENTIFIER. + type: string + value: + description: 'Variable references $(VAR_NAME) are expanded + using the previous defined environment variables in the + container and any service environment variables. If a + variable cannot be resolved, the reference in the input + string will be unchanged. The $(VAR_NAME) syntax can be + escaped with a double $$, ie: $$(VAR_NAME). Escaped references + will never be expanded, regardless of whether the variable + exists or not. Defaults to "".' + type: string + valueFrom: + description: EnvVarSource represents a source for the value + of an EnvVar. + properties: + configMapKeyRef: + description: Selects a key from a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the ConfigMap or it's + key must be defined + type: boolean + required: + - key + type: object + fieldRef: + description: ObjectFieldSelector selects an APIVersioned + field of an object. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the + specified API version. + type: string + required: + - fieldPath + type: object + resourceFieldRef: + description: ResourceFieldSelector represents container + resources (cpu, memory) and their output format + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: {} + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + secretKeyRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's + key must be defined + type: boolean + required: + - key + type: object + type: object + required: + - name + type: object + type: array + envFrom: + description: List of sources to populate environment variables + in the container. The keys defined within a source must be a + C_IDENTIFIER. All invalid keys will be reported as an event + when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take + precedence. Values defined by an Env with a duplicate key will + take precedence. Cannot be updated. + items: + description: EnvFromSource represents the source of a set of + ConfigMaps + properties: + configMapRef: + description: |- + ConfigMapEnvSource selects a ConfigMap to populate the environment variables with. + + The contents of the target ConfigMap's Data field will represent the key-value pairs as environment variables. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the ConfigMap must be defined + type: boolean + type: object + prefix: + description: An optional identifier to prepend to each key + in the ConfigMap. Must be a C_IDENTIFIER. + type: string + secretRef: + description: |- + SecretEnvSource selects a Secret to populate the environment variables with. + + The contents of the target Secret's Data field will represent the key-value pairs as environment variables. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret must be defined + type: boolean + type: object + type: object + type: array + image: + description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management + to default or override container images in workload controllers + like Deployments and StatefulSets.' + type: string + imagePullPolicy: + description: 'Image pull policy. One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent + otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' + type: string + lifecycle: + description: Lifecycle describes actions that the management system + should take in response to container lifecycle events. For the + PostStart and PreStop lifecycle handlers, management of the + container blocks until the action is complete, unless the container + process fails, in which case the handler is aborted. + properties: + postStart: + description: Handler defines a specific action that should + be taken + properties: + exec: + description: ExecAction describes a "run in container" + action. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', + etc) won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + httpGet: + description: HTTPGetAction describes an action based on + HTTP Get requests. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + tcpSocket: + description: TCPSocketAction describes an action based + on opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + type: object + preStop: + description: Handler defines a specific action that should + be taken + properties: + exec: + description: ExecAction describes a "run in container" + action. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', + etc) won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + httpGet: + description: HTTPGetAction describes an action based on + HTTP Get requests. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + tcpSocket: + description: TCPSocketAction describes an action based + on opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + type: object + type: object + livenessProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object + name: + description: Name of the container specified as a DNS_LABEL. Each + container in a pod must have a unique name (DNS_LABEL). Cannot + be updated. + type: string + ports: + description: List of ports to expose from the container. Exposing + a port here gives the system additional information about the + network connections a container uses, but is primarily informational. + Not specifying a port here DOES NOT prevent that port from being + exposed. Any port which is listening on the default "0.0.0.0" + address inside a container will be accessible from the network. + Cannot be updated. + items: + description: ContainerPort represents a network port in a single + container. + properties: + containerPort: + description: Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external port to. + type: string + hostPort: + description: Number of port to expose on the host. If specified, + this must be a valid port number, 0 < x < 65536. If HostNetwork + is specified, this must match ContainerPort. Most containers + do not need this. + format: int32 + type: integer + name: + description: If specified, this must be an IANA_SVC_NAME + and unique within the pod. Each named port in a pod must + have a unique name. Name for the port that can be referred + to by services. + type: string + protocol: + description: Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + readinessProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object + resources: + description: ResourceRequirements describes the compute resource + requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute + resources required. If Requests is omitted for a container, + it defaults to Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + securityContext: + description: SecurityContext holds security configuration that + will be applied to a container. Some fields are present in both + SecurityContext and PodSecurityContext. When both are set, + the values in SecurityContext take precedence. + properties: + allowPrivilegeEscalation: + description: 'AllowPrivilegeEscalation controls whether a + process can gain more privileges than its parent process. + This bool directly controls if the no_new_privs flag will + be set on the container process. AllowPrivilegeEscalation + is true always when the container is: 1) run as Privileged + 2) has CAP_SYS_ADMIN' + type: boolean + capabilities: + description: Adds and removes POSIX capabilities from running + containers. + properties: + add: + description: Added capabilities + items: + type: string + type: array + drop: + description: Removed capabilities + items: + type: string + type: array + type: object + privileged: + description: Run container in privileged mode. Processes in + privileged containers are essentially equivalent to root + on the host. Defaults to false. + type: boolean + procMount: + description: procMount denotes the type of proc mount to use + for the containers. The default is DefaultProcMount which + uses the container runtime defaults for readonly paths and + masked paths. This requires the ProcMountType feature flag + to be enabled. + type: string + readOnlyRootFilesystem: + description: Whether this container has a read-only root filesystem. + Default is false. + type: boolean + runAsGroup: + description: The GID to run the entrypoint of the container + process. Uses runtime default if unset. May also be set + in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext + takes precedence. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail + to start the container if it does. If unset or false, no + such validation will be performed. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container + process. Defaults to user specified in image metadata if + unspecified. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + format: int64 + type: integer + seLinuxOptions: + description: SELinuxOptions are the labels to be applied to + the container + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + type: object + stdin: + description: Whether this container should allocate a buffer for + stdin in the container runtime. If this is not set, reads from + stdin in the container will always result in EOF. Default is + false. + type: boolean + stdinOnce: + description: Whether the container runtime should close the stdin + channel after it has been opened by a single attach. When stdin + is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container + start, is empty until the first client attaches to stdin, and + then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container + is restarted. If this flag is false, a container processes that + reads from stdin will never receive an EOF. Default is false + type: boolean + terminationMessagePath: + description: 'Optional: Path at which the file to which the container''s + termination message will be written is mounted into the container''s + filesystem. Message written is intended to be brief final status, + such as an assertion failure message. Will be truncated by the + node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. Defaults to /dev/termination-log. + Cannot be updated.' + type: string + terminationMessagePolicy: + description: Indicate how the termination message should be populated. + File will use the contents of terminationMessagePath to populate + the container status message on both success and failure. FallbackToLogsOnError + will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever + is smaller. Defaults to File. Cannot be updated. + type: string + tty: + description: Whether this container should allocate a TTY for + itself, also requires 'stdin' to be true. Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block devices to be + used by the container. This is a beta feature. + items: + description: volumeDevice describes a mapping of a raw block + device within a container. + properties: + devicePath: + description: devicePath is the path inside of the container + that the device will be mapped to. + type: string + name: + description: name must match the name of a persistentVolumeClaim + in the pod + type: string + required: + - name + - devicePath + type: object + type: array + volumeMounts: + description: Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a Volume within + a container. + properties: + mountPath: + description: Path within the container at which the volume + should be mounted. Must not contain ':'. + type: string + mountPropagation: + description: mountPropagation determines how mounts are + propagated from the host to container and the other way + around. When not set, MountPropagationNone is used. This + field is beta in 1.10. + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: Mounted read-only if true, read-write otherwise + (false or unspecified). Defaults to false. + type: boolean + subPath: + description: Path within the volume from which the container's + volume should be mounted. Defaults to "" (volume's root). + type: string + subPathExpr: + description: Expanded path within the volume from which + the container's volume should be mounted. Behaves similarly + to SubPath but environment variable references $(VAR_NAME) + are expanded using the container's environment. Defaults + to "" (volume's root). SubPathExpr and SubPath are mutually + exclusive. This field is alpha in 1.14. + type: string + required: + - name + - mountPath + type: object + type: array + workingDir: + description: Container's working directory. If not specified, + the container runtime's default will be used, which might be + configured in the container image. Cannot be updated. + type: string + required: + - name + type: object + type: array + externalUrl: + description: The external URL the Alertmanager instances will be available + under. This is necessary to generate correct URLs. This is necessary + if Alertmanager is not served from root of a DNS name. + type: string + image: + description: Image if specified has precedence over baseImage, tag and + sha combinations. Specifying the version is still necessary to ensure + the Prometheus Operator knows what version of Alertmanager is being + configured. + type: string + imagePullSecrets: + description: An optional list of references to secrets in the same namespace + to use for pulling prometheus and alertmanager images from registries + see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod + items: + description: LocalObjectReference contains enough information to let + you locate the referenced object inside the same namespace. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + type: object + type: array + listenLocal: + description: ListenLocal makes the Alertmanager server listen on loopback, + so that it does not bind against the Pod IP. Note this is only for + the Alertmanager UI, not the gossip communication. + type: boolean + logFormat: + description: Log format for Alertmanager to be configured with. + type: string + logLevel: + description: Log level for Alertmanager to be configured with. + type: string + nodeSelector: + description: Define which Nodes the Pods are scheduled on. + type: object + paused: + description: If set to true all actions on the underlaying managed objects + are not goint to be performed, except for delete actions. + type: boolean + podMetadata: + description: ObjectMeta is metadata that all persisted resources must + have, which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map stored + with a resource that may be set by external tools to store and + retrieve arbitrary metadata. They are not queryable and should + be preserved when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs to. + This is used to distinguish resources with same name and namespace + in different clusters. This field is not set anywhere right now + and apiserver is going to ignore it if set in create or update + request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of + the factory methods that the time package offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to gracefully + terminate before it will be removed from the system. Only set + when deletionTimestamp is also set. May only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of + the factory methods that the time package offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted from the + registry. Each entry is an identifier for the responsible component + that will remove the entry from the list. If the deletionTimestamp + of the object is non-nil, entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation + of the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that must execute + in order before this object is visible. When the last pending + initializer is removed, and no failing result is set, the + initializers struct will be set to nil and the object is considered + as initialized and visible to all clients. + items: + description: Initializer is information about an initializer + that has not yet completed. + properties: + name: + description: name of the process that is responsible for + initializing this object. + type: string + required: + - name + type: object + type: array + result: + description: Status is a return value for calls that don't return + other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of + this representation of an object. Servers should convert + recognized schemas to the latest internal value, and may + reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this status, + 0 if not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional properties + that MAY be set by the server to provide additional information + about a response. The Reason field of a Status object + defines what attributes will be set. Clients must ignore + fields that do not match the defined type of each attribute, + and should assume that any attribute may be empty, invalid, + or under defined. + properties: + causes: + description: The Causes array includes more details + associated with the StatusReason failure. Not all + StatusReasons may provide detailed causes. + items: + description: StatusCause provides more information + about an api.Status failure, including cases when + multiple errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description of the + cause of the error. This field may be presented + as-is to a reader. + type: string + reason: + description: A machine-readable description of + the cause of the error. If this value is empty + there is no information available. + type: string + type: object + type: array + group: + description: The group attribute of the resource associated + with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource associated + with the status StatusReason. On some operations may + differ from the requested resource Kind. More info: + https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource associated + with the status StatusReason (when there is a single + name which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds before + the operation should be retried. Some errors may indicate + the client must take an alternate action - for those + errors this field may indicate how long to wait before + taking the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there is a + single resource which can be described). More info: + http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + type: object + kind: + description: 'Kind is a string value representing the REST + resource this object represents. Servers may infer this + from the endpoint the client submits requests to. Cannot + be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the status + of this operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic + resources must have, including lists and various status + objects. A resource may have only one of {ObjectMeta, + ListMeta}. + properties: + continue: + description: continue may be set if the user set a limit + on the number of items returned, and indicates that + the server has more data available. The value is opaque + and may be used to issue another request to the endpoint + that served this list to retrieve the next set of + available objects. Continuing a consistent list may + not be possible if the server configuration has changed + or more than a few minutes have passed. The resourceVersion + field returned when using this continue value will + be identical to the value in the first response, unless + you have received this token from an error message. + type: string + resourceVersion: + description: 'String that identifies the server''s internal + version of this object that can be used by clients + to determine when objects have changed. Value must + be treated as opaque by clients and passed unmodified + back to the server. Populated by the system. Read-only. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + type: object + reason: + description: A machine-readable description of why this + operation is in the "Failure" status. If this value is + empty there is no information available. A Reason clarifies + an HTTP status code but does not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" + or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + type: object + required: + - pending + type: object + labels: + description: 'Map of string keys and values that can be used to + organize and categorize (scope and select) objects. May match + selectors of replication controllers and services. More info: + http://kubernetes.io/docs/user-guide/labels' + type: object + managedFields: + description: |- + ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. + + This field is alpha and can be changed or removed without notice. + items: + description: ManagedFieldsEntry is a workflow-id, a FieldSet and + the group version of the resource that the fieldset applies + to. + properties: + apiVersion: + description: APIVersion defines the version of this resource + that this field set applies to. The format is "group/version" + just like the top-level APIVersion field. It is necessary + to track the version of a field set because it cannot be + automatically converted. + type: string + fields: + description: 'Fields stores a set of fields in a data structure + like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + type: object + manager: + description: Manager is an identifier of the workflow managing + these fields. + type: string + operation: + description: Operation is the type of operation which lead + to this ManagedFieldsEntry being created. The only valid + values for this field are 'Apply' and 'Update'. + type: string + time: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package offers. + format: date-time + type: string + type: object + type: array + name: + description: 'Name must be unique within a namespace. Is required + when creating resources, although some resources may allow a client + to request the generation of an appropriate name automatically. + Name is primarily intended for creation idempotence and configuration + definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If ALL objects + in the list have been deleted, this object will be garbage collected. + If this object is managed by a controller, then an entry in this + list will point to this controller, with the controller field + set to true. There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information to let + you identify an owning object. An owning object must be in the + same namespace as the dependent, or be cluster-scoped, so there + is no namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from the key-value + store until this reference is removed. Defaults to false. + To set this field, a user needs "delete" permission of the + owner, otherwise 422 (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the managing + controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: object + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. Populated + by the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + type: object + priorityClassName: + description: Priority class assigned to the Pods + type: string + replicas: + description: Size is the expected size of the alertmanager cluster. + The controller will eventually make the size of the running cluster + equal to the expected size. + format: int32 + type: integer + resources: + description: ResourceRequirements describes the compute resource requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute resources + allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute resources + required. If Requests is omitted for a container, it defaults + to Limits if that is explicitly specified, otherwise to an implementation-defined + value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + retention: + description: Time duration Alertmanager shall retain data for. Default + is '120h', and must match the regular expression `[0-9]+(ms|s|m|h)` + (milliseconds seconds minutes hours). + type: string + routePrefix: + description: The route prefix Alertmanager registers HTTP handlers for. + This is useful, if using ExternalURL and a proxy is rewriting HTTP + routes of a request, and the actual ExternalURL is still true, but + the server serves requests under a different route prefix. For example + for use with `kubectl proxy`. + type: string + secrets: + description: Secrets is a list of Secrets in the same namespace as the + Alertmanager object, which shall be mounted into the Alertmanager + Pods. The Secrets are mounted into /etc/alertmanager/secrets/. + items: + type: string + type: array + securityContext: + description: PodSecurityContext holds pod-level security attributes + and common container settings. Some fields are also present in container.securityContext. Field + values of container.securityContext take precedence over field values + of PodSecurityContext. + properties: + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. Some volume types allow the Kubelet to change the ownership of that volume to be owned by the pod: + + 1. The owning GID will be the FSGroup 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + format: int64 + type: integer + runAsGroup: + description: The GID to run the entrypoint of the container process. + Uses runtime default if unset. May also be set in SecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence for that container. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail to start + the container if it does. If unset or false, no such validation + will be performed. May also be set in SecurityContext. If set + in both SecurityContext and PodSecurityContext, the value specified + in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. May + also be set in SecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + format: int64 + type: integer + seLinuxOptions: + description: SELinuxOptions are the labels to be applied to the + container + properties: + level: + description: Level is SELinux level label that applies to the + container. + type: string + role: + description: Role is a SELinux role label that applies to the + container. + type: string + type: + description: Type is a SELinux type label that applies to the + container. + type: string + user: + description: User is a SELinux user label that applies to the + container. + type: string + type: object + supplementalGroups: + description: A list of groups applied to the first process run in + each container, in addition to the container's primary GID. If + unspecified, no groups will be added to any container. + items: + format: int64 + type: integer + type: array + sysctls: + description: Sysctls hold a list of namespaced sysctls used for + the pod. Pods with unsupported sysctls (by the container runtime) + might fail to launch. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + type: object + serviceAccountName: + description: ServiceAccountName is the name of the ServiceAccount to + use to run the Prometheus Pods. + type: string + sha: + description: SHA of Alertmanager container image to be deployed. Defaults + to the value of `version`. Similar to a tag, but the SHA explicitly + deploys an immutable container image. Version and Tag are ignored + if SHA is set. + type: string + storage: + description: StorageSpec defines the configured storage for a group + Prometheus servers. If neither `emptyDir` nor `volumeClaimTemplate` + is specified, then by default an [EmptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) + will be used. + properties: + emptyDir: + description: Represents an empty directory for a pod. Empty directory + volumes support ownership management and SELinux relabeling. + properties: + medium: + description: 'What type of storage medium should back this directory. + The default is "" which means to use the node''s default medium. + Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' + type: string + sizeLimit: {} + type: object + volumeClaimTemplate: + description: PersistentVolumeClaim is a user's request for and claim + to a persistent volume + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this + representation of an object. Servers should convert recognized + schemas to the latest internal value, and may reject unrecognized + values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource + this object represents. Servers may infer this from the endpoint + the client submits requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + metadata: + description: ObjectMeta is metadata that all persisted resources + must have, which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map + stored with a resource that may be set by external tools + to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs + to. This is used to distinguish resources with same name + and namespace in different clusters. This field is not + set anywhere right now and apiserver is going to ignore + it if set in create or update request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package + offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to + gracefully terminate before it will be removed from the + system. Only set when deletionTimestamp is also set. May + only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package + offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted + from the registry. Each entry is an identifier for the + responsible component that will remove the entry from + the list. If the deletionTimestamp of the object is non-nil, + entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation + of the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that + must execute in order before this object is visible. + When the last pending initializer is removed, and + no failing result is set, the initializers struct + will be set to nil and the object is considered as + initialized and visible to all clients. + items: + description: Initializer is information about an initializer + that has not yet completed. + properties: + name: + description: name of the process that is responsible + for initializing this object. + type: string + required: + - name + type: object + type: array + result: + description: Status is a return value for calls that + don't return other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema + of this representation of an object. Servers should + convert recognized schemas to the latest internal + value, and may reject unrecognized values. More + info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this + status, 0 if not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional + properties that MAY be set by the server to provide + additional information about a response. The Reason + field of a Status object defines what attributes + will be set. Clients must ignore fields that do + not match the defined type of each attribute, + and should assume that any attribute may be empty, + invalid, or under defined. + properties: + causes: + description: The Causes array includes more + details associated with the StatusReason failure. + Not all StatusReasons may provide detailed + causes. + items: + description: StatusCause provides more information + about an api.Status failure, including cases + when multiple errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description + of the cause of the error. This field + may be presented as-is to a reader. + type: string + reason: + description: A machine-readable description + of the cause of the error. If this value + is empty there is no information available. + type: string + type: object + type: array + group: + description: The group attribute of the resource + associated with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource + associated with the status StatusReason. On + some operations may differ from the requested + resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource + associated with the status StatusReason (when + there is a single name which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds + before the operation should be retried. Some + errors may indicate the client must take an + alternate action - for those errors this field + may indicate how long to wait before taking + the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there + is a single resource which can be described). + More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + type: object + kind: + description: 'Kind is a string value representing + the REST resource this object represents. Servers + may infer this from the endpoint the client submits + requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the + status of this operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic + resources must have, including lists and various + status objects. A resource may have only one of + {ObjectMeta, ListMeta}. + properties: + continue: + description: continue may be set if the user + set a limit on the number of items returned, + and indicates that the server has more data + available. The value is opaque and may be + used to issue another request to the endpoint + that served this list to retrieve the next + set of available objects. Continuing a consistent + list may not be possible if the server configuration + has changed or more than a few minutes have + passed. The resourceVersion field returned + when using this continue value will be identical + to the value in the first response, unless + you have received this token from an error + message. + type: string + resourceVersion: + description: 'String that identifies the server''s + internal version of this object that can be + used by clients to determine when objects + have changed. Value must be treated as opaque + by clients and passed unmodified back to the + server. Populated by the system. Read-only. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing + this object. Populated by the system. Read-only. + type: string + type: object + reason: + description: A machine-readable description of why + this operation is in the "Failure" status. If + this value is empty there is no information available. + A Reason clarifies an HTTP status code but does + not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" + or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + type: object + required: + - pending + type: object + labels: + description: 'Map of string keys and values that can be + used to organize and categorize (scope and select) objects. + May match selectors of replication controllers and services. + More info: http://kubernetes.io/docs/user-guide/labels' + type: object + managedFields: + description: |- + ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. + + This field is alpha and can be changed or removed without notice. + items: + description: ManagedFieldsEntry is a workflow-id, a FieldSet + and the group version of the resource that the fieldset + applies to. + properties: + apiVersion: + description: APIVersion defines the version of this + resource that this field set applies to. The format + is "group/version" just like the top-level APIVersion + field. It is necessary to track the version of a + field set because it cannot be automatically converted. + type: string + fields: + description: 'Fields stores a set of fields in a data + structure like a Trie. To understand how this is + used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + type: object + manager: + description: Manager is an identifier of the workflow + managing these fields. + type: string + operation: + description: Operation is the type of operation which + lead to this ManagedFieldsEntry being created. The + only valid values for this field are 'Apply' and + 'Update'. + type: string + time: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + type: object + type: array + name: + description: 'Name must be unique within a namespace. Is + required when creating resources, although some resources + may allow a client to request the generation of an appropriate + name automatically. Name is primarily intended for creation + idempotence and configuration definition. Cannot be updated. + More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If + ALL objects in the list have been deleted, this object + will be garbage collected. If this object is managed by + a controller, then an entry in this list will point to + this controller, with the controller field set to true. + There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information + to let you identify an owning object. An owning object + must be in the same namespace as the dependent, or be + cluster-scoped, so there is no namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from + the key-value store until this reference is removed. + Defaults to false. To set this field, a user needs + "delete" permission of the owner, otherwise 422 + (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the + managing controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: object + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + type: object + spec: + description: PersistentVolumeClaimSpec describes the common + attributes of storage devices and allows a Source for provider-specific + attributes + properties: + accessModes: + description: 'AccessModes contains the desired access modes + the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + dataSource: + description: TypedLocalObjectReference contains enough information + to let you locate the typed referenced object inside the + same namespace. + properties: + apiGroup: + description: APIGroup is the group for the resource + being referenced. If APIGroup is not specified, the + specified Kind must be in the core API group. For + any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + resources: + description: ResourceRequirements describes the compute + resource requirements. + properties: + limits: + description: 'Limits describes the maximum amount of + compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount + of compute resources required. If Requests is omitted + for a container, it defaults to Limits if that is + explicitly specified, otherwise to an implementation-defined + value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + selector: + description: A label selector is a label query over a set + of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + storageClassName: + description: 'Name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' + type: string + volumeMode: + description: volumeMode defines what type of volume is required + by the claim. Value of Filesystem is implied when not + included in claim spec. This is a beta feature. + type: string + volumeName: + description: VolumeName is the binding reference to the + PersistentVolume backing this claim. + type: string + type: object + status: + description: PersistentVolumeClaimStatus is the current status + of a persistent volume claim. + properties: + accessModes: + description: 'AccessModes contains the actual access modes + the volume backing the PVC has. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + capacity: + description: Represents the actual resources of the underlying + volume. + type: object + conditions: + description: Current Condition of persistent volume claim. + If underlying persistent volume is being resized then + the Condition will be set to 'ResizeStarted'. + items: + description: PersistentVolumeClaimCondition contails details + about state of pvc + properties: + lastProbeTime: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + lastTransitionTime: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + message: + description: Human-readable message indicating details + about last transition. + type: string + reason: + description: Unique, this should be a short, machine + understandable string that gives the reason for + condition's last transition. If it reports "ResizeStarted" + that means the underlying persistent volume is being + resized. + type: string + status: + type: string + type: + type: string + required: + - type + - status + type: object + type: array + phase: + description: Phase represents the current phase of PersistentVolumeClaim. + type: string + type: object + type: object + type: object + tag: + description: Tag of Alertmanager container image to be deployed. Defaults + to the value of `version`. Version is ignored if Tag is set. + type: string + tolerations: + description: If specified, the pod's tolerations. + items: + description: The pod this Toleration is attached to tolerates any + taint that matches the triple using the matching + operator . + properties: + effect: + description: Effect indicates the taint effect to match. Empty + means match all taint effects. When specified, allowed values + are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the toleration applies + to. Empty means match all taint keys. If the key is empty, operator + must be Exists; this combination means to match all values and + all keys. + type: string + operator: + description: Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. Exists + is equivalent to wildcard for value, so that a pod can tolerate + all taints of a particular category. + type: string + tolerationSeconds: + description: TolerationSeconds represents the period of time the + toleration (which must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By default, it is not + set, which means tolerate the taint forever (do not evict). + Zero and negative values will be treated as 0 (evict immediately) + by the system. + format: int64 + type: integer + value: + description: Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + version: + description: Version the cluster should be on. + type: string + type: object + status: + description: 'AlertmanagerStatus is the most recent observed status of the + Alertmanager cluster. Read-only. Not included when requesting from the + apiserver, only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + properties: + availableReplicas: + description: Total number of available pods (ready for at least minReadySeconds) + targeted by this Alertmanager cluster. + format: int32 + type: integer + paused: + description: Represents whether any actions on the underlaying managed + objects are being performed. Only delete actions will be performed. + type: boolean + replicas: + description: Total number of non-terminated pods targeted by this Alertmanager + cluster (their labels match the selector). + format: int32 + type: integer + unavailableReplicas: + description: Total number of unavailable pods targeted by this Alertmanager + cluster. + format: int32 + type: integer + updatedReplicas: + description: Total number of non-terminated pods targeted by this Alertmanager + cluster that have the desired version spec. + format: int32 + type: integer + required: + - paused + - replicas + - updatedReplicas + - availableReplicas + - unavailableReplicas + type: object + type: object + version: v1 diff --git a/1-custom-resource-definition/prometheus-crd.yaml b/1-custom-resource-definition/prometheus-crd.yaml new file mode 100644 index 0000000..ae6300f --- /dev/null +++ b/1-custom-resource-definition/prometheus-crd.yaml @@ -0,0 +1,3448 @@ +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + creationTimestamp: null + name: prometheuses.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + kind: Prometheus + plural: prometheuses + scope: Namespaced + validation: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + spec: + description: 'PrometheusSpec is a specification of the desired behavior + of the Prometheus cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + properties: + additionalAlertManagerConfigs: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be a valid + secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must be defined + type: boolean + required: + - key + type: object + additionalAlertRelabelConfigs: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be a valid + secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must be defined + type: boolean + required: + - key + type: object + additionalScrapeConfigs: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be a valid + secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must be defined + type: boolean + required: + - key + type: object + affinity: + description: Affinity is a group of affinity scheduling rules. + properties: + nodeAffinity: + description: Node affinity is a group of node affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the affinity expressions specified by this field, + but it may choose a node that violates one or more of the + expressions. The node that is most preferred is the one with + the greatest sum of weights, i.e. for each node that meets + all of the scheduling requirements (resource request, requiredDuringScheduling + affinity expressions, etc.), compute a sum by iterating through + the elements of this field and adding "weight" to the sum + if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: An empty preferred scheduling term matches all + objects with implicit weight 0 (i.e. it's a no-op). A null + preferred scheduling term matches no objects (i.e. is also + a no-op). + properties: + preference: + description: A null or empty node selector term matches + no objects. The requirements of them are ANDed. The + TopologySelectorTerm type implements a subset of the + NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + weight: + description: Weight associated with matching the corresponding + nodeSelectorTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - preference + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: A node selector represents the union of the results + of one or more label queries over a set of nodes; that is, + it represents the OR of the selectors represented by the node + selector terms. + properties: + nodeSelectorTerms: + description: Required. A list of node selector terms. The + terms are ORed. + items: + description: A null or empty node selector term matches + no objects. The requirements of them are ANDed. The + TopologySelectorTerm type implements a subset of the + NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector requirements + by node's labels. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchFields: + description: A list of node selector requirements + by node's fields. + items: + description: A node selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: The label key that the selector + applies to. + type: string + operator: + description: Represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: An array of string values. If the + operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be + empty. If the operator is Gt or Lt, the values + array must have a single element, which will + be interpreted as an integer. This array is + replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + type: object + type: array + required: + - nodeSelectorTerms + type: object + type: object + podAffinity: + description: Pod affinity is a group of inter pod affinity scheduling + rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the affinity expressions specified by this field, + but it may choose a node that violates one or more of the + expressions. The node that is most preferred is the one with + the greatest sum of weights, i.e. for each node that meets + all of the scheduling requirements (resource request, requiredDuringScheduling + affinity expressions, etc.), compute a sum by iterating through + the elements of this field and adding "weight" to the sum + if the node has pods which matches the corresponding podAffinityTerm; + the node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) + that this pod should be co-located (affinity) or not + co-located (anti-affinity) with, where co-located is + defined as running on a node whose value of the label + with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: A label selector is a label query over + a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector + matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: A label selector requirement is + a selector that contains values, a key, and + an operator that relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: operator represents a key's + relationship to a set of values. Valid + operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string + values. If the operator is In or NotIn, + the values array must be non-empty. If + the operator is Exists or DoesNotExist, + the values array must be empty. This array + is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator is "In", + and the values array contains only "value". + The requirements are ANDed. + type: object + type: object + namespaces: + description: namespaces specifies which namespaces + the labelSelector applies to (matches against); + null or empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods + matching the labelSelector in the specified namespaces, + where co-located is defined as running on a node + whose value of the label with key topologyKey matches + that of any node on which any of the selected pods + is running. Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: weight associated with matching the corresponding + podAffinityTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - podAffinityTerm + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: If the affinity requirements specified by this + field are not met at scheduling time, the pod will not be + scheduled onto the node. If the affinity requirements specified + by this field cease to be met at some point during pod execution + (e.g. due to a pod label update), the system may or may not + try to eventually evict the pod from its node. When there + are multiple elements, the lists of nodes corresponding to + each podAffinityTerm are intersected, i.e. all terms must + be satisfied. + items: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) that + this pod should be co-located (affinity) or not co-located + (anti-affinity) with, where co-located is defined as running + on a node whose value of the label with key + matches that of any node on which a pod of the set of pods + is running + properties: + labelSelector: + description: A label selector is a label query over a + set of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values + array must be non-empty. If the operator is + Exists or DoesNotExist, the values array must + be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + namespaces: + description: namespaces specifies which namespaces the + labelSelector applies to (matches against); null or + empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where + co-located is defined as running on a node whose value + of the label with key topologyKey matches that of any + node on which any of the selected pods is running. Empty + topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + type: object + podAntiAffinity: + description: Pod anti affinity is a group of inter pod anti affinity + scheduling rules. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: The scheduler will prefer to schedule pods to nodes + that satisfy the anti-affinity expressions specified by this + field, but it may choose a node that violates one or more + of the expressions. The node that is most preferred is the + one with the greatest sum of weights, i.e. for each node that + meets all of the scheduling requirements (resource request, + requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field + and adding "weight" to the sum if the node has pods which + matches the corresponding podAffinityTerm; the node(s) with + the highest sum are the most preferred. + items: + description: The weights of all of the matched WeightedPodAffinityTerm + fields are added per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) + that this pod should be co-located (affinity) or not + co-located (anti-affinity) with, where co-located is + defined as running on a node whose value of the label + with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: A label selector is a label query over + a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector + matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are + ANDed. + items: + description: A label selector requirement is + a selector that contains values, a key, and + an operator that relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: operator represents a key's + relationship to a set of values. Valid + operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string + values. If the operator is In or NotIn, + the values array must be non-empty. If + the operator is Exists or DoesNotExist, + the values array must be empty. This array + is replaced during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} + pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, + whose key field is "key", the operator is "In", + and the values array contains only "value". + The requirements are ANDed. + type: object + type: object + namespaces: + description: namespaces specifies which namespaces + the labelSelector applies to (matches against); + null or empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods + matching the labelSelector in the specified namespaces, + where co-located is defined as running on a node + whose value of the label with key topologyKey matches + that of any node on which any of the selected pods + is running. Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: weight associated with matching the corresponding + podAffinityTerm, in the range 1-100. + format: int32 + type: integer + required: + - weight + - podAffinityTerm + type: object + type: array + requiredDuringSchedulingIgnoredDuringExecution: + description: If the anti-affinity requirements specified by + this field are not met at scheduling time, the pod will not + be scheduled onto the node. If the anti-affinity requirements + specified by this field cease to be met at some point during + pod execution (e.g. due to a pod label update), the system + may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding + to each podAffinityTerm are intersected, i.e. all terms must + be satisfied. + items: + description: Defines a set of pods (namely those matching + the labelSelector relative to the given namespace(s)) that + this pod should be co-located (affinity) or not co-located + (anti-affinity) with, where co-located is defined as running + on a node whose value of the label with key + matches that of any node on which a pod of the set of pods + is running + properties: + labelSelector: + description: A label selector is a label query over a + set of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values + array must be non-empty. If the operator is + Exists or DoesNotExist, the values array must + be empty. This array is replaced during a + strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + namespaces: + description: namespaces specifies which namespaces the + labelSelector applies to (matches against); null or + empty list means "this pod's namespace" + items: + type: string + type: array + topologyKey: + description: This pod should be co-located (affinity) + or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where + co-located is defined as running on a node whose value + of the label with key topologyKey matches that of any + node on which any of the selected pods is running. Empty + topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + type: object + type: object + alerting: + description: AlertingSpec defines parameters for alerting configuration + of Prometheus servers. + properties: + alertmanagers: + description: AlertmanagerEndpoints Prometheus should fire alerts + against. + items: + description: AlertmanagerEndpoints defines a selection of a single + Endpoints object containing alertmanager IPs to fire alerts + against. + properties: + bearerTokenFile: + description: BearerTokenFile to read from filesystem to use + when authenticating to Alertmanager. + type: string + name: + description: Name of Endpoints object in Namespace. + type: string + namespace: + description: Namespace of Endpoints object. + type: string + pathPrefix: + description: Prefix for the HTTP path alerts are pushed to. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use when firing alerts. + type: string + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + required: + - namespace + - name + - port + type: object + type: array + required: + - alertmanagers + type: object + apiserverConfig: + description: 'APIServerConfig defines a host and auth methods to access + apiserver. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config' + properties: + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over basic + authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints' + properties: + password: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + username: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + type: object + bearerToken: + description: Bearer token for accessing apiserver. + type: string + bearerTokenFile: + description: File to read bearer token for accessing apiserver. + type: string + host: + description: Host of apiserver. A valid string consisting of a hostname + or IP followed by an optional port number + type: string + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + required: + - host + type: object + baseImage: + description: Base image to use for a Prometheus deployment. + type: string + configMaps: + description: ConfigMaps is a list of ConfigMaps in the same namespace + as the Prometheus object, which shall be mounted into the Prometheus + Pods. The ConfigMaps are mounted into /etc/prometheus/configmaps/. + items: + type: string + type: array + containers: + description: 'Containers allows injecting additional containers or modifying + operator generated containers. This can be used to allow adding an + authentication proxy to a Prometheus pod or to change the behavior + of an operator generated container. Containers described here modify + an operator generated container if they share the same name and modifications + are done via a strategic merge patch. The current container names + are: `prometheus`, `prometheus-config-reloader`, `rules-configmap-reloader`, + and `thanos-sidecar`. Overriding containers is entirely outside the + scope of what the maintainers will support and by doing so, you accept + that this behaviour may break at any time without notice.' + items: + description: A single application container that you want to run within + a pod. + properties: + args: + description: 'Arguments to the entrypoint. The docker image''s + CMD is used if this is not provided. Variable references $(VAR_NAME) + are expanded using the container''s environment. If a variable + cannot be resolved, the reference in the input string will be + unchanged. The $(VAR_NAME) syntax can be escaped with a double + $$, ie: $$(VAR_NAME). Escaped references will never be expanded, + regardless of whether the variable exists or not. Cannot be + updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + command: + description: 'Entrypoint array. Not executed within a shell. The + docker image''s ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container''s + environment. If a variable cannot be resolved, the reference + in the input string will be unchanged. The $(VAR_NAME) syntax + can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references + will never be expanded, regardless of whether the variable exists + or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + env: + description: List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must be a + C_IDENTIFIER. + type: string + value: + description: 'Variable references $(VAR_NAME) are expanded + using the previous defined environment variables in the + container and any service environment variables. If a + variable cannot be resolved, the reference in the input + string will be unchanged. The $(VAR_NAME) syntax can be + escaped with a double $$, ie: $$(VAR_NAME). Escaped references + will never be expanded, regardless of whether the variable + exists or not. Defaults to "".' + type: string + valueFrom: + description: EnvVarSource represents a source for the value + of an EnvVar. + properties: + configMapKeyRef: + description: Selects a key from a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the ConfigMap or it's + key must be defined + type: boolean + required: + - key + type: object + fieldRef: + description: ObjectFieldSelector selects an APIVersioned + field of an object. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the + specified API version. + type: string + required: + - fieldPath + type: object + resourceFieldRef: + description: ResourceFieldSelector represents container + resources (cpu, memory) and their output format + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: {} + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + secretKeyRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's + key must be defined + type: boolean + required: + - key + type: object + type: object + required: + - name + type: object + type: array + envFrom: + description: List of sources to populate environment variables + in the container. The keys defined within a source must be a + C_IDENTIFIER. All invalid keys will be reported as an event + when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take + precedence. Values defined by an Env with a duplicate key will + take precedence. Cannot be updated. + items: + description: EnvFromSource represents the source of a set of + ConfigMaps + properties: + configMapRef: + description: |- + ConfigMapEnvSource selects a ConfigMap to populate the environment variables with. + + The contents of the target ConfigMap's Data field will represent the key-value pairs as environment variables. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the ConfigMap must be defined + type: boolean + type: object + prefix: + description: An optional identifier to prepend to each key + in the ConfigMap. Must be a C_IDENTIFIER. + type: string + secretRef: + description: |- + SecretEnvSource selects a Secret to populate the environment variables with. + + The contents of the target Secret's Data field will represent the key-value pairs as environment variables. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret must be defined + type: boolean + type: object + type: object + type: array + image: + description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management + to default or override container images in workload controllers + like Deployments and StatefulSets.' + type: string + imagePullPolicy: + description: 'Image pull policy. One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent + otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' + type: string + lifecycle: + description: Lifecycle describes actions that the management system + should take in response to container lifecycle events. For the + PostStart and PreStop lifecycle handlers, management of the + container blocks until the action is complete, unless the container + process fails, in which case the handler is aborted. + properties: + postStart: + description: Handler defines a specific action that should + be taken + properties: + exec: + description: ExecAction describes a "run in container" + action. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', + etc) won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + httpGet: + description: HTTPGetAction describes an action based on + HTTP Get requests. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + tcpSocket: + description: TCPSocketAction describes an action based + on opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + type: object + preStop: + description: Handler defines a specific action that should + be taken + properties: + exec: + description: ExecAction describes a "run in container" + action. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', + etc) won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + httpGet: + description: HTTPGetAction describes an action based on + HTTP Get requests. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + tcpSocket: + description: TCPSocketAction describes an action based + on opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + type: object + type: object + livenessProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object + name: + description: Name of the container specified as a DNS_LABEL. Each + container in a pod must have a unique name (DNS_LABEL). Cannot + be updated. + type: string + ports: + description: List of ports to expose from the container. Exposing + a port here gives the system additional information about the + network connections a container uses, but is primarily informational. + Not specifying a port here DOES NOT prevent that port from being + exposed. Any port which is listening on the default "0.0.0.0" + address inside a container will be accessible from the network. + Cannot be updated. + items: + description: ContainerPort represents a network port in a single + container. + properties: + containerPort: + description: Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external port to. + type: string + hostPort: + description: Number of port to expose on the host. If specified, + this must be a valid port number, 0 < x < 65536. If HostNetwork + is specified, this must match ContainerPort. Most containers + do not need this. + format: int32 + type: integer + name: + description: If specified, this must be an IANA_SVC_NAME + and unique within the pod. Each named port in a pod must + have a unique name. Name for the port that can be referred + to by services. + type: string + protocol: + description: Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + readinessProbe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: ExecAction describes a "run in container" action. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGetAction describes an action based on HTTP + Get requests. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: string + - type: integer + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocketAction describes an action based on + opening a socket + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: string + - type: integer + required: + - port + type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object + resources: + description: ResourceRequirements describes the compute resource + requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute + resources required. If Requests is omitted for a container, + it defaults to Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + securityContext: + description: SecurityContext holds security configuration that + will be applied to a container. Some fields are present in both + SecurityContext and PodSecurityContext. When both are set, + the values in SecurityContext take precedence. + properties: + allowPrivilegeEscalation: + description: 'AllowPrivilegeEscalation controls whether a + process can gain more privileges than its parent process. + This bool directly controls if the no_new_privs flag will + be set on the container process. AllowPrivilegeEscalation + is true always when the container is: 1) run as Privileged + 2) has CAP_SYS_ADMIN' + type: boolean + capabilities: + description: Adds and removes POSIX capabilities from running + containers. + properties: + add: + description: Added capabilities + items: + type: string + type: array + drop: + description: Removed capabilities + items: + type: string + type: array + type: object + privileged: + description: Run container in privileged mode. Processes in + privileged containers are essentially equivalent to root + on the host. Defaults to false. + type: boolean + procMount: + description: procMount denotes the type of proc mount to use + for the containers. The default is DefaultProcMount which + uses the container runtime defaults for readonly paths and + masked paths. This requires the ProcMountType feature flag + to be enabled. + type: string + readOnlyRootFilesystem: + description: Whether this container has a read-only root filesystem. + Default is false. + type: boolean + runAsGroup: + description: The GID to run the entrypoint of the container + process. Uses runtime default if unset. May also be set + in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext + takes precedence. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail + to start the container if it does. If unset or false, no + such validation will be performed. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container + process. Defaults to user specified in image metadata if + unspecified. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + format: int64 + type: integer + seLinuxOptions: + description: SELinuxOptions are the labels to be applied to + the container + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + type: object + stdin: + description: Whether this container should allocate a buffer for + stdin in the container runtime. If this is not set, reads from + stdin in the container will always result in EOF. Default is + false. + type: boolean + stdinOnce: + description: Whether the container runtime should close the stdin + channel after it has been opened by a single attach. When stdin + is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container + start, is empty until the first client attaches to stdin, and + then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container + is restarted. If this flag is false, a container processes that + reads from stdin will never receive an EOF. Default is false + type: boolean + terminationMessagePath: + description: 'Optional: Path at which the file to which the container''s + termination message will be written is mounted into the container''s + filesystem. Message written is intended to be brief final status, + such as an assertion failure message. Will be truncated by the + node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. Defaults to /dev/termination-log. + Cannot be updated.' + type: string + terminationMessagePolicy: + description: Indicate how the termination message should be populated. + File will use the contents of terminationMessagePath to populate + the container status message on both success and failure. FallbackToLogsOnError + will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever + is smaller. Defaults to File. Cannot be updated. + type: string + tty: + description: Whether this container should allocate a TTY for + itself, also requires 'stdin' to be true. Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block devices to be + used by the container. This is a beta feature. + items: + description: volumeDevice describes a mapping of a raw block + device within a container. + properties: + devicePath: + description: devicePath is the path inside of the container + that the device will be mapped to. + type: string + name: + description: name must match the name of a persistentVolumeClaim + in the pod + type: string + required: + - name + - devicePath + type: object + type: array + volumeMounts: + description: Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a Volume within + a container. + properties: + mountPath: + description: Path within the container at which the volume + should be mounted. Must not contain ':'. + type: string + mountPropagation: + description: mountPropagation determines how mounts are + propagated from the host to container and the other way + around. When not set, MountPropagationNone is used. This + field is beta in 1.10. + type: string + name: + description: This must match the Name of a Volume. + type: string + readOnly: + description: Mounted read-only if true, read-write otherwise + (false or unspecified). Defaults to false. + type: boolean + subPath: + description: Path within the volume from which the container's + volume should be mounted. Defaults to "" (volume's root). + type: string + subPathExpr: + description: Expanded path within the volume from which + the container's volume should be mounted. Behaves similarly + to SubPath but environment variable references $(VAR_NAME) + are expanded using the container's environment. Defaults + to "" (volume's root). SubPathExpr and SubPath are mutually + exclusive. This field is alpha in 1.14. + type: string + required: + - name + - mountPath + type: object + type: array + workingDir: + description: Container's working directory. If not specified, + the container runtime's default will be used, which might be + configured in the container image. Cannot be updated. + type: string + required: + - name + type: object + type: array + enableAdminAPI: + description: 'Enable access to prometheus web admin API. Defaults to + the value of `false`. WARNING: Enabling the admin APIs enables mutating + endpoints, to delete data, shutdown Prometheus, and more. Enabling + this should be done with care and the user is advised to add additional + authentication authorization via a proxy to ensure only clients authorized + to perform these actions can do so. For more information see https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis' + type: boolean + evaluationInterval: + description: Interval between consecutive evaluations. + type: string + externalLabels: + description: The labels to add to any time series or alerts when communicating + with external systems (federation, remote storage, Alertmanager). + type: object + externalUrl: + description: The external URL the Prometheus instances will be available + under. This is necessary to generate correct URLs. This is necessary + if Prometheus is not served from root of a DNS name. + type: string + image: + description: Image if specified has precedence over baseImage, tag and + sha combinations. Specifying the version is still necessary to ensure + the Prometheus Operator knows what version of Prometheus is being + configured. + type: string + imagePullSecrets: + description: An optional list of references to secrets in the same namespace + to use for pulling prometheus and alertmanager images from registries + see http://kubernetes.io/docs/user-guide/images#specifying-imagepullsecrets-on-a-pod + items: + description: LocalObjectReference contains enough information to let + you locate the referenced object inside the same namespace. + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + type: object + type: array + listenLocal: + description: ListenLocal makes the Prometheus server listen on loopback, + so that it does not bind against the Pod IP. + type: boolean + logFormat: + description: Log format for Prometheus to be configured with. + type: string + logLevel: + description: Log level for Prometheus to be configured with. + type: string + nodeSelector: + description: Define which Nodes the Pods are scheduled on. + type: object + paused: + description: When a Prometheus deployment is paused, no actions except + for deletion will be performed on the underlying objects. + type: boolean + podMetadata: + description: ObjectMeta is metadata that all persisted resources must + have, which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map stored + with a resource that may be set by external tools to store and + retrieve arbitrary metadata. They are not queryable and should + be preserved when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs to. + This is used to distinguish resources with same name and namespace + in different clusters. This field is not set anywhere right now + and apiserver is going to ignore it if set in create or update + request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of + the factory methods that the time package offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to gracefully + terminate before it will be removed from the system. Only set + when deletionTimestamp is also set. May only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of + the factory methods that the time package offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted from the + registry. Each entry is an identifier for the responsible component + that will remove the entry from the list. If the deletionTimestamp + of the object is non-nil, entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation + of the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that must execute + in order before this object is visible. When the last pending + initializer is removed, and no failing result is set, the + initializers struct will be set to nil and the object is considered + as initialized and visible to all clients. + items: + description: Initializer is information about an initializer + that has not yet completed. + properties: + name: + description: name of the process that is responsible for + initializing this object. + type: string + required: + - name + type: object + type: array + result: + description: Status is a return value for calls that don't return + other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of + this representation of an object. Servers should convert + recognized schemas to the latest internal value, and may + reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this status, + 0 if not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional properties + that MAY be set by the server to provide additional information + about a response. The Reason field of a Status object + defines what attributes will be set. Clients must ignore + fields that do not match the defined type of each attribute, + and should assume that any attribute may be empty, invalid, + or under defined. + properties: + causes: + description: The Causes array includes more details + associated with the StatusReason failure. Not all + StatusReasons may provide detailed causes. + items: + description: StatusCause provides more information + about an api.Status failure, including cases when + multiple errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description of the + cause of the error. This field may be presented + as-is to a reader. + type: string + reason: + description: A machine-readable description of + the cause of the error. If this value is empty + there is no information available. + type: string + type: object + type: array + group: + description: The group attribute of the resource associated + with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource associated + with the status StatusReason. On some operations may + differ from the requested resource Kind. More info: + https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource associated + with the status StatusReason (when there is a single + name which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds before + the operation should be retried. Some errors may indicate + the client must take an alternate action - for those + errors this field may indicate how long to wait before + taking the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there is a + single resource which can be described). More info: + http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + type: object + kind: + description: 'Kind is a string value representing the REST + resource this object represents. Servers may infer this + from the endpoint the client submits requests to. Cannot + be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the status + of this operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic + resources must have, including lists and various status + objects. A resource may have only one of {ObjectMeta, + ListMeta}. + properties: + continue: + description: continue may be set if the user set a limit + on the number of items returned, and indicates that + the server has more data available. The value is opaque + and may be used to issue another request to the endpoint + that served this list to retrieve the next set of + available objects. Continuing a consistent list may + not be possible if the server configuration has changed + or more than a few minutes have passed. The resourceVersion + field returned when using this continue value will + be identical to the value in the first response, unless + you have received this token from an error message. + type: string + resourceVersion: + description: 'String that identifies the server''s internal + version of this object that can be used by clients + to determine when objects have changed. Value must + be treated as opaque by clients and passed unmodified + back to the server. Populated by the system. Read-only. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + type: object + reason: + description: A machine-readable description of why this + operation is in the "Failure" status. If this value is + empty there is no information available. A Reason clarifies + an HTTP status code but does not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" + or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + type: object + required: + - pending + type: object + labels: + description: 'Map of string keys and values that can be used to + organize and categorize (scope and select) objects. May match + selectors of replication controllers and services. More info: + http://kubernetes.io/docs/user-guide/labels' + type: object + managedFields: + description: |- + ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. + + This field is alpha and can be changed or removed without notice. + items: + description: ManagedFieldsEntry is a workflow-id, a FieldSet and + the group version of the resource that the fieldset applies + to. + properties: + apiVersion: + description: APIVersion defines the version of this resource + that this field set applies to. The format is "group/version" + just like the top-level APIVersion field. It is necessary + to track the version of a field set because it cannot be + automatically converted. + type: string + fields: + description: 'Fields stores a set of fields in a data structure + like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + type: object + manager: + description: Manager is an identifier of the workflow managing + these fields. + type: string + operation: + description: Operation is the type of operation which lead + to this ManagedFieldsEntry being created. The only valid + values for this field are 'Apply' and 'Update'. + type: string + time: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package offers. + format: date-time + type: string + type: object + type: array + name: + description: 'Name must be unique within a namespace. Is required + when creating resources, although some resources may allow a client + to request the generation of an appropriate name automatically. + Name is primarily intended for creation idempotence and configuration + definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If ALL objects + in the list have been deleted, this object will be garbage collected. + If this object is managed by a controller, then an entry in this + list will point to this controller, with the controller field + set to true. There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information to let + you identify an owning object. An owning object must be in the + same namespace as the dependent, or be cluster-scoped, so there + is no namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from the key-value + store until this reference is removed. Defaults to false. + To set this field, a user needs "delete" permission of the + owner, otherwise 422 (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the managing + controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: object + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. Populated + by the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + type: object + priorityClassName: + description: Priority class assigned to the Pods + type: string + prometheusExternalLabelName: + description: Name of Prometheus external label used to denote Prometheus + instance name. Defaults to the value of `prometheus`. External label + will _not_ be added when value is set to empty string (`""`). + type: string + query: + description: QuerySpec defines the query command line flags when starting + Prometheus. + properties: + lookbackDelta: + description: The delta difference allowed for retrieving metrics + during expression evaluations. + type: string + maxConcurrency: + description: Number of concurrent queries that can be run at once. + format: int32 + type: integer + maxSamples: + description: Maximum number of samples a single query can load into + memory. Note that queries will fail if they would load more samples + than this into memory, so this also limits the number of samples + a query can return. + format: int32 + type: integer + timeout: + description: Maximum time a query may take before being aborted. + type: string + type: object + remoteRead: + description: If specified, the remote_read spec. This is an experimental + feature, it may change in any upcoming release in a breaking way. + items: + description: RemoteReadSpec defines the remote_read configuration + for prometheus. + properties: + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over + basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints' + properties: + password: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + username: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + type: object + bearerToken: + description: bearer token for remote read. + type: string + bearerTokenFile: + description: File to read bearer token for remote read. + type: string + proxyUrl: + description: Optional ProxyURL + type: string + readRecent: + description: Whether reads should be made for queries for time + ranges that the local storage should have complete data for. + type: boolean + remoteTimeout: + description: Timeout for requests to the remote read endpoint. + type: string + requiredMatchers: + description: An optional list of equality matchers which have + to be present in a selector to query the remote read endpoint. + type: object + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + url: + description: The URL of the endpoint to send samples to. + type: string + required: + - url + type: object + type: array + remoteWrite: + description: If specified, the remote_write spec. This is an experimental + feature, it may change in any upcoming release in a breaking way. + items: + description: RemoteWriteSpec defines the remote_write configuration + for prometheus. + properties: + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over + basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints' + properties: + password: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + username: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + type: object + bearerToken: + description: File to read bearer token for remote write. + type: string + bearerTokenFile: + description: File to read bearer token for remote write. + type: string + proxyUrl: + description: Optional ProxyURL + type: string + queueConfig: + description: QueueConfig allows the tuning of remote_write queue_config + parameters. This object is referenced in the RemoteWriteSpec + object. + properties: + batchSendDeadline: + description: BatchSendDeadline is the maximum time a sample + will wait in buffer. + type: string + capacity: + description: Capacity is the number of samples to buffer per + shard before we start dropping them. + format: int32 + type: integer + maxBackoff: + description: MaxBackoff is the maximum retry delay. + type: string + maxRetries: + description: MaxRetries is the maximum number of times to + retry a batch on recoverable errors. + format: int32 + type: integer + maxSamplesPerSend: + description: MaxSamplesPerSend is the maximum number of samples + per send. + format: int32 + type: integer + maxShards: + description: MaxShards is the maximum number of shards, i.e. + amount of concurrency. + format: int32 + type: integer + minBackoff: + description: MinBackoff is the initial retry delay. Gets doubled + for every retry. + type: string + minShards: + description: MinShards is the minimum number of shards, i.e. + amount of concurrency. + format: int32 + type: integer + type: object + remoteTimeout: + description: Timeout for requests to the remote write endpoint. + type: string + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + url: + description: The URL of the endpoint to send samples to. + type: string + writeRelabelConfigs: + description: The list of remote write relabel configurations. + items: + description: 'RelabelConfig allows dynamic rewriting of the + label set, being applied to samples before ingestion. It defines + ``-section of Prometheus configuration. + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + description: Action to perform based on regex matching. + Default is 'replace' + type: string + modulus: + description: Modulus to take of the hash of the source label + values. + format: int64 + type: integer + regex: + description: Regular expression against which the extracted + value is matched. defailt is '(.*)' + type: string + replacement: + description: Replacement value against which a regex replace + is performed if the regular expression matches. Regex + capture groups are available. Default is '$1' + type: string + separator: + description: Separator placed between concatenated source + label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing + labels. Their content is concatenated using the configured + separator and matched against the configured regular expression + for the replace, keep, and drop actions. + items: + type: string + type: array + targetLabel: + description: Label to which the resulting value is written + in a replace action. It is mandatory for replace actions. + Regex capture groups are available. + type: string + type: object + type: array + required: + - url + type: object + type: array + replicaExternalLabelName: + description: Name of Prometheus external label used to denote replica + name. Defaults to the value of `prometheus_replica`. External label + will _not_ be added when value is set to empty string (`""`). + type: string + replicas: + description: Number of instances to deploy for a Prometheus deployment. + format: int32 + type: integer + resources: + description: ResourceRequirements describes the compute resource requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute resources + allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute resources + required. If Requests is omitted for a container, it defaults + to Limits if that is explicitly specified, otherwise to an implementation-defined + value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + retention: + description: Time duration Prometheus shall retain data for. Default + is '24h', and must match the regular expression `[0-9]+(ms|s|m|h|d|w|y)` + (milliseconds seconds minutes hours days weeks years). + type: string + routePrefix: + description: The route prefix Prometheus registers HTTP handlers for. + This is useful, if using ExternalURL and a proxy is rewriting HTTP + routes of a request, and the actual ExternalURL is still true, but + the server serves requests under a different route prefix. For example + for use with `kubectl proxy`. + type: string + ruleNamespaceSelector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + ruleSelector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + rules: + description: /--rules.*/ command-line arguments + properties: + alert: + description: /--rules.alert.*/ command-line arguments + properties: + forGracePeriod: + description: Minimum duration between alert and restored 'for' + state. This is maintained only for alerts with configured + 'for' time greater than grace period. + type: string + forOutageTolerance: + description: Max time to tolerate prometheus outage for restoring + 'for' state of alert. + type: string + resendDelay: + description: Minimum amount of time to wait before resending + an alert to Alertmanager. + type: string + type: object + type: object + scrapeInterval: + description: Interval between consecutive scrapes. + type: string + secrets: + description: Secrets is a list of Secrets in the same namespace as the + Prometheus object, which shall be mounted into the Prometheus Pods. + The Secrets are mounted into /etc/prometheus/secrets/. + items: + type: string + type: array + securityContext: + description: PodSecurityContext holds pod-level security attributes + and common container settings. Some fields are also present in container.securityContext. Field + values of container.securityContext take precedence over field values + of PodSecurityContext. + properties: + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. Some volume types allow the Kubelet to change the ownership of that volume to be owned by the pod: + + 1. The owning GID will be the FSGroup 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + format: int64 + type: integer + runAsGroup: + description: The GID to run the entrypoint of the container process. + Uses runtime default if unset. May also be set in SecurityContext. If + set in both SecurityContext and PodSecurityContext, the value + specified in SecurityContext takes precedence for that container. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a non-root + user. If true, the Kubelet will validate the image at runtime + to ensure that it does not run as UID 0 (root) and fail to start + the container if it does. If unset or false, no such validation + will be performed. May also be set in SecurityContext. If set + in both SecurityContext and PodSecurityContext, the value specified + in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. May + also be set in SecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + format: int64 + type: integer + seLinuxOptions: + description: SELinuxOptions are the labels to be applied to the + container + properties: + level: + description: Level is SELinux level label that applies to the + container. + type: string + role: + description: Role is a SELinux role label that applies to the + container. + type: string + type: + description: Type is a SELinux type label that applies to the + container. + type: string + user: + description: User is a SELinux user label that applies to the + container. + type: string + type: object + supplementalGroups: + description: A list of groups applied to the first process run in + each container, in addition to the container's primary GID. If + unspecified, no groups will be added to any container. + items: + format: int64 + type: integer + type: array + sysctls: + description: Sysctls hold a list of namespaced sysctls used for + the pod. Pods with unsupported sysctls (by the container runtime) + might fail to launch. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + type: object + serviceAccountName: + description: ServiceAccountName is the name of the ServiceAccount to + use to run the Prometheus Pods. + type: string + serviceMonitorNamespaceSelector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + serviceMonitorSelector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + sha: + description: SHA of Prometheus container image to be deployed. Defaults + to the value of `version`. Similar to a tag, but the SHA explicitly + deploys an immutable container image. Version and Tag are ignored + if SHA is set. + type: string + storage: + description: StorageSpec defines the configured storage for a group + Prometheus servers. If neither `emptyDir` nor `volumeClaimTemplate` + is specified, then by default an [EmptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) + will be used. + properties: + emptyDir: + description: Represents an empty directory for a pod. Empty directory + volumes support ownership management and SELinux relabeling. + properties: + medium: + description: 'What type of storage medium should back this directory. + The default is "" which means to use the node''s default medium. + Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir' + type: string + sizeLimit: {} + type: object + volumeClaimTemplate: + description: PersistentVolumeClaim is a user's request for and claim + to a persistent volume + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this + representation of an object. Servers should convert recognized + schemas to the latest internal value, and may reject unrecognized + values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource + this object represents. Servers may infer this from the endpoint + the client submits requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + metadata: + description: ObjectMeta is metadata that all persisted resources + must have, which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map + stored with a resource that may be set by external tools + to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs + to. This is used to distinguish resources with same name + and namespace in different clusters. This field is not + set anywhere right now and apiserver is going to ignore + it if set in create or update request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package + offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to + gracefully terminate before it will be removed from the + system. Only set when deletionTimestamp is also set. May + only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package + offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted + from the registry. Each entry is an identifier for the + responsible component that will remove the entry from + the list. If the deletionTimestamp of the object is non-nil, + entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation + of the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that + must execute in order before this object is visible. + When the last pending initializer is removed, and + no failing result is set, the initializers struct + will be set to nil and the object is considered as + initialized and visible to all clients. + items: + description: Initializer is information about an initializer + that has not yet completed. + properties: + name: + description: name of the process that is responsible + for initializing this object. + type: string + required: + - name + type: object + type: array + result: + description: Status is a return value for calls that + don't return other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema + of this representation of an object. Servers should + convert recognized schemas to the latest internal + value, and may reject unrecognized values. More + info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this + status, 0 if not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional + properties that MAY be set by the server to provide + additional information about a response. The Reason + field of a Status object defines what attributes + will be set. Clients must ignore fields that do + not match the defined type of each attribute, + and should assume that any attribute may be empty, + invalid, or under defined. + properties: + causes: + description: The Causes array includes more + details associated with the StatusReason failure. + Not all StatusReasons may provide detailed + causes. + items: + description: StatusCause provides more information + about an api.Status failure, including cases + when multiple errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description + of the cause of the error. This field + may be presented as-is to a reader. + type: string + reason: + description: A machine-readable description + of the cause of the error. If this value + is empty there is no information available. + type: string + type: object + type: array + group: + description: The group attribute of the resource + associated with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource + associated with the status StatusReason. On + some operations may differ from the requested + resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource + associated with the status StatusReason (when + there is a single name which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds + before the operation should be retried. Some + errors may indicate the client must take an + alternate action - for those errors this field + may indicate how long to wait before taking + the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there + is a single resource which can be described). + More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + type: object + kind: + description: 'Kind is a string value representing + the REST resource this object represents. Servers + may infer this from the endpoint the client submits + requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the + status of this operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic + resources must have, including lists and various + status objects. A resource may have only one of + {ObjectMeta, ListMeta}. + properties: + continue: + description: continue may be set if the user + set a limit on the number of items returned, + and indicates that the server has more data + available. The value is opaque and may be + used to issue another request to the endpoint + that served this list to retrieve the next + set of available objects. Continuing a consistent + list may not be possible if the server configuration + has changed or more than a few minutes have + passed. The resourceVersion field returned + when using this continue value will be identical + to the value in the first response, unless + you have received this token from an error + message. + type: string + resourceVersion: + description: 'String that identifies the server''s + internal version of this object that can be + used by clients to determine when objects + have changed. Value must be treated as opaque + by clients and passed unmodified back to the + server. Populated by the system. Read-only. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing + this object. Populated by the system. Read-only. + type: string + type: object + reason: + description: A machine-readable description of why + this operation is in the "Failure" status. If + this value is empty there is no information available. + A Reason clarifies an HTTP status code but does + not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" + or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + type: object + required: + - pending + type: object + labels: + description: 'Map of string keys and values that can be + used to organize and categorize (scope and select) objects. + May match selectors of replication controllers and services. + More info: http://kubernetes.io/docs/user-guide/labels' + type: object + managedFields: + description: |- + ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. + + This field is alpha and can be changed or removed without notice. + items: + description: ManagedFieldsEntry is a workflow-id, a FieldSet + and the group version of the resource that the fieldset + applies to. + properties: + apiVersion: + description: APIVersion defines the version of this + resource that this field set applies to. The format + is "group/version" just like the top-level APIVersion + field. It is necessary to track the version of a + field set because it cannot be automatically converted. + type: string + fields: + description: 'Fields stores a set of fields in a data + structure like a Trie. To understand how this is + used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + type: object + manager: + description: Manager is an identifier of the workflow + managing these fields. + type: string + operation: + description: Operation is the type of operation which + lead to this ManagedFieldsEntry being created. The + only valid values for this field are 'Apply' and + 'Update'. + type: string + time: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + type: object + type: array + name: + description: 'Name must be unique within a namespace. Is + required when creating resources, although some resources + may allow a client to request the generation of an appropriate + name automatically. Name is primarily intended for creation + idempotence and configuration definition. Cannot be updated. + More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If + ALL objects in the list have been deleted, this object + will be garbage collected. If this object is managed by + a controller, then an entry in this list will point to + this controller, with the controller field set to true. + There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information + to let you identify an owning object. An owning object + must be in the same namespace as the dependent, or be + cluster-scoped, so there is no namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from + the key-value store until this reference is removed. + Defaults to false. To set this field, a user needs + "delete" permission of the owner, otherwise 422 + (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the + managing controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: object + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + type: object + spec: + description: PersistentVolumeClaimSpec describes the common + attributes of storage devices and allows a Source for provider-specific + attributes + properties: + accessModes: + description: 'AccessModes contains the desired access modes + the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + dataSource: + description: TypedLocalObjectReference contains enough information + to let you locate the typed referenced object inside the + same namespace. + properties: + apiGroup: + description: APIGroup is the group for the resource + being referenced. If APIGroup is not specified, the + specified Kind must be in the core API group. For + any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + resources: + description: ResourceRequirements describes the compute + resource requirements. + properties: + limits: + description: 'Limits describes the maximum amount of + compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount + of compute resources required. If Requests is omitted + for a container, it defaults to Limits if that is + explicitly specified, otherwise to an implementation-defined + value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + selector: + description: A label selector is a label query over a set + of resources. The result of matchLabels and matchExpressions + are ANDed. An empty label selector matches all objects. + A null label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + storageClassName: + description: 'Name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' + type: string + volumeMode: + description: volumeMode defines what type of volume is required + by the claim. Value of Filesystem is implied when not + included in claim spec. This is a beta feature. + type: string + volumeName: + description: VolumeName is the binding reference to the + PersistentVolume backing this claim. + type: string + type: object + status: + description: PersistentVolumeClaimStatus is the current status + of a persistent volume claim. + properties: + accessModes: + description: 'AccessModes contains the actual access modes + the volume backing the PVC has. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: + type: string + type: array + capacity: + description: Represents the actual resources of the underlying + volume. + type: object + conditions: + description: Current Condition of persistent volume claim. + If underlying persistent volume is being resized then + the Condition will be set to 'ResizeStarted'. + items: + description: PersistentVolumeClaimCondition contails details + about state of pvc + properties: + lastProbeTime: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + lastTransitionTime: + description: Time is a wrapper around time.Time which + supports correct marshaling to YAML and JSON. Wrappers + are provided for many of the factory methods that + the time package offers. + format: date-time + type: string + message: + description: Human-readable message indicating details + about last transition. + type: string + reason: + description: Unique, this should be a short, machine + understandable string that gives the reason for + condition's last transition. If it reports "ResizeStarted" + that means the underlying persistent volume is being + resized. + type: string + status: + type: string + type: + type: string + required: + - type + - status + type: object + type: array + phase: + description: Phase represents the current phase of PersistentVolumeClaim. + type: string + type: object + type: object + type: object + tag: + description: Tag of Prometheus container image to be deployed. Defaults + to the value of `version`. Version is ignored if Tag is set. + type: string + thanos: + description: ThanosSpec defines parameters for a Prometheus server within + a Thanos deployment. + properties: + baseImage: + description: Thanos base image if other than default. + type: string + clusterAdvertiseAddress: + description: Explicit (external) ip:port address to advertise for + gossip in gossip cluster. Used internally for membership only. + type: string + gcs: + description: 'Deprecated: ThanosGCSSpec should be configured with + an ObjectStorageConfig secret starting with Thanos v0.2.0. ThanosGCSSpec + will be removed.' + properties: + bucket: + description: Google Cloud Storage bucket name for stored blocks. + If empty it won't store any block inside Google Cloud Storage. + type: string + credentials: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + type: object + grpcAdvertiseAddress: + description: Explicit (external) host:port address to advertise + for gRPC StoreAPI in gossip cluster. If empty, 'grpc-address' + will be used. + type: string + image: + description: Image if specified has precedence over baseImage, tag + and sha combinations. Specifying the version is still necessary + to ensure the Prometheus Operator knows what version of Thanos + is being configured. + type: string + objectStorageConfig: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be + a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must be + defined + type: boolean + required: + - key + type: object + peers: + description: Peers is a DNS name for Thanos to discover peers through. + type: string + resources: + description: ResourceRequirements describes the compute resource + requirements. + properties: + limits: + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + description: 'Requests describes the minimum amount of compute + resources required. If Requests is omitted for a container, + it defaults to Limits if that is explicitly specified, otherwise + to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + s3: + description: 'Deprecated: ThanosS3Spec should be configured with + an ObjectStorageConfig secret starting with Thanos v0.2.0. ThanosS3Spec + will be removed.' + properties: + accessKey: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + bucket: + description: S3-Compatible API bucket name for stored blocks. + type: string + encryptsse: + description: Whether to use Server Side Encryption + type: boolean + endpoint: + description: S3-Compatible API endpoint for stored blocks. + type: string + insecure: + description: Whether to use an insecure connection with an S3-Compatible + API. + type: boolean + secretKey: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + signatureVersion2: + description: Whether to use S3 Signature Version 2; otherwise + Signature Version 4 will be used. + type: boolean + type: object + sha: + description: SHA of Thanos container image to be deployed. Defaults + to the value of `version`. Similar to a tag, but the SHA explicitly + deploys an immutable container image. Version and Tag are ignored + if SHA is set. + type: string + tag: + description: Tag of Thanos sidecar container image to be deployed. + Defaults to the value of `version`. Version is ignored if Tag + is set. + type: string + version: + description: Version describes the version of Thanos to use. + type: string + type: object + tolerations: + description: If specified, the pod's tolerations. + items: + description: The pod this Toleration is attached to tolerates any + taint that matches the triple using the matching + operator . + properties: + effect: + description: Effect indicates the taint effect to match. Empty + means match all taint effects. When specified, allowed values + are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the toleration applies + to. Empty means match all taint keys. If the key is empty, operator + must be Exists; this combination means to match all values and + all keys. + type: string + operator: + description: Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. Exists + is equivalent to wildcard for value, so that a pod can tolerate + all taints of a particular category. + type: string + tolerationSeconds: + description: TolerationSeconds represents the period of time the + toleration (which must be of effect NoExecute, otherwise this + field is ignored) tolerates the taint. By default, it is not + set, which means tolerate the taint forever (do not evict). + Zero and negative values will be treated as 0 (evict immediately) + by the system. + format: int64 + type: integer + value: + description: Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise + just a regular string. + type: string + type: object + type: array + version: + description: Version of Prometheus to be deployed. + type: string + type: object + status: + description: 'PrometheusStatus is the most recent observed status of the + Prometheus cluster. Read-only. Not included when requesting from the apiserver, + only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status' + properties: + availableReplicas: + description: Total number of available pods (ready for at least minReadySeconds) + targeted by this Prometheus deployment. + format: int32 + type: integer + paused: + description: Represents whether any actions on the underlaying managed + objects are being performed. Only delete actions will be performed. + type: boolean + replicas: + description: Total number of non-terminated pods targeted by this Prometheus + deployment (their labels match the selector). + format: int32 + type: integer + unavailableReplicas: + description: Total number of unavailable pods targeted by this Prometheus + deployment. + format: int32 + type: integer + updatedReplicas: + description: Total number of non-terminated pods targeted by this Prometheus + deployment that have the desired version spec. + format: int32 + type: integer + required: + - paused + - replicas + - updatedReplicas + - availableReplicas + - unavailableReplicas + type: object + type: object + version: v1 diff --git a/1-custom-resource-definition/prometheusrule-crd.yaml b/1-custom-resource-definition/prometheusrule-crd.yaml new file mode 100644 index 0000000..9553c86 --- /dev/null +++ b/1-custom-resource-definition/prometheusrule-crd.yaml @@ -0,0 +1,392 @@ +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + creationTimestamp: null + name: prometheusrules.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + kind: PrometheusRule + plural: prometheusrules + scope: Namespaced + validation: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + metadata: + description: ObjectMeta is metadata that all persisted resources must have, + which includes all objects users must create. + properties: + annotations: + description: 'Annotations is an unstructured key value map stored with + a resource that may be set by external tools to store and retrieve + arbitrary metadata. They are not queryable and should be preserved + when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations' + type: object + clusterName: + description: The name of the cluster which the object belongs to. This + is used to distinguish resources with same name and namespace in different + clusters. This field is not set anywhere right now and apiserver is + going to ignore it if set in create or update request. + type: string + creationTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of the + factory methods that the time package offers. + format: date-time + type: string + deletionGracePeriodSeconds: + description: Number of seconds allowed for this object to gracefully + terminate before it will be removed from the system. Only set when + deletionTimestamp is also set. May only be shortened. Read-only. + format: int64 + type: integer + deletionTimestamp: + description: Time is a wrapper around time.Time which supports correct + marshaling to YAML and JSON. Wrappers are provided for many of the + factory methods that the time package offers. + format: date-time + type: string + finalizers: + description: Must be empty before the object is deleted from the registry. + Each entry is an identifier for the responsible component that will + remove the entry from the list. If the deletionTimestamp of the object + is non-nil, entries in this list can only be removed. + items: + type: string + type: array + generateName: + description: |- + GenerateName is an optional prefix, used by the server, to generate a unique name ONLY IF the Name field has not been provided. If this field is used, the name returned to the client will be different than the name passed. This value will also be combined with a unique suffix. The provided value has the same validation rules as the Name field, and may be truncated by the length of the suffix required to make the value unique on the server. + + If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header). + + Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency + type: string + generation: + description: A sequence number representing a specific generation of + the desired state. Populated by the system. Read-only. + format: int64 + type: integer + initializers: + description: Initializers tracks the progress of initialization. + properties: + pending: + description: Pending is a list of initializers that must execute + in order before this object is visible. When the last pending + initializer is removed, and no failing result is set, the initializers + struct will be set to nil and the object is considered as initialized + and visible to all clients. + items: + description: Initializer is information about an initializer that + has not yet completed. + properties: + name: + description: name of the process that is responsible for initializing + this object. + type: string + required: + - name + type: object + type: array + result: + description: Status is a return value for calls that don't return + other objects. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this + representation of an object. Servers should convert recognized + schemas to the latest internal value, and may reject unrecognized + values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + code: + description: Suggested HTTP return code for this status, 0 if + not set. + format: int32 + type: integer + details: + description: StatusDetails is a set of additional properties + that MAY be set by the server to provide additional information + about a response. The Reason field of a Status object defines + what attributes will be set. Clients must ignore fields that + do not match the defined type of each attribute, and should + assume that any attribute may be empty, invalid, or under + defined. + properties: + causes: + description: The Causes array includes more details associated + with the StatusReason failure. Not all StatusReasons may + provide detailed causes. + items: + description: StatusCause provides more information about + an api.Status failure, including cases when multiple + errors are encountered. + properties: + field: + description: |- + The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional. + + Examples: + "name" - the field "name" on the current resource + "items[0].name" - the field "name" on the first array entry in "items" + type: string + message: + description: A human-readable description of the cause + of the error. This field may be presented as-is + to a reader. + type: string + reason: + description: A machine-readable description of the + cause of the error. If this value is empty there + is no information available. + type: string + type: object + type: array + group: + description: The group attribute of the resource associated + with the status StatusReason. + type: string + kind: + description: 'The kind attribute of the resource associated + with the status StatusReason. On some operations may differ + from the requested resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: The name attribute of the resource associated + with the status StatusReason (when there is a single name + which can be described). + type: string + retryAfterSeconds: + description: If specified, the time in seconds before the + operation should be retried. Some errors may indicate + the client must take an alternate action - for those errors + this field may indicate how long to wait before taking + the alternate action. + format: int32 + type: integer + uid: + description: 'UID of the resource. (when there is a single + resource which can be described). More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + type: object + kind: + description: 'Kind is a string value representing the REST resource + this object represents. Servers may infer this from the endpoint + the client submits requests to. Cannot be updated. In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + message: + description: A human-readable description of the status of this + operation. + type: string + metadata: + description: ListMeta describes metadata that synthetic resources + must have, including lists and various status objects. A resource + may have only one of {ObjectMeta, ListMeta}. + properties: + continue: + description: continue may be set if the user set a limit + on the number of items returned, and indicates that the + server has more data available. The value is opaque and + may be used to issue another request to the endpoint that + served this list to retrieve the next set of available + objects. Continuing a consistent list may not be possible + if the server configuration has changed or more than a + few minutes have passed. The resourceVersion field returned + when using this continue value will be identical to the + value in the first response, unless you have received + this token from an error message. + type: string + resourceVersion: + description: 'String that identifies the server''s internal + version of this object that can be used by clients to + determine when objects have changed. Value must be treated + as opaque by clients and passed unmodified back to the + server. Populated by the system. Read-only. More info: + https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency' + type: string + selfLink: + description: selfLink is a URL representing this object. + Populated by the system. Read-only. + type: string + type: object + reason: + description: A machine-readable description of why this operation + is in the "Failure" status. If this value is empty there is + no information available. A Reason clarifies an HTTP status + code but does not override it. + type: string + status: + description: 'Status of the operation. One of: "Success" or + "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status' + type: string + type: object + required: + - pending + type: object + labels: + description: 'Map of string keys and values that can be used to organize + and categorize (scope and select) objects. May match selectors of + replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels' + type: object + managedFields: + description: |- + ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object. + + This field is alpha and can be changed or removed without notice. + items: + description: ManagedFieldsEntry is a workflow-id, a FieldSet and the + group version of the resource that the fieldset applies to. + properties: + apiVersion: + description: APIVersion defines the version of this resource that + this field set applies to. The format is "group/version" just + like the top-level APIVersion field. It is necessary to track + the version of a field set because it cannot be automatically + converted. + type: string + fields: + description: 'Fields stores a set of fields in a data structure + like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff' + type: object + manager: + description: Manager is an identifier of the workflow managing + these fields. + type: string + operation: + description: Operation is the type of operation which lead to + this ManagedFieldsEntry being created. The only valid values + for this field are 'Apply' and 'Update'. + type: string + time: + description: Time is a wrapper around time.Time which supports + correct marshaling to YAML and JSON. Wrappers are provided + for many of the factory methods that the time package offers. + format: date-time + type: string + type: object + type: array + name: + description: 'Name must be unique within a namespace. Is required when + creating resources, although some resources may allow a client to + request the generation of an appropriate name automatically. Name + is primarily intended for creation idempotence and configuration definition. + Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + namespace: + description: |- + Namespace defines the space within each name must be unique. An empty namespace is equivalent to the "default" namespace, but "default" is the canonical representation. Not all objects are required to be scoped to a namespace - the value of this field for those objects will be empty. + + Must be a DNS_LABEL. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/namespaces + type: string + ownerReferences: + description: List of objects depended by this object. If ALL objects + in the list have been deleted, this object will be garbage collected. + If this object is managed by a controller, then an entry in this list + will point to this controller, with the controller field set to true. + There cannot be more than one managing controller. + items: + description: OwnerReference contains enough information to let you + identify an owning object. An owning object must be in the same + namespace as the dependent, or be cluster-scoped, so there is no + namespace field. + properties: + apiVersion: + description: API version of the referent. + type: string + blockOwnerDeletion: + description: If true, AND if the owner has the "foregroundDeletion" + finalizer, then the owner cannot be deleted from the key-value + store until this reference is removed. Defaults to false. To + set this field, a user needs "delete" permission of the owner, + otherwise 422 (Unprocessable Entity) will be returned. + type: boolean + controller: + description: If true, this reference points to the managing controller. + type: boolean + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names' + type: string + uid: + description: 'UID of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#uids' + type: string + required: + - apiVersion + - kind + - name + - uid + type: object + type: array + resourceVersion: + description: |- + An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources. + + Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency + type: string + selfLink: + description: SelfLink is a URL representing this object. Populated by + the system. Read-only. + type: string + uid: + description: |- + UID is the unique in time and space value for this object. It is typically generated by the server on successful creation of a resource and is not allowed to change on PUT operations. + + Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids + type: string + type: object + spec: + description: PrometheusRuleSpec contains specification parameters for a + Rule. + properties: + groups: + description: Content of Prometheus rule file + items: + description: RuleGroup is a list of sequentially evaluated recording + and alerting rules. + properties: + interval: + type: string + name: + type: string + rules: + items: + description: Rule describes an alerting or recording rule. + properties: + alert: + type: string + annotations: + type: object + expr: + anyOf: + - type: string + - type: integer + for: + type: string + labels: + type: object + record: + type: string + required: + - expr + type: object + type: array + required: + - name + - rules + type: object + type: array + type: object + type: object + version: v1 diff --git a/1-custom-resource-definition/servicemonitor-crd.yaml b/1-custom-resource-definition/servicemonitor-crd.yaml new file mode 100644 index 0000000..b019f37 --- /dev/null +++ b/1-custom-resource-definition/servicemonitor-crd.yaml @@ -0,0 +1,303 @@ +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + creationTimestamp: null + name: servicemonitors.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: + kind: ServiceMonitor + plural: servicemonitors + scope: Namespaced + validation: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds' + type: string + spec: + description: ServiceMonitorSpec contains specification parameters for a + ServiceMonitor. + properties: + endpoints: + description: A list of endpoints allowed as part of this ServiceMonitor. + items: + description: Endpoint defines a scrapeable endpoint serving Prometheus + metrics. + properties: + basicAuth: + description: 'BasicAuth allow an endpoint to authenticate over + basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints' + properties: + password: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + username: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + optional: + description: Specify whether the Secret or it's key must + be defined + type: boolean + required: + - key + type: object + type: object + bearerTokenFile: + description: File to read bearer token for scraping targets. + type: string + honorLabels: + description: HonorLabels chooses the metric's labels on collisions + with target labels. + type: boolean + interval: + description: Interval at which metrics should be scraped + type: string + metricRelabelings: + description: MetricRelabelConfigs to apply to samples before ingestion. + items: + description: 'RelabelConfig allows dynamic rewriting of the + label set, being applied to samples before ingestion. It defines + ``-section of Prometheus configuration. + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + description: Action to perform based on regex matching. + Default is 'replace' + type: string + modulus: + description: Modulus to take of the hash of the source label + values. + format: int64 + type: integer + regex: + description: Regular expression against which the extracted + value is matched. defailt is '(.*)' + type: string + replacement: + description: Replacement value against which a regex replace + is performed if the regular expression matches. Regex + capture groups are available. Default is '$1' + type: string + separator: + description: Separator placed between concatenated source + label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing + labels. Their content is concatenated using the configured + separator and matched against the configured regular expression + for the replace, keep, and drop actions. + items: + type: string + type: array + targetLabel: + description: Label to which the resulting value is written + in a replace action. It is mandatory for replace actions. + Regex capture groups are available. + type: string + type: object + type: array + params: + description: Optional HTTP URL parameters + type: object + path: + description: HTTP path to scrape for metrics. + type: string + port: + description: Name of the service port this endpoint refers to. + Mutually exclusive with targetPort. + type: string + proxyUrl: + description: ProxyURL eg http://proxyserver:2195 Directs scrapes + to proxy through this endpoint. + type: string + relabelings: + description: 'RelabelConfigs to apply to samples before ingestion. + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config' + items: + description: 'RelabelConfig allows dynamic rewriting of the + label set, being applied to samples before ingestion. It defines + ``-section of Prometheus configuration. + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs' + properties: + action: + description: Action to perform based on regex matching. + Default is 'replace' + type: string + modulus: + description: Modulus to take of the hash of the source label + values. + format: int64 + type: integer + regex: + description: Regular expression against which the extracted + value is matched. defailt is '(.*)' + type: string + replacement: + description: Replacement value against which a regex replace + is performed if the regular expression matches. Regex + capture groups are available. Default is '$1' + type: string + separator: + description: Separator placed between concatenated source + label values. default is ';'. + type: string + sourceLabels: + description: The source labels select values from existing + labels. Their content is concatenated using the configured + separator and matched against the configured regular expression + for the replace, keep, and drop actions. + items: + type: string + type: array + targetLabel: + description: Label to which the resulting value is written + in a replace action. It is mandatory for replace actions. + Regex capture groups are available. + type: string + type: object + type: array + scheme: + description: HTTP scheme to use for scraping. + type: string + scrapeTimeout: + description: Timeout after which the scrape is ended + type: string + targetPort: + anyOf: + - type: string + - type: integer + tlsConfig: + description: TLSConfig specifies TLS configuration parameters. + properties: + caFile: + description: The CA cert to use for the targets. + type: string + certFile: + description: The client cert file for the targets. + type: string + insecureSkipVerify: + description: Disable target certificate validation. + type: boolean + keyFile: + description: The client key file for the targets. + type: string + serverName: + description: Used to verify the hostname for the targets. + type: string + type: object + type: object + type: array + jobLabel: + description: The label to use to retrieve the job name from. + type: string + namespaceSelector: + description: NamespaceSelector is a selector for selecting either all + namespaces or a list of namespaces. + properties: + any: + description: Boolean describing whether all namespaces are selected + in contrast to a list restricting them. + type: boolean + matchNames: + description: List of namespace names. + items: + type: string + type: array + type: object + podTargetLabels: + description: PodTargetLabels transfers labels on the Kubernetes Pod + onto the target. + items: + type: string + type: array + sampleLimit: + description: SampleLimit defines per-scrape limit on number of scraped + samples that will be accepted. + format: int64 + type: integer + selector: + description: A label selector is a label query over a set of resources. + The result of matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector matches + no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that contains + values, a key, and an operator that relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to a + set of values. Valid operators are In, NotIn, Exists and + DoesNotExist. + type: string + values: + description: values is an array of string values. If the operator + is In or NotIn, the values array must be non-empty. If the + operator is Exists or DoesNotExist, the values array must + be empty. This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator is + "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + targetLabels: + description: TargetLabels transfers labels on the Kubernetes Service + onto the target. + items: + type: string + type: array + required: + - endpoints + - selector + type: object + type: object + version: v1 diff --git a/10-grafana-dashboard/Elasticsearch-1605081369937.json b/10-grafana-dashboard/Elasticsearch-1605081369937.json new file mode 100644 index 0000000..a665446 --- /dev/null +++ b/10-grafana-dashboard/Elasticsearch-1605081369937.json @@ -0,0 +1,3560 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Elasticsearch detailed dashboard", + "editable": true, + "gnetId": 4358, + "graphTooltip": 1, + "id": 48, + "iteration": 1605081327328, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 50, + "panels": [], + "repeat": null, + "title": "Cluster", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(178, 49, 13, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 10, + "x": 0, + "y": 1 + }, + "height": "50", + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(elasticsearch_cluster_health_status{service=~\"$service\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 40 + } + ], + "thresholds": "0,1", + "title": "Cluster health status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "GREEN", + "value": "1" + }, + { + "op": "=", + "text": "RED", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 10, + "y": 1 + }, + "height": "50", + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(elasticsearch_cluster_health_number_of_nodes{service=~\"$service\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 40 + } + ], + "thresholds": "", + "title": "Nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 14, + "y": 1 + }, + "height": "50", + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_number_of_data_nodes{service=\"$service\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 40 + } + ], + "thresholds": "", + "title": "Data nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 18, + "y": 1 + }, + "height": "50", + "hideTimeOverride": true, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_number_of_pending_tasks{service=\"$service\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 40 + } + ], + "thresholds": "", + "title": "Pending tasks", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 51, + "panels": [], + "repeat": null, + "title": "Shards", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 5 + }, + "height": "50", + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeat": "shard_type", + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_active_primary_shards{service=\"$service\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 40 + } + ], + "thresholds": "", + "title": "active primary shards", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 5 + }, + "height": "50", + "id": 39, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_active_shards{service=\"$service\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 40 + } + ], + "thresholds": "", + "title": "active shards", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 5 + }, + "height": "50", + "id": 40, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_initializing_shards{service=\"$service\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 40 + } + ], + "thresholds": "", + "title": "initializing shards", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 5 + }, + "height": "50", + "id": 41, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_relocating_shards{service=\"$service\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 40 + } + ], + "thresholds": "", + "title": "relocating shards", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 5 + }, + "height": "50", + "id": 42, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_delayed_unassigned_shards{service=\"$service\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 40 + } + ], + "thresholds": "", + "title": "unassigned shards", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 52, + "panels": [], + "repeat": null, + "title": "System", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 0, + "y": 9 + }, + "height": "400", + "id": 30, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "elasticsearch_process_cpu_percent{service=\"$service\",es_master_node=\"true\",name=~\"$node\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }} - master", + "metric": "", + "refId": "A", + "step": 10 + }, + { + "expr": "elasticsearch_process_cpu_percent{service=\"$service\",es_data_node=\"true\",name=~\"$node\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }} - data", + "metric": "", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "CPU usage", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 6, + "y": 9 + }, + "height": "400", + "id": 31, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "elasticsearch_jvm_memory_used_bytes{service=\"$service\",name=~\"$node\",name=~\"$node\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }} - used: {{area}}", + "metric": "", + "refId": "A", + "step": 10 + }, + { + "expr": "elasticsearch_jvm_memory_committed_bytes{service=\"$service\",name=~\"$node\",name=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - committed: {{area}}", + "refId": "B", + "step": 10 + }, + { + "expr": "elasticsearch_jvm_memory_max_bytes{service=\"$service\",name=~\"$node\",name=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - max: {{area}}", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Memory", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 12, + "y": 9 + }, + "height": "400", + "id": 32, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1-(elasticsearch_filesystem_data_available_bytes{service=\"$service\"}/elasticsearch_filesystem_data_size_bytes{service=\"$service\",name=~\"$node\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }} - {{path}}", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": true, + "fillColor": "rgba(216, 200, 27, 0.27)", + "op": "gt", + "value": 0.8 + }, + { + "colorMode": "custom", + "fill": true, + "fillColor": "rgba(234, 112, 112, 0.22)", + "op": "gt", + "value": 0.9 + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": "Disk Usage %", + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 9 + }, + "height": "400", + "id": 47, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "sent", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(elasticsearch_transport_tx_size_bytes_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} -sent", + "refId": "D", + "step": 10 + }, + { + "expr": "irate(elasticsearch_transport_rx_size_bytes_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} -received", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Bytes/sec", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "pps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 53, + "panels": [], + "repeat": null, + "title": "Documents", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 0, + "y": 20 + }, + "height": "400", + "id": 1, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "elasticsearch_indices_docs{service=\"$service\",name=~\"$node\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }}", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Documents count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Documents", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 6, + "y": 20 + }, + "height": "400", + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "irate(elasticsearch_indices_indexing_index_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Documents indexed rate", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "index calls/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 12, + "y": 20 + }, + "height": "400", + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_indices_docs_deleted{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Documents deleted rate", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Documents/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 20 + }, + "height": "400", + "id": 26, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_indices_merges_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Documents merged rate", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Documents/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 54, + "panels": [], + "repeat": null, + "title": "Total Operations stats", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 31 + }, + "height": "400", + "id": 48, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(elasticsearch_indices_indexing_index_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }} - indexing", + "metric": "", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_search_query_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - query", + "refId": "B", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_search_fetch_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - fetch", + "refId": "C", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_merges_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - merges", + "refId": "D", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_refresh_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - refresh", + "refId": "E", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_flush_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - flush", + "refId": "F", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Operations rate", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Operations/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 31 + }, + "height": "400", + "id": 49, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(elasticsearch_indices_indexing_index_time_seconds_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }} - indexing", + "metric": "", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_search_query_time_ms_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - query", + "refId": "B", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_search_fetch_time_ms_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - fetch", + "refId": "C", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_merges_total_time_ms_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - merges", + "refId": "D", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_refresh_total_time_ms_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - refresh", + "refId": "E", + "step": 4 + }, + { + "expr": "irate(elasticsearch_indices_flush_time_ms_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }} - flush", + "refId": "F", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Operations time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "Time", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 55, + "panels": [], + "repeat": null, + "title": "Times", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 42 + }, + "height": "400", + "id": 33, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_indices_search_query_time_seconds{service=\"$service\",name=~\"$node\"}[$interval]) ", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Query time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "Time", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 42 + }, + "height": "400", + "id": 5, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_indices_indexing_index_time_seconds_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Indexing time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "Time", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 42 + }, + "height": "400", + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_indices_merges_total_time_seconds_total{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Merging time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "Time", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 56, + "panels": [], + "repeat": null, + "title": "Caches", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 0, + "y": 53 + }, + "height": "400", + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "elasticsearch_indices_fielddata_memory_size_bytes{service=\"$service\",name=~\"$node\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Field data memory size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Memory", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 6, + "y": 53 + }, + "height": "400", + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_indices_fielddata_evictions{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Field data evictions", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Evictions/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 12, + "y": 53 + }, + "height": "400", + "id": 35, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "elasticsearch_indices_query_cache_memory_size_bytes{service=\"$service\",name=~\"$node\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Query cache size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Size", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 53 + }, + "height": "400", + "id": 36, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_indices_query_cache_evictions{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Query cache evictions", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Evictions/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 63 + }, + "id": 57, + "panels": [], + "repeat": null, + "title": "Thread Pool", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 19, + "w": 6, + "x": 0, + "y": 64 + }, + "id": 45, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " irate(elasticsearch_thread_pool_rejected_count{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}} - {{ type }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Thread Pool operations rejected", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 19, + "w": 6, + "x": 6, + "y": 64 + }, + "id": 46, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "elasticsearch_thread_pool_active_count{service=\"$service\",name=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}} - {{ type }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Thread Pool operations queued", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 19, + "w": 6, + "x": 12, + "y": 64 + }, + "height": "", + "id": 43, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "elasticsearch_thread_pool_active_count{service=\"$service\",name=~\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}} - {{ type }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Thread Pool threads active", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 19, + "w": 6, + "x": 18, + "y": 64 + }, + "id": 44, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(elasticsearch_thread_pool_completed_count{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}} - {{ type }}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Thread Pool operations completed", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 83 + }, + "id": 58, + "panels": [], + "repeat": null, + "title": "JVM Garbage Collection", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 84 + }, + "height": "400", + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_jvm_gc_collection_seconds_count{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}} - {{gc}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GC count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "GCs", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 84 + }, + "height": "400", + "id": 27, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_jvm_gc_collection_seconds_count{service=\"$service\",name=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}} - {{gc}}", + "metric": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GC time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "Time", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "1m", + "schemaVersion": 20, + "style": "dark", + "tags": [ + "elasticsearch", + "App" + ], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "elasticsearch", + "value": "elasticsearch" + }, + "datasource": "prometheus", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "service", + "multi": false, + "name": "service", + "options": [], + "query": "label_values(elasticsearch_cluster_health_status,service)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": null, + "tags": [], + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "prometheus", + "definition": "", + "hide": 0, + "includeAll": true, + "label": "node", + "multi": true, + "name": "node", + "options": [], + "query": "label_values(elasticsearch_process_cpu_percent,name)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": null, + "tags": [], + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Elasticsearch", + "uid": "-xmZjjhMk", + "version": 1 +} \ No newline at end of file diff --git a/10-grafana-dashboard/JMX-1603705214796.json b/10-grafana-dashboard/JMX-1603705214796.json new file mode 100644 index 0000000..87d556f --- /dev/null +++ b/10-grafana-dashboard/JMX-1603705214796.json @@ -0,0 +1,1180 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Some Information obtained from JMX exporter", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 5, + "iteration": 1603692392913, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 12, + "panels": [], + "title": "JVM", + "type": "row" + }, + { + "aliasColors": { + "heap": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 0, + "fillGradient": 5, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_bytes_used{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "legendFormat": "{{ area }}", + "refId": "A" + }, + { + "expr": "jvm_memory_bytes_max{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\", area=\"heap\"}", + "legendFormat": "max heap", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Heap", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_pool_bytes_used{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "legendFormat": "{{ pool }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Memory Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "heap": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 0, + "fillGradient": 5, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/time.*/i", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(jvm_gc_collection_seconds_count{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}[1m])", + "hide": false, + "legendFormat": "count - {{ gc }}", + "refId": "A" + }, + { + "expr": "increase(jvm_gc_collection_seconds_sum{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}[1m])", + "hide": false, + "legendFormat": "time - {{ gc }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GC Occurring", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "current": "semi-dark-blue", + "daemon": "semi-dark-green", + "deadlocked": "dark-red", + "peak": "semi-dark-orange" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_threads_current{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "legendFormat": "current", + "refId": "A" + }, + { + "expr": "jvm_threads_daemon{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "hide": false, + "legendFormat": "daemon", + "refId": "B" + }, + { + "expr": "jvm_threads_deadlocked{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "hide": false, + "legendFormat": "deadlocked", + "refId": "C" + }, + { + "expr": "jvm_threads_peak{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "hide": false, + "legendFormat": "peak", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Threads", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "CPU": "blue", + "Memory (RSS)": "green", + "app-5c556c54bc-jmp2j": "semi-dark-blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Memory (RSS)", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(process_cpu_seconds_total{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}[1m]) * 100", + "hide": false, + "legendFormat": "CPU", + "refId": "A" + }, + { + "expr": "process_resident_memory_bytes{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "hide": false, + "legendFormat": "Memory (RSS)", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Process CPU & Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": true, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 7, + "x": 12, + "y": 13 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "jvm_info{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "instant": false, + "legendFormat": "{{ vendor }}

{{runtime}}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "JVM Runtime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "name" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 19, + "y": 13 + }, + "id": 22, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "jvm_info{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "instant": false, + "legendFormat": "{{ version }}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "JVM Version", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "name" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 4, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 14, + "x": 0, + "y": 2 + }, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_producer_producer_topic_metrics_record_send_rate{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "legendFormat": "{{ topic }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kafka Producer Rate", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "prometheus", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 10, + "x": 14, + "y": 2 + }, + "id": 6, + "options": {}, + "pageSize": null, + "pluginVersion": "6.4.2", + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Topic", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Metric", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Message(s)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "expr": "increase(kafka_producer_producer_topic_metrics_record_send_total{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}[$__range]) > 0", + "instant": true, + "legendFormat": "{{ topic }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka Total Produced During Current Time Range", + "transform": "timeseries_to_rows", + "type": "table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 14, + "x": 0, + "y": 9 + }, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\", topic!=\"\"}) by (namespace, service, topic)", + "legendFormat": "{{ topic }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kafka Consumer Rate", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "prometheus", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 10, + "x": 14, + "y": 9 + }, + "id": 8, + "options": {}, + "pageSize": null, + "pluginVersion": "6.4.2", + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Topic", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Metric", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Message(s)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "expr": "sum(increase(kafka_consumer_consumer_fetch_manager_metrics_records_consumed_total{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\", topic!=\"\"}[$__range])) by (namespace, service, topic) > 0", + "instant": true, + "legendFormat": "{{ topic }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka Total Consumed During Current Time Range", + "transform": "timeseries_to_rows", + "type": "table" + } + ], + "title": "Kafka", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 20, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "prometheus", + "definition": "label_values(jmx_exporter_build_info, namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(jmx_exporter_build_info, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "reactflow", + "value": "reactflow" + }, + "datasource": "prometheus", + "definition": "label_values(jmx_exporter_build_info{namespace=\"$namespace\"}, service)", + "hide": 0, + "includeAll": false, + "label": "Service", + "multi": false, + "name": "service", + "options": [], + "query": "label_values(jmx_exporter_build_info{namespace=\"$namespace\"}, service)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "reactflow-0", + "value": "reactflow-0" + }, + "datasource": "prometheus", + "definition": "label_values(jmx_exporter_build_info{namespace=\"$namespace\", service=\"$service\"}, pod)", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [], + "query": "label_values(jmx_exporter_build_info{namespace=\"$namespace\", service=\"$service\"}, pod)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "JMX", + "uid": "DfcvP0qZk", + "version": 1 +} diff --git a/10-grafana-dashboard/JVM (Micrometer).json b/10-grafana-dashboard/JVM (Micrometer).json new file mode 100644 index 0000000..6e54527 --- /dev/null +++ b/10-grafana-dashboard/JVM (Micrometer).json @@ -0,0 +1,4026 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + }, + { + "datasource": "prometheus", + "enable": true, + "expr": "resets(process_uptime_seconds{service=\"$service\", pod=\"$pod\"}[1m]) > 0", + "iconColor": "rgba(255, 96, 96, 1)", + "name": "Restart Detection", + "showIn": 0, + "step": "1m", + "tagKeys": "restart-tag", + "textFormat": "uptime reset", + "titleFormat": "Restart" + } + ] + }, + "description": "Dashboard for Micrometer instrumented applications (Java, Spring Boot, Micronaut)", + "editable": true, + "gnetId": 4701, + "graphTooltip": 1, + "id": 14, + "iteration": 1624877562559, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 125, + "panels": [], + "repeat": null, + "title": "Quick Facts", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "decimals": 1, + "editable": true, + "error": false, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 1 + }, + "height": "", + "id": 63, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "70%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "process_uptime_seconds{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "decimals": null, + "editable": true, + "error": false, + "format": "dateTimeAsIso", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 1 + }, + "height": "", + "id": 92, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "70%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "process_start_time_seconds{service=\"$service\", pod=\"$pod\"}*1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400 + } + ], + "thresholds": "", + "title": "Start time", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 65, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "70%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", area=\"heap\"})*100/sum(jvm_memory_max_bytes{service=\"$service\",pod=\"$pod\", area=\"heap\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 14400 + } + ], + "thresholds": "70,90", + "title": "Heap used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 75, + "interval": null, + "links": [], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "70%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + }, + { + "from": "-99999999999999999999999999999999", + "text": "N/A", + "to": "0" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", area=\"nonheap\"})*100/sum(jvm_memory_max_bytes{service=\"$service\",pod=\"$pod\", area=\"nonheap\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 14400 + } + ], + "thresholds": "70,90", + "title": "Non-Heap used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "x", + "value": "" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 126, + "panels": [], + "repeat": null, + "title": "I/O Overview", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 5 + }, + "id": 111, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(http_server_requests_seconds_count{service=\"$service\", pod=\"$pod\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "HTTP", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "HTTP": "#890f02", + "HTTP - 5xx": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 5 + }, + "id": 112, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(http_server_requests_seconds_count{service=\"$service\", pod=\"$pod\", status=~\"5..\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "HTTP - 5xx", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 5 + }, + "id": 113, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(http_server_requests_seconds_sum{service=\"$service\", pod=\"$pod\", status!~\"5..\"}[1m]))/sum(rate(http_server_requests_seconds_count{service=\"$service\", pod=\"$pod\", status!~\"5..\"}[1m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "HTTP - AVG", + "refId": "A" + }, + { + "expr": "max(http_server_requests_seconds_max{service=\"$service\", pod=\"$pod\", status!~\"5..\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "HTTP - MAX", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 5 + }, + "id": 119, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "tomcat_threads_busy_threads{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "TOMCAT - BSY", + "refId": "A" + }, + { + "expr": "tomcat_threads_current_threads{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "TOMCAT - CUR", + "refId": "B" + }, + { + "expr": "tomcat_threads_config_max_threads{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "TOMCAT - MAX", + "refId": "C" + }, + { + "expr": "jetty_threads_busy{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "JETTY - BSY", + "refId": "D" + }, + { + "expr": "jetty_threads_current{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "JETTY - CUR", + "refId": "E" + }, + { + "expr": "jetty_threads_config_max{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "JETTY - MAX", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Utilisation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 127, + "panels": [], + "repeat": null, + "title": "JVM Memory", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 13 + }, + "id": 24, + "legend": { + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", area=\"heap\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "sum(jvm_memory_committed_bytes{service=\"$service\", pod=\"$pod\", area=\"heap\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "committed", + "refId": "B", + "step": 2400 + }, + { + "expr": "sum(jvm_memory_max_bytes{service=\"$service\", pod=\"$pod\", area=\"heap\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "max", + "refId": "C", + "step": 2400 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Heap", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 13 + }, + "id": 25, + "legend": { + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", area=\"nonheap\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "sum(jvm_memory_committed_bytes{service=\"$service\", pod=\"$pod\", area=\"nonheap\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "committed", + "refId": "B", + "step": 2400 + }, + { + "expr": "sum(jvm_memory_max_bytes{service=\"$service\", pod=\"$pod\", area=\"nonheap\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "max", + "refId": "C", + "step": 2400 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Non-Heap", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 13 + }, + "id": 26, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "sum(jvm_memory_committed_bytes{service=\"$service\", pod=\"$pod\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "committed", + "refId": "B", + "step": 2400 + }, + { + "expr": "sum(jvm_memory_max_bytes{service=\"$service\", pod=\"$pod\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "max", + "refId": "C", + "step": 2400 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Total", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 13 + }, + "id": 86, + "legend": { + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_memory_vss_bytes{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "vss", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "process_memory_rss_bytes{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "rss", + "refId": "B" + }, + { + "expr": "process_memory_swap_bytes{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "swap", + "refId": "C" + }, + { + "expr": "process_memory_rss_bytes{service=\"$service\", pod=\"$pod\"} + process_memory_swap_bytes{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "total", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Process Memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 128, + "panels": [], + "repeat": null, + "title": "JVM Misc", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 21 + }, + "id": 106, + "legend": { + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_cpu_usage{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "system", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "process_cpu_usage{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "process", + "refId": "B" + }, + { + "expr": "avg_over_time(process_cpu_usage{service=\"$service\", pod=\"$pod\"}[1h])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "process-1h", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "decimals": 1, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 21 + }, + "id": 93, + "legend": { + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_load_average_1m{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "system-1m", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "system_cpu_count{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "cpus", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 21 + }, + "id": 32, + "legend": { + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_threads_live_threads{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "live", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "jvm_threads_daemon_threads{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "daemon", + "metric": "", + "refId": "B", + "step": 2400 + }, + { + "expr": "jvm_threads_peak_threads{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "peak", + "refId": "C", + "step": 2400 + }, + { + "expr": "process_threads{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "process", + "refId": "D", + "step": 2400 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Threads", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "blocked": "#bf1b00", + "new": "#fce2de", + "runnable": "#7eb26d", + "terminated": "#511749", + "timed-waiting": "#c15c17", + "waiting": "#eab839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 21 + }, + "id": 124, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_threads_states_threads{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{state}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Thread States", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "debug": "#1F78C1", + "error": "#BF1B00", + "info": "#508642", + "trace": "#6ED0E0", + "warn": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 18, + "x": 0, + "y": 28 + }, + "height": "", + "id": 91, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": true, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "error", + "yaxis": 1 + }, + { + "alias": "warn", + "yaxis": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(logback_events_total{service=\"$service\", pod=\"$pod\"}[1m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{level}}", + "metric": "", + "refId": "A", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Log Events", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "decimals": 0, + "format": "opm", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 28 + }, + "id": 61, + "legend": { + "avg": false, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_files_open_files{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "open", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "process_files_max_files{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "max", + "metric": "", + "refId": "B", + "step": 2400 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "File Descriptors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 10, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 129, + "panels": [], + "repeat": "persistence_counts", + "title": "JVM Memory Pools (Heap)", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 36 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 3, + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "jvm_memory_pool_heap", + "scopedVars": { + "jvm_memory_pool_heap": { + "selected": false, + "text": "PS Eden Space", + "value": "PS Eden Space" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_heap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 1800 + }, + { + "expr": "jvm_memory_committed_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_heap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "commited", + "metric": "", + "refId": "B", + "step": 1800 + }, + { + "expr": "jvm_memory_max_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_heap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max", + "metric": "", + "refId": "C", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$jvm_memory_pool_heap", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 36 + }, + "id": 134, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 3, + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1624877562559, + "repeatPanelId": 3, + "scopedVars": { + "jvm_memory_pool_heap": { + "selected": false, + "text": "PS Old Gen", + "value": "PS Old Gen" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_heap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 1800 + }, + { + "expr": "jvm_memory_committed_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_heap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "commited", + "metric": "", + "refId": "B", + "step": 1800 + }, + { + "expr": "jvm_memory_max_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_heap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max", + "metric": "", + "refId": "C", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$jvm_memory_pool_heap", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 36 + }, + "id": 135, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 3, + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1624877562559, + "repeatPanelId": 3, + "scopedVars": { + "jvm_memory_pool_heap": { + "selected": false, + "text": "PS Survivor Space", + "value": "PS Survivor Space" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_heap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 1800 + }, + { + "expr": "jvm_memory_committed_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_heap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "commited", + "metric": "", + "refId": "B", + "step": 1800 + }, + { + "expr": "jvm_memory_max_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_heap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max", + "metric": "", + "refId": "C", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$jvm_memory_pool_heap", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 130, + "panels": [], + "repeat": null, + "title": "JVM Memory Pools (Non-Heap)", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 44 + }, + "id": 78, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 3, + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "jvm_memory_pool_nonheap", + "scopedVars": { + "jvm_memory_pool_nonheap": { + "selected": false, + "text": "Metaspace", + "value": "Metaspace" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_nonheap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 1800 + }, + { + "expr": "jvm_memory_committed_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_nonheap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "commited", + "metric": "", + "refId": "B", + "step": 1800 + }, + { + "expr": "jvm_memory_max_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_nonheap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max", + "metric": "", + "refId": "C", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$jvm_memory_pool_nonheap", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 44 + }, + "id": 136, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 3, + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1624877562559, + "repeatPanelId": 78, + "scopedVars": { + "jvm_memory_pool_nonheap": { + "selected": false, + "text": "Compressed Class Space", + "value": "Compressed Class Space" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_nonheap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 1800 + }, + { + "expr": "jvm_memory_committed_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_nonheap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "commited", + "metric": "", + "refId": "B", + "step": 1800 + }, + { + "expr": "jvm_memory_max_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_nonheap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max", + "metric": "", + "refId": "C", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$jvm_memory_pool_nonheap", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 44 + }, + "id": 137, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 3, + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1624877562559, + "repeatPanelId": 78, + "scopedVars": { + "jvm_memory_pool_nonheap": { + "selected": false, + "text": "Code Cache", + "value": "Code Cache" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_nonheap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 1800 + }, + { + "expr": "jvm_memory_committed_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_nonheap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "commited", + "metric": "", + "refId": "B", + "step": 1800 + }, + { + "expr": "jvm_memory_max_bytes{service=\"$service\", pod=\"$pod\", id=~\"$jvm_memory_pool_nonheap\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "max", + "metric": "", + "refId": "C", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$jvm_memory_pool_nonheap", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "mbytes", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 51 + }, + "id": 131, + "panels": [], + "repeat": null, + "title": "Garbage Collection", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 52 + }, + "id": 98, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(jvm_gc_pause_seconds_count{service=\"$service\", pod=\"$pod\"}[1m])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{action}} ({{cause}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Collections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 52 + }, + "id": 101, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(jvm_gc_pause_seconds_sum{service=\"$service\", pod=\"$pod\"}[1m])/rate(jvm_gc_pause_seconds_count{service=\"$service\", pod=\"$pod\"}[1m])", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "avg {{action}} ({{cause}})", + "refId": "A" + }, + { + "expr": "jvm_gc_pause_seconds_max{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "max {{action}} ({{cause}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pause Durations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 52 + }, + "id": 99, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(jvm_gc_memory_allocated_bytes_total{service=\"$service\", pod=\"$pod\"}[1m])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "allocated", + "refId": "A" + }, + { + "expr": "rate(jvm_gc_memory_promoted_bytes_total{service=\"$service\", pod=\"$pod\"}[1m])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "promoted", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Allocated/Promoted", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 132, + "panels": [], + "repeat": null, + "title": "Classloading", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 60 + }, + "id": 37, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_classes_loaded_classes{service=\"$service\", pod=\"$pod\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "loaded", + "metric": "", + "refId": "A", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Classes loaded", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 60 + }, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "delta(jvm_classes_loaded_classes{service=\"$service\",pod=\"$pod\"}[1m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "delta-1m", + "metric": "", + "refId": "A", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Class delta", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "ops", + "short" + ], + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 67 + }, + "id": 133, + "panels": [], + "repeat": null, + "title": "Buffer Pools", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 68 + }, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_buffer_memory_used_bytes{service=\"$service\", pod=\"$pod\", id=\"direct\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "jvm_buffer_total_capacity_bytes{service=\"$service\", pod=\"$pod\", id=\"direct\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "capacity", + "metric": "", + "refId": "B", + "step": 2400 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Direct Buffers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 68 + }, + "id": 83, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_buffer_count_buffers{service=\"$service\", pod=\"$pod\", id=\"direct\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "count", + "metric": "", + "refId": "A", + "step": 2400 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Direct Buffers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 68 + }, + "id": 85, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_buffer_memory_used_bytes{service=\"$service\", pod=\"$pod\", id=\"mapped\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 2400 + }, + { + "expr": "jvm_buffer_total_capacity_bytes{service=\"$service\", pod=\"$pod\", id=\"mapped\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "capacity", + "metric": "", + "refId": "B", + "step": 2400 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Mapped Buffers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "leftLogBase": 1, + "leftMax": null, + "leftMin": null, + "rightLogBase": 1, + "rightMax": null, + "rightMin": null + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 68 + }, + "id": 84, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_buffer_count_buffers{service=\"$service\", pod=\"$pod\", id=\"mapped\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "count", + "metric": "", + "refId": "A", + "step": 2400 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Mapped Buffers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "x-axis": true, + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "y-axis": true, + "y_formats": [ + "short", + "short" + ], + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "30s", + "schemaVersion": 20, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "webhook", + "value": "webhook" + }, + "datasource": "prometheus", + "definition": "label_values(jvm_memory_used_bytes, service)", + "hide": 0, + "includeAll": false, + "label": "Service", + "multi": false, + "name": "service", + "options": [], + "query": "label_values(jvm_memory_used_bytes, service)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allFormat": "glob", + "allValue": null, + "current": { + "text": "webhook-76c4646c4-k2h76", + "value": "webhook-76c4646c4-k2h76" + }, + "datasource": "prometheus", + "definition": "label_values(jvm_memory_used_bytes{service=\"$service\"}, pod)", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "multiFormat": "glob", + "name": "pod", + "options": [], + "query": "label_values(jvm_memory_used_bytes{service=\"$service\"}, pod)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allFormat": "glob", + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "prometheus", + "definition": "", + "hide": 0, + "includeAll": true, + "label": "JVM Memory Pools Heap", + "multi": false, + "multiFormat": "glob", + "name": "jvm_memory_pool_heap", + "options": [], + "query": "label_values(jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", area=\"heap\"},id)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allFormat": "glob", + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "prometheus", + "definition": "", + "hide": 0, + "includeAll": true, + "label": "JVM Memory Pools Non-Heap", + "multi": false, + "multiFormat": "glob", + "name": "jvm_memory_pool_nonheap", + "options": [], + "query": "label_values(jvm_memory_used_bytes{service=\"$service\", pod=\"$pod\", area=\"nonheap\"},id)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "JVM (Micrometer)", + "uid": "s3634ogn1", + "version": 3 +} \ No newline at end of file diff --git a/10-grafana-dashboard/Kafka-1585242659117.json b/10-grafana-dashboard/Kafka-1585242659117.json new file mode 100644 index 0000000..b2fcdda --- /dev/null +++ b/10-grafana-dashboard/Kafka-1585242659117.json @@ -0,0 +1,3046 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.4.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1585242649561, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 53, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 55, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kafka_consumergroup_current_offset_sum{kafka_cluster_name=\"$cluster\"}[$interval]) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ topic }} ({{ consumergroup }})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Consume Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 56, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_consumergroup_lag_sum{kafka_cluster_name=\"$cluster\"} > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ topic }} ({{ consumergroup }})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Consume Lag", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 8, + "panels": [], + "title": "Consumer Group", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 11, + "x": 0, + "y": 22 + }, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(kafka_consumergroup_current_offset_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"}[$interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{consumergroup}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Consumer Rate for $topic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 10, + "x": 11, + "y": 22 + }, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_consumergroup_lag_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{consumergroup}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Current Lag for $topic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 22 + }, + "id": 44, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(kafka_consumergroup_current_offset_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Selected Groups", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 26 + }, + "id": 45, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kafka_consumergroup_members{consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Total Consumers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 7, + "w": 11, + "x": 0, + "y": 30 + }, + "id": 61, + "options": { + "displayMode": "basic", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ], + "defaults": { + "decimals": 2, + "mappings": [ + { + "from": "-10", + "id": 1, + "operator": "", + "text": "Done", + "to": "0", + "type": 2, + "value": "-10" + } + ], + "max": 100, + "min": 0, + "thresholds": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1800 + }, + { + "color": "red", + "value": 3600 + } + ], + "title": "", + "unit": "s" + }, + "override": {}, + "values": false + }, + "orientation": "horizontal" + }, + "pluginVersion": "6.4.2", + "targets": [ + { + "expr": "kafka_consumergroup_lag_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"} / rate(kafka_consumergroup_current_offset_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"}[5m])", + "legendFormat": "{{ consumergroup }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ETA", + "type": "bargauge" + }, + { + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 13, + "x": 11, + "y": 30 + }, + "id": 59, + "options": {}, + "pageSize": null, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Group", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "consumergroup", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Partition", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "partition", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Lag", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "kafka_consumergroup_lag{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"}", + "format": "table", + "instant": true, + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Lag By Partition ($topic)", + "transform": "table", + "type": "table" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 2, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 18 + }, + "id": 50, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(kafka_cluster_Partition_Value{topic=~\"$topic\",name=\"ReplicasCount\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Partitions", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": { + "message in topic": "#1f78c1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 4, + "y": 18 + }, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kafka_log_Log_Value{topic=~\"$topic\",name=\"LogEndOffset\"}) - sum(kafka_log_Log_Value{topic=~\"$topic\",name=\"LogStartOffset\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "message in {{ topic }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Messages", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "message in topic": "#1f78c1", + "size": "#fceaca" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 18 + }, + "id": 48, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kafka_log_Log_Value{topic=~\"$topic\",name=\"Size\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "size", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 24 + }, + "id": 51, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "min(kafka_cluster_Partition_Value{topic=~\"$topic\",name=\"ReplicasCount\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Replicas", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": { + "in": "#ea6460", + "out": "#806eb7", + "produce rate (message/s)": "semi-dark-purple" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 4, + "y": 24 + }, + "id": 62, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kafka_log_Log_Value{topic=\"$topic\",name=\"LogEndOffset\"}[$interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "produce rate (message/s)", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Produce Rate for $topic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 24 + }, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_server_BrokerTopicMetrics_MeanRate{name=\"TotalProduceRequestsPerSec\",topic=\"$topic\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{ instance }} - Produce", + "refId": "B" + }, + { + "expr": "kafka_server_BrokerTopicMetrics_MeanRate{name=\"TotalFetchRequestsPerSec\",topic=\"$topic\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ instance}} - Fetch", + "refId": "A" + }, + { + "expr": "sum(kafka_server_BrokerTopicMetrics_MeanRate{name=\"TotalProduceRequestsPerSec\",topic=\"$topic\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Produce - Total", + "refId": "D" + }, + { + "expr": "sum(kafka_server_BrokerTopicMetrics_MeanRate{name=\"TotalFetchRequestsPerSec\",topic=\"$topic\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Fetch - Total", + "refId": "C" + }, + { + "expr": "sum(kafka_server_BrokerTopicMetrics_MeanRate{name=\"FailedProduceRequestsPerSec\",topic=\"$topic\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Produce - Failed", + "refId": "E" + }, + { + "expr": "sum(kafka_server_BrokerTopicMetrics_MeanRate{name=\"FailedFetchRequestsPerSec\",topic=\"$topic\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Fetch - Failed", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Topic-Level Requests Per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Topic", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 16, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 19 + }, + "id": 35, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "java_lang_OperatingSystem_ProcessCpuLoad{job=\"$job\",instance=~\"$instance\"} * 100", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "70,85", + "title": "Process CPU Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 19 + }, + "id": 36, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "java_lang_OperatingSystem_SystemCpuLoad{job=\"$job\",instance=~\"$instance\"} * 100", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "70,85", + "title": "System CPU Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 19 + }, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "process_open_fds{job=\"$job\",instance=~\"$instance\"} / process_max_fds{job=\"$job\",instance=~\"$instance\"} * 100", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "80,90", + "title": "File Description Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 19 + }, + "id": 33, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "jvm_memory_bytes_used{area=\"heap\",instance=\"$instance\",job=\"$job\"} / jvm_memory_bytes_max{area=\"heap\",instance=\"$instance\",job=\"$job\"} * 100", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "80,90", + "title": "Heap Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 25 + }, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_network_RequestMetrics_MeanRate{name=\"RequestsPerSec\",request=\"Produce\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "A" + }, + { + "expr": "kafka_network_RequestMetrics_MeanRate{name=\"RequestsPerSec\",request=\"FetchConsumer\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "B" + }, + { + "expr": "kafka_network_RequestMetrics_MeanRate{name=\"RequestsPerSec\",request=\"FetchFollower\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests Per Sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.110.246 - FetchConsumer": "#5195ce", + "192.168.110.246 - FetchFollower": "#3f6833", + "192.168.110.246 - Produce": "#eab839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 25 + }, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_network_RequestMetrics_Mean{name=\"TotalTimeMs\",request=\"Produce\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "A" + }, + { + "expr": "kafka_network_RequestMetrics_Mean{name=\"TotalTimeMs\",request=\"FetchConsumer\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "B" + }, + { + "expr": "kafka_network_RequestMetrics_Mean{name=\"TotalTimeMs\",request=\"FetchFollower\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Time to Server Request", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "ResponseQueueSize": "#447ebc", + "kafka_network_RequestChannel_Value{instance=\"192.168.110.245\",job=\"kafka-jmx\",name=\"RequestQueueSize\"}": "#9ac48a", + "kafka_network_RequestChannel_Value{instance=\"192.168.110.245\",job=\"kafka-jmx\",name=\"ResponseQueueSize\"}": "#447ebc" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 0, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 25 + }, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_network_RequestChannel_Value{name=\"ResponseQueueSize\",instance=~\"$instance\",processor=\"\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "refId": "A" + }, + { + "expr": "kafka_network_RequestChannel_Value{name=\"RequestQueueSize\",instance=~\"$instance\",processor=\"\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Queue Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 25 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{job=\"$job\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Process Memory (RSS)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Broker", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 28, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 40 + }, + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "kafka_brokers{kafka_cluster_name=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Brokers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 40 + }, + "id": 39, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(kafka_topic_partitions{kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Topics", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 40 + }, + "id": 41, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kafka_topic_partitions{kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Partitions", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 40 + }, + "id": 42, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kafka_consumergroup_members{kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Consumers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 40 + }, + "id": 40, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(kafka_consumergroup_members{kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Consumer Groups", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "${DS_PROMETHEUS}", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 40 + }, + "id": 26, + "interval": null, + "legend": { + "header": "", + "percentage": false, + "show": false, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "options": {}, + "pieType": "pie", + "strokeWidth": "1", + "targets": [ + { + "expr": "kafka_server_ReplicaManager_Value{name=\"PartitionCount\",kafka_cluster_name=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "title": "Partition Distribution", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "title": "Cluster", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 11, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Sum$/i", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_gc_collection_seconds_count{gc=\"G1 Young Generation\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - Count", + "refId": "A" + }, + { + "expr": "jvm_gc_collection_seconds_sum{gc=\"G1 Young Generation\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - Sum", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GC - G1 Young Generation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "count", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 0, + "format": "none", + "label": "sum (seconds)", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_pool_bytes_used{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}} - {{pool}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Pool Used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 0, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "JVM", + "type": "row" + } + ], + "refresh": "30s", + "schemaVersion": 20, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "3m", + "value": "3m" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": true, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,3m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": { + "text": "kafka-jmx-exporter", + "value": "kafka-jmx-exporter" + }, + "hide": 2, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [ + { + "selected": true, + "text": "kafka-jmx-exporter", + "value": "kafka-jmx-exporter" + } + ], + "query": "kafka-jmx-exporter", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(kafka_brokers, kafka_cluster_name)", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(kafka_brokers, kafka_cluster_name)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(jvm_gc_collection_seconds_count{job=\"$job\"}, instance)", + "hide": 0, + "includeAll": false, + "label": "Broker", + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(jvm_gc_collection_seconds_count{job=\"$job\"}, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(kafka_consumergroup_current_offset{kafka_cluster_name=\"$cluster\"}, topic)", + "hide": 0, + "includeAll": false, + "label": "Topic", + "multi": false, + "name": "topic", + "options": [], + "query": "label_values(kafka_consumergroup_current_offset{kafka_cluster_name=\"$cluster\"}, topic)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": "", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(kafka_consumergroup_current_offset{topic=~\"$topic\",kafka_cluster_name=\"$cluster\"}, consumergroup)", + "hide": 0, + "includeAll": true, + "label": "Consumer Group", + "multi": true, + "name": "consumergroup", + "options": [], + "query": "label_values(kafka_consumergroup_current_offset{topic=~\"$topic\",kafka_cluster_name=\"$cluster\"}, consumergroup)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka", + "uid": "Nf0UIbQiz", + "version": 12 +} \ No newline at end of file diff --git a/10-grafana-dashboard/MongoDB-1557302008804.json b/10-grafana-dashboard/MongoDB-1557302008804.json new file mode 100644 index 0000000..78c3a2b --- /dev/null +++ b/10-grafana-dashboard/MongoDB-1557302008804.json @@ -0,0 +1,1848 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "MongoDB prometheus Exporter Dashboard. \r\nWorks well with https://github.com/dcu/mongodb_exporter\r\n\r\nIf you have the node_exporter running on the mongo instance, you will also get some useful alert panels related to disk io and cpu.", + "editable": true, + "gnetId": 2583, + "graphTooltip": 1, + "id": 6, + "iteration": 1557302004534, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 22, + "panels": [], + "repeat": "env", + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "title": "Health metrics for $env", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "decimals": null, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "mongodb_instance_uptime_seconds{instance=~\"$env\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "0,360", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "mongodb_connections{instance=~\"$env\",state=\"available\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "mongodb_connections", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Available Connections", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 16, + "x": 8, + "y": 1 + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "mongodb_connections{instance=~\"$env\",state=\"current\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "mongodb_connections", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Open Connections", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 20, + "panels": [], + "repeat": "env", + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "title": "Query Metrics for $env", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 10, + "x": 0, + "y": 6 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(mongodb_op_counters_total{instance=~\"$env\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Query Operations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 10, + "y": 6 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "seriesOverrides": [ + { + "alias": "returned", + "yaxis": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(mongodb_metrics_document_total{instance=~\"$env\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{state}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Document Operations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 6 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(mongodb_metrics_query_executor_total{instance=~\"$env\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{state}}", + "refId": "A", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Document Query Executor", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 23, + "panels": [], + "repeat": null, + "title": "Resource Metrics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 13 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "mongodb_replset_oplog_size_bytes{instance=~\"$env\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "metric": "mongodb_locks_time_acquiring_global_microseconds_total", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Oplog Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 13 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "mongodb_memory{instance=~\"$env\",type=~\"resident|virtual\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "format": "decmbytes", + "label": "MB", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 13 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(mongodb_network_bytes_total{instance=~\"$env\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{state}}", + "metric": "mongodb_metrics_operation_total", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 21, + "panels": [], + "repeat": "env", + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "title": "Replica Set Metrics for $env", + "type": "row" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 20 + }, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "mongodb_replset_number_of_members{instance=~\"$env\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "# members", + "refId": "A", + "step": 600 + }, + { + "expr": "sum(mongodb_replset_member_health{instance=~\"$env\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "healthy members", + "refId": "B", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Member Health", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "description": "See which node is currently acting as the primary, secondary, ...", + "fill": 1, + "gridPos": { + "h": 6, + "w": 12, + "x": 6, + "y": 20 + }, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "mongodb_replset_member_state{instance=~\"$env\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{state}} - {{name}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Member State", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": false, + "values": [ + "current" + ] + }, + "yaxes": [ + { + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 20 + }, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "env": { + "selected": false, + "text": "10.0.0.6:9001", + "value": "10.0.0.6:9001" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(mongodb_op_counters_repl_total{instance=~\"$env\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Replica Query Operations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 24, + "panels": [], + "repeat": null, + "title": "Alerts", + "type": "row" + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 300 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "15m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "frequency": "60s", + "handler": 1, + "message": "A MongoDB oplog replication has been 5 minutes behind in its replication for more than 15 minutes.\nBeing able to fail over to a replica of your data is only useful if the data is up to date, so you need to know when that’s no longer the case!", + "name": "MongoDB Oplog lag alert", + "noDataState": "no_data", + "notifications": [ + { + "id": 2 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(mongodb_replset_oplog_head_timestamp[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 300 + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Oplog Lag", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "dtdurations", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0.98 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "frequency": "60s", + "handler": 1, + "message": "MongoDB's average disk i/o utilization has been above 98% for 5 minutes", + "name": "MongoDB's Disk IO Utilization alert", + "noDataState": "no_data", + "notifications": [ + { + "id": 2 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(rate(node_disk_io_time_ms{instance=~\"shr.*\"}[5m])/1000 or irate(node_disk_io_time_ms{instance=~\"shr.*\"}[5m])/1000)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}} - {{instance}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.98 + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IO Utilization", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 25, + "panels": [], + "repeat": null, + "title": "Dashboard Row", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_reads_completed{instance=~\"shr.*\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - {{instance}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Reads Completed", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_writes_completed{instance=~\"shr.*\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}} - {{instance}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Writes Completed", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "5s", + "schemaVersion": 18, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "prometheus", + "definition": "", + "hide": 0, + "includeAll": true, + "label": "env", + "multi": true, + "name": "env", + "options": [], + "query": "label_values(mongodb_connections, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "/.*-(.*?)-.*/", + "tags": [], + "tagsQuery": "label_values(mongodb_connections, instance)", + "type": "query", + "useTags": false + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "hide": 0, + "label": null, + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now/d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "MongoDB", + "uid": "iaX5Tsyiz", + "version": 1 +} diff --git a/10-grafana-dashboard/RabbitMQ-1585242673691.json b/10-grafana-dashboard/RabbitMQ-1585242673691.json new file mode 100644 index 0000000..1bbd494 --- /dev/null +++ b/10-grafana-dashboard/RabbitMQ-1585242673691.json @@ -0,0 +1,2120 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.4.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1585242667399, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 32, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*deliverd/i", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(rabbitmq_queue_messages_published_total{queue!~\"amq.gen.*\",cluster=\"$cluster\"}[$interval]) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ queue }} - published", + "refId": "A" + }, + { + "expr": "irate(rabbitmq_queue_messages_delivered_total{queue!~\"amq.gen.*\",cluster=\"$cluster\"}[$interval]) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ queue }} - deliverd", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 35, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*deliverd/i", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rabbitmq_queue_messages_ready{queue!~\"amq.gen.*\",cluster=\"$cluster\"} > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ queue }} - ready", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue Messages", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 8, + "panels": [ + { + "aliasColors": { + "single_customer_attr_score_reset_queue - publish /s": "#0a50a1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 18, + "x": 0, + "y": 20 + }, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(rabbitmq_queue_messages_published_total{queue=~\"$queue\",cluster=\"$cluster\"}[$interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{ queue }} - publish /s", + "refId": "B" + }, + { + "expr": "irate(rabbitmq_queue_messages_delivered_total{queue=~\"$queue\",cluster=\"$cluster\"}[$interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ queue }} - deliver /s", + "refId": "A" + }, + { + "expr": "irate(rabbitmq_queue_messages_ack_total{queue=~\"$queue\"}[$interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{ queue }} - ack /s", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue Rate - publish & deliver & ack", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 20 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rabbitmq_queue_messages_ready{queue=~\"$queue\",cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ queue }} - ready", + "refId": "A" + }, + { + "expr": "rabbitmq_queue_messages_unacknowledged{queue=~\"$queue\",cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ queue }} - unack", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue Messages - ready & unack", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 26 + }, + "id": 29, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "rabbitmq_queue_consumers{queue=~\"$queue\",cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "1,1", + "title": "Queue Consumers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 26 + }, + "id": 30, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "rabbitmq_queue_consumer_utilisation{queue=~\"$queue\",cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "1,1", + "title": "Queue Consumers Utilisation", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 26 + }, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rabbitmq_queue_message_bytes{queue=~\"$queue\",cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ queue }} - total", + "refId": "A" + }, + { + "expr": "rabbitmq_queue_message_bytes_ram{queue=~\"$queue\",cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ queue }} - in ram", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue Messages Size Total", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 26 + }, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rabbitmq_queue_memory{queue=~\"$queue\",cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ queue }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue Used Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Queue", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 12, + "panels": [ + { + "aliasColors": { + "in": "#64b0c8", + "in/s": "#447ebc", + "out": "#508642", + "out/s": "#629e51" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(rabbitmq_exchange_messages_published_in_total{exchange=~\"$exchange\",cluster=\"$cluster\"}[$interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ exchange }} in/s", + "refId": "A" + }, + { + "expr": "irate(rabbitmq_exchange_messages_published_out_total{exchange=~\"$exchange\",cluster=\"$cluster\"}[$interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ exchange }} out/s", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Exchange IN/OUT Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "in": "#64b0c8", + "out": "#508642" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rabbitmq_exchange_messages_published_in_total{exchange=~\"$exchange\",cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ exchange }} in", + "refId": "A" + }, + { + "expr": "rabbitmq_exchange_messages_published_out_total{exchange=~\"$exchange\",cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ exchange }} out", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Exchange IN/OUT Total", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Exchange", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 6, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorPostfix": false, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 16 + }, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "rabbitmq_up{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "1,1", + "title": "Status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "UP", + "value": "1" + }, + { + "op": "=", + "text": "DOWN", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 16 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(rabbitmq_running{cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "2,3", + "title": "Running Nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 16 + }, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "rabbitmq_connections{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Total Connections", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 16 + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "rabbitmq_consumers{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Total Consumers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 16 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "rabbitmq_exchanges{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Total Exchanges", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 16 + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "rabbitmq_queues{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Total Queues", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 22 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rabbitmq_fd_used{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ node }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Used File Descriptions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 22 + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rabbitmq_sockets_used{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ node }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Used Sockets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 22 + }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rabbitmq_node_mem_used{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ node }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Used Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 22 + }, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rabbitmq_node_disk_free{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ node }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Free Disk Space", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Node", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 20, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "1m", + "value": "1m" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": true, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,3m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(rabbitmq_up, cluster)", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(rabbitmq_up, cluster)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(rabbitmq_exchange_messages_published_in_total{cluster=\"$cluster\"}, exchange)", + "hide": 0, + "includeAll": false, + "label": "Exchange", + "multi": false, + "name": "exchange", + "options": [], + "query": "label_values(rabbitmq_exchange_messages_published_in_total{cluster=\"$cluster\"}, exchange)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(rabbitmq_queue_messages{queue!~\"amq.gen.*\",cluster=\"$cluster\"}, queue)", + "hide": 0, + "includeAll": false, + "label": "Queue", + "multi": false, + "name": "queue", + "options": [], + "query": "label_values(rabbitmq_queue_messages{queue!~\"amq.gen.*\",cluster=\"$cluster\"}, queue)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "RabbitMQ", + "uid": "yfz2DAQik", + "version": 3 +} \ No newline at end of file diff --git a/10-grafana-dashboard/Redis-1605076427129.json b/10-grafana-dashboard/Redis-1605076427129.json new file mode 100644 index 0000000..4a9d6d6 --- /dev/null +++ b/10-grafana-dashboard/Redis-1605076427129.json @@ -0,0 +1,1184 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Prometheus dashboard for Redis servers", + "editable": true, + "gnetId": 2751, + "graphTooltip": 0, + "id": 19, + "iteration": 1605076401656, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 2, + "x": 0, + "y": 0 + }, + "id": 9, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "redis_uptime_in_seconds{service=\"$service\"}", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 2, + "x": 2, + "y": 0 + }, + "hideTimeOverride": true, + "id": 12, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "redis_connected_clients{service=\"$service\"}", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Clients", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 4, + "y": 0 + }, + "hideTimeOverride": true, + "id": 14, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "(irate(redis_cpu_sys_seconds_total{service=~\"$service\"}[2m]) + irate(redis_cpu_user_seconds_total{service=~\"$service\"}[2m])) * 100", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "80,95", + "timeFrom": "1m", + "timeShift": null, + "title": "CPU Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 9, + "y": 0 + }, + "hideTimeOverride": true, + "id": 11, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "100 * (redis_memory_used_bytes{service=~\"$service\"} / redis_config_maxmemory{service=~\"$service\"} )", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 2 + } + ], + "thresholds": "80,95", + "timeFrom": "1m", + "timeShift": null, + "title": "Memory Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 10, + "x": 14, + "y": 0 + }, + "id": 2, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(redis_commands_processed_total{service=~\"$service\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "A", + "refId": "A", + "step": 240, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Commands Executed / sec", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "max": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 7, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "redis_memory_used_bytes{service=~\"$service\"} ", + "intervalFactor": 2, + "legendFormat": "used", + "metric": "", + "refId": "A", + "step": 240, + "target": "" + }, + { + "expr": "redis_config_maxmemory{service=~\"$service\"} ", + "hide": false, + "intervalFactor": 2, + "legendFormat": "max", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Memory Usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 10, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(redis_net_input_bytes_total{service=\"$service\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{ input }}", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(redis_net_output_bytes_total{service=\"$service\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ output }}", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 7, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 5, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum (redis_db_keys{service=~\"$service\"}) by (db)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ db }} ", + "refId": "A", + "step": 240, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Items per DB", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 7, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 13, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum (redis_db_keys{service=~\"$service\"}) - sum (redis_db_keys_expiring{service=~\"$service\"}) ", + "interval": "", + "intervalFactor": 2, + "legendFormat": "not expiring", + "refId": "A", + "step": 240, + "target": "" + }, + { + "expr": "sum (redis_db_keys_expiring{service=~\"$service\"}) ", + "interval": "", + "intervalFactor": 2, + "legendFormat": "expiring", + "metric": "", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Expiring vs Not-Expiring Keys", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "evicts": "#890F02", + "memcached_items_evicted_total{instance=\"172.17.0.1:9150\",job=\"prometheus\"}": "#890F02", + "reclaims": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 8, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "reclaims", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(redis_expired_keys_total{service=~\"$service\"}[5m])) by (service)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "expired", + "metric": "", + "refId": "A", + "step": 240, + "target": "" + }, + { + "expr": "sum(rate(redis_evicted_keys_total{service=~\"$service\"}[5m])) by (service)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "evicted", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Expired / Evicted", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 1, + "isNew": true, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": true, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(redis_keyspace_hits_total{service=\"$service\"}[5m])", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "hits", + "metric": "", + "refId": "A", + "step": 240, + "target": "" + }, + { + "expr": "irate(redis_keyspace_misses_total{service=\"$service\"}[5m])", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "misses", + "metric": "", + "refId": "B", + "step": 240, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Hits / Misses per Sec", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "30s", + "schemaVersion": 20, + "style": "dark", + "tags": [ + "prometheus", + "redis" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "redis-exporter", + "value": "redis-exporter" + }, + "datasource": "prometheus", + "definition": "", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "service", + "options": [], + "query": "label_values(redis_connected_clients, service)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Redis", + "uid": "3rsqaChMk", + "version": 1 +} \ No newline at end of file diff --git a/10-grafana-dashboard/mysql-1605080457760.json b/10-grafana-dashboard/mysql-1605080457760.json new file mode 100644 index 0000000..b64fc03 --- /dev/null +++ b/10-grafana-dashboard/mysql-1605080457760.json @@ -0,0 +1,1977 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": false, + "iconColor": "#e0752d", + "limit": 100, + "name": "PMM Annotations", + "showIn": 0, + "tags": [ + "pmm_annotation" + ], + "type": "tags" + } + ] + }, + "description": "Dashboard from Percona Monitoring and Management project. ", + "editable": true, + "gnetId": 12650, + "graphTooltip": 1, + "id": 43, + "iteration": 1605079814958, + "links": [ + { + "icon": "dashboard", + "includeVars": true, + "keepTime": true, + "tags": [ + "QAN" + ], + "targetBlank": false, + "title": "Query Analytics", + "type": "link", + "url": "http://192.168.149.131:9091/graph/d/7w6Q3PJmz/_pmm-query-analytics" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 106, + "panels": [], + "title": "", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "decimals": 1, + "description": "**MySQL Uptime**\n\nThe amount of time since the last restart of the MySQL server process.", + "editable": true, + "error": false, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 1 + }, + "height": "125px", + "id": 12, + "interval": "$interval", + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "s", + "postfixFontSize": "80%", + "prefix": "", + "prefixFontSize": "80%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "calculatedInterval": "10m", + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_status_uptime{service=\"$service\"}", + "format": "time_series", + "interval": "5m", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 300 + } + ], + "thresholds": "300,3600", + "title": "MySQL 在线时长", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#37872D", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 104, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "mysql_slave_status_slave_sql_running{service=\"$service\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "SQL Thread Running", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "未配置主从", + "value": "null" + }, + { + "op": "=", + "text": "NO", + "value": "0" + }, + { + "op": "=", + "text": "YES", + "value": "1" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#37872D", + "rgba(237, 129, 40, 0.89)", + "#F2495C" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 102, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "pluginVersion": "6.6.2", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "mysql_slave_status_slave_io_running{service=\"$service\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "IO Thread Running", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "YES", + "value": "1" + }, + { + "op": "=", + "text": "NO", + "value": "0" + }, + { + "op": "=", + "text": "未配置主从", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 96, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "mysql_global_variables_innodb_buffer_pool_size{service=\"$service\"}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "InnoDB Buffer Pool Size", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 94, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "rate(mysql_global_status_queries{service=~\"$service\"}[$interval]) or irate(mysql_global_status_queries{service=~\"$service\"}[5m])", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "实时QPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 100, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(mysql_global_status_commands_total{command=\"commit\",service=~\"$service\"}[5m]))by(service)+sum(rate(mysql_global_status_commands_total{command=\"rollback\",service=~\"$service\"}[5m]))by(service)", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "实时TPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "description": "**Current QPS**\n\nBased on the queries reported by MySQL's ``SHOW STATUS`` command, it is the number of statements executed by the server within the last second. This variable includes statements executed within stored programs, unlike the Questions variable. It does not count \n``COM_PING`` or ``COM_STATISTICS`` commands.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 4 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "targetBlank": true, + "title": "MySQL Server Status Variables", + "url": "https://dev.mysql.com/doc/refman/5.7/en/server-status-variables.html#statvar_Queries" + } + ], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "10m", + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_queries{service=\"$service\"}[$interval]) or irate(mysql_global_status_queries{service=\"$service\"}[5m])", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "qps", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "QPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 4 + }, + "hiddenSeries": false, + "id": 98, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(mysql_global_status_commands_total{command=\"commit\",service=~\"$service\"}[5m]))by(service)+sum(rate(mysql_global_status_commands_total{command=\"rollback\",service=~\"$service\"}[5m]))by(service)", + "legendFormat": "{{service}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "description": "**Table Locks**\n\nMySQL takes a number of different locks for varying reasons. In this graph we see how many Table level locks MySQL has requested from the storage engine. In the case of InnoDB, many times the locks could actually be row locks as it only takes table level locks in a few specific cases.\n\nIt is most useful to compare Locks Immediate and Locks Waited. If Locks waited is rising, it means you have lock contention. Otherwise, Locks Immediate rising and falling is normal activity.", + "editable": true, + "error": false, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 4 + }, + "hiddenSeries": false, + "id": 32, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_table_locks_immediate{service=~\"$service\"}[$interval]) or irate(mysql_global_status_table_locks_immediate{service=~\"$service\"}[5m])", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Table Locks Immediate", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_table_locks_waited{service=~\"$service\"}[$interval]) or irate(mysql_global_status_table_locks_waited{service=~\"$service\"}[5m])", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Table Locks Waited", + "metric": "", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MySQL Table Locks", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "description": "**MySQL Network Traffic**\n\nHere we can see how much network traffic is generated by MySQL. Outbound is network traffic sent from MySQL and Inbound is network traffic MySQL has received.", + "editable": true, + "error": false, + "fill": 6, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 11 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_bytes_received{service=\"$service\"}[$interval]) or irate(mysql_global_status_bytes_received{service=\"$service\"}[5m])", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "流入流量", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_bytes_sent{service=\"$service\"}[$interval]) or irate(mysql_global_status_bytes_sent{service=\"$service\"}[5m])", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "流出流量", + "metric": "", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MySQL 网络流量", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "none", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 0, + "description": "**Max Connections** \n\nMax Connections is the maximum permitted number of simultaneous client connections. By default, this is 151. Increasing this value increases the number of file descriptors that mysqld requires. If the required number of descriptors are not available, the server reduces the value of Max Connections.\n\nmysqld actually permits Max Connections + 1 clients to connect. The extra connection is reserved for use by accounts that have the SUPER privilege, such as root.\n\nMax Used Connections is the maximum number of connections that have been in use simultaneously since the server started.\n\nConnections is the number of connection attempts (successful or not) to the MySQL server.", + "editable": true, + "error": false, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 11 + }, + "height": "250px", + "hiddenSeries": false, + "id": 92, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "targetBlank": true, + "title": "MySQL Server System Variables", + "url": "https://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_max_connections" + } + ], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Max Connections", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_status_threads_running{service=\"$service\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "当前运行连接数", + "metric": "", + "refId": "C", + "step": 20, + "target": "" + }, + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_variables_max_connections{service=\"$service\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "系统最大连接数限", + "metric": "", + "refId": "A", + "step": 20, + "target": "" + }, + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "max(max_over_time(mysql_global_status_threads_connected{service=\"$service\"}[$interval]) or mysql_global_status_threads_connected{service=\"$service\"} )", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "已建立的连接", + "metric": "", + "refId": "B", + "step": 20, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Mysql 连接数", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "description": "**MySQL Slow Queries**\n\nSlow queries are defined as queries being slower than the long_query_time setting. For example, if you have long_query_time set to 3, all queries that take longer than 3 seconds to complete will show on this graph.", + "editable": true, + "error": false, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 11 + }, + "hiddenSeries": false, + "id": 48, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_slow_queries{service=\"$service\"}[$interval]) or irate(mysql_global_status_slow_queries{service=\"$service\"}[5m])", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "慢查询", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_slow_queries{service=\"$service\"}[1m])", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "慢查询", + "metric": "", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MySQL 慢查询总数", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 108, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 0, + "description": "***System Memory***: Total Memory for the system.\\\n***InnoDB Buffer Pool Data***: InnoDB maintains a storage area called the buffer pool for caching data and indexes in memory.\\\n***TokuDB Cache Size***: Similar in function to the InnoDB Buffer Pool, TokuDB will allocate 50% of the installed RAM for its own cache.\\\n***Key Buffer Size***: Index blocks for MYISAM tables are buffered and are shared by all threads. key_buffer_size is the size of the buffer used for index blocks.\\\n***Adaptive Hash Index Size***: When InnoDB notices that some index values are being accessed very frequently, it builds a hash index for them in memory on top of B-Tree indexes.\\\n ***Query Cache Size***: The query cache stores the text of a SELECT statement together with the corresponding result that was sent to the client. The query cache has huge scalability problems in that only one thread can do an operation in the query cache at the same time.\\\n***InnoDB Dictionary Size***: The data dictionary is InnoDB ‘s internal catalog of tables. InnoDB stores the data dictionary on disk, and loads entries into memory while the server is running.\\\n***InnoDB Log Buffer Size***: The MySQL InnoDB log buffer allows transactions to run without having to write the log to disk before the transactions commit.", + "editable": true, + "error": false, + "fill": 6, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 20 + }, + "hiddenSeries": false, + "id": 110, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "title": "Detailed descriptions about metrics", + "url": "https://www.percona.com/doc/percona-monitoring-and-management/dashboard.mysql-overview.html#mysql-internal-memory-overview" + } + ], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "System Memory", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal{service=~\"$service\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "System Memory", + "refId": "G", + "step": 4 + }, + { + "expr": "mysql_global_status_innodb_page_size{service=~\"$service\"} * on (service) mysql_global_status_buffer_pool_pages{service=~\"$service\",state=\"data\"}", + "format": "time_series", + "hide": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "InnoDB Buffer Pool Data", + "refId": "A", + "step": 20 + }, + { + "expr": "mysql_global_variables_innodb_log_buffer_size{service=~\"$service\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "InnoDB Log Buffer Size", + "refId": "D", + "step": 20 + }, + { + "expr": "mysql_global_variables_innodb_additional_mem_pool_size{service=~\"$service\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 2, + "legendFormat": "InnoDB Additional Memory Pool Size", + "refId": "H", + "step": 40 + }, + { + "expr": "mysql_global_status_innodb_mem_dictionary{service=~\"$service\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "InnoDB Dictionary Size", + "refId": "F", + "step": 20 + }, + { + "expr": "mysql_global_variables_key_buffer_size{service=~\"$service\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Key Buffer Size", + "refId": "B", + "step": 20 + }, + { + "expr": "mysql_global_variables_query_cache_size{service=~\"$service\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Query Cache Size", + "refId": "C", + "step": 20 + }, + { + "expr": "mysql_global_status_innodb_mem_adaptive_hash{service=~\"$service\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Adaptive Hash Index Size", + "refId": "E", + "step": 20 + }, + { + "expr": "mysql_global_variables_tokudb_cache_size{service=~\"$service\"}", + "format": "time_series", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "TokuDB Cache Size", + "refId": "I", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MySQL Internal Memory Overview", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Memory", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 112, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "description": "**Top Command Counters**\n\nThe Com_{{xxx}} statement counter variables indicate the number of times each xxx statement has been executed. There is one status variable for each type of statement. For example, Com_delete and Com_update count [``DELETE``](https://dev.mysql.com/doc/refman/5.7/en/delete.html) and [``UPDATE``](https://dev.mysql.com/doc/refman/5.7/en/update.html) statements, respectively. Com_delete_multi and Com_update_multi are similar but apply to [``DELETE``](https://dev.mysql.com/doc/refman/5.7/en/delete.html) and [``UPDATE``](https://dev.mysql.com/doc/refman/5.7/en/update.html) statements that use multiple-table syntax.", + "editable": true, + "error": false, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 3 + }, + "hiddenSeries": false, + "id": 114, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "title": "Server Status Variables (Com_xxx)", + "url": "https://dev.mysql.com/doc/refman/5.7/en/server-status-variables.html#statvar_Com_xxx" + } + ], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "topk(5, rate(mysql_global_status_commands_total{service=~\"$service\"}[$interval])>0) or irate(mysql_global_status_commands_total{service=~\"$service\"}[5m])>0", + "format": "time_series", + "hide": false, + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Com_{{ command }}", + "metric": "", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Top Command Counters", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Command, Handlers, Processes", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 116, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 4 + }, + "hiddenSeries": false, + "id": 118, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_opened_files{service=~\"$service\"}[$interval]) or irate(mysql_global_status_opened_files{service=~\"$service\"}[5m])", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Openings", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MySQL File Openings", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "hiddenSeries": false, + "id": 120, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_status_open_files{service=~\"$service\"}", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Open Files", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_variables_open_files_limit{service=~\"$service\"}", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "Open Files Limit", + "metric": "", + "refId": "D", + "step": 20 + }, + { + "expr": "mysql_global_status_innodb_num_open_files{service=~\"$service\"}", + "interval": "$interval", + "intervalFactor": 1, + "legendFormat": "InnoDB Open Files", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "MySQL Open Files", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Files and Tables", + "type": "row" + } + ], + "refresh": "1m", + "schemaVersion": 20, + "style": "dark", + "tags": [ + "Percona", + "MySQL", + "Linux_OS" + ], + "templating": { + "list": [ + { + "allFormat": "glob", + "auto": true, + "auto_count": 200, + "auto_min": "1s", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "multiFormat": "glob", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1s", + "value": "1s" + }, + { + "selected": false, + "text": "5s", + "value": "5s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + } + ], + "query": "1s,5s,1m,5m,1h,6h,1d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allFormat": "glob", + "allValue": null, + "current": { + "text": "172.20.0.152:9104", + "value": "172.20.0.152:9104" + }, + "datasource": "prometheus", + "definition": "label_values(mysql_up, service)", + "hide": 0, + "includeAll": false, + "label": "service", + "multi": false, + "multiFormat": "regex values", + "name": "service", + "options": [], + "query": "label_values(mysql_up, service)", + "refresh": 1, + "refresh_on_load": false, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": null, + "tags": [], + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now-0m" + }, + "timepicker": { + "collapse": false, + "enable": true, + "hidden": false, + "notice": false, + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "status": "Stable", + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ], + "type": "timepicker" + }, + "timezone": "browser", + "title": "mysql", + "uid": "MQWgroiiz", + "version": 3 +} \ No newline at end of file diff --git a/2-prometheus-operator/prometheus-operator-clusterRole.yaml b/2-prometheus-operator/prometheus-operator-clusterRole.yaml new file mode 100644 index 0000000..ff8c547 --- /dev/null +++ b/2-prometheus-operator/prometheus-operator-clusterRole.yaml @@ -0,0 +1,72 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator + apps.kubernetes.io/version: v0.30.0 + name: prometheus-operator +rules: + - apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - '*' + - apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers + - prometheuses + - prometheuses/finalizers + - alertmanagers/finalizers + - servicemonitors + - prometheusrules + verbs: + - '*' + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - '*' + - apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - '*' + - apiGroups: + - "" + resources: + - pods + verbs: + - list + - delete + - apiGroups: + - "" + resources: + - services + - services/finalizers + - endpoints + verbs: + - get + - create + - update + - delete + - apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch + - apiGroups: + - "" + resources: + - namespaces + verbs: + - get + - list + - watch diff --git a/2-prometheus-operator/prometheus-operator-clusterRoleBinding.yaml b/2-prometheus-operator/prometheus-operator-clusterRoleBinding.yaml new file mode 100644 index 0000000..c2f55f0 --- /dev/null +++ b/2-prometheus-operator/prometheus-operator-clusterRoleBinding.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator + apps.kubernetes.io/version: v0.30.0 + name: prometheus-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-operator +subjects: + - kind: ServiceAccount + name: prometheus-operator + namespace: monitoring diff --git a/2-prometheus-operator/prometheus-operator-deployment.yaml b/2-prometheus-operator/prometheus-operator-deployment.yaml new file mode 100644 index 0000000..b62c663 --- /dev/null +++ b/2-prometheus-operator/prometheus-operator-deployment.yaml @@ -0,0 +1,49 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator + apps.kubernetes.io/version: v0.30.0 + name: prometheus-operator + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator + template: + metadata: + labels: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator + apps.kubernetes.io/version: v0.30.0 + spec: + containers: + - args: + - --kubelet-service=kube-system/kubelet + - --logtostderr=true + - --config-reloader-image=registry.cn-hangzhou.aliyuncs.com/clab-docker/configmap-reload:v0.0.1 + - --prometheus-config-reloader=registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus-config-reloader:v0.30.0 + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus-operator:v0.30.0 + name: prometheus-operator + ports: + - containerPort: 8080 + name: http + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: prometheus-operator diff --git a/2-prometheus-operator/prometheus-operator-service.yaml b/2-prometheus-operator/prometheus-operator-service.yaml new file mode 100644 index 0000000..91df02a --- /dev/null +++ b/2-prometheus-operator/prometheus-operator-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator + apps.kubernetes.io/version: v0.30.0 + name: prometheus-operator + namespace: monitoring +spec: + clusterIP: None + ports: + - name: http + port: 8080 + targetPort: http + selector: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator diff --git a/2-prometheus-operator/prometheus-operator-serviceAccount.yaml b/2-prometheus-operator/prometheus-operator-serviceAccount.yaml new file mode 100644 index 0000000..fa0b5cc --- /dev/null +++ b/2-prometheus-operator/prometheus-operator-serviceAccount.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator + apps.kubernetes.io/version: v0.30.0 + name: prometheus-operator + namespace: monitoring diff --git a/3-prometheus/prometheus-clusterRole.yaml b/3-prometheus/prometheus-clusterRole.yaml new file mode 100644 index 0000000..e991486 --- /dev/null +++ b/3-prometheus/prometheus-clusterRole.yaml @@ -0,0 +1,24 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-k8s +rules: +- apiGroups: + - "" + resources: + - nodes + - nodes/metrics + - services + - endpoints + - pods + verbs: + - get + - list + - watch + +- nonResourceURLs: + - /metrics + - /scrape + - /probe + verbs: + - get diff --git a/3-prometheus/prometheus-clusterRoleBinding.yaml b/3-prometheus/prometheus-clusterRoleBinding.yaml new file mode 100644 index 0000000..554bb6f --- /dev/null +++ b/3-prometheus/prometheus-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus-k8s +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-k8s +subjects: +- kind: ServiceAccount + name: prometheus-k8s + namespace: monitoring diff --git a/3-prometheus/prometheus-prometheus.yaml b/3-prometheus/prometheus-prometheus.yaml new file mode 100644 index 0000000..0c5ad10 --- /dev/null +++ b/3-prometheus/prometheus-prometheus.yaml @@ -0,0 +1,48 @@ +apiVersion: monitoring.coreos.com/v1 +kind: Prometheus +metadata: + labels: + prometheus: k8s + name: k8s + namespace: monitoring +spec: + alerting: + alertmanagers: + - name: alertmanager-main + namespace: monitoring + port: web + baseImage: registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus + nodeSelector: + beta.kubernetes.io/os: linux + replicas: 1 + retention: 240h + resources: + requests: + memory: 400Mi + ruleSelector: + matchLabels: + prometheus: k8s + role: alert-rules + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: prometheus-k8s + serviceMonitorNamespaceSelector: {} + serviceMonitorSelector: {} + ### comment below if you want to use temp storage ### + storage: + volumeClaimTemplate: + selector: {} + spec: + # 阿里云高效云盘 + # storageClassName: alicloud-disk-topology-efficiency + storageClassName: prometheus-local-storage + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + ### comment above if you want to use temp storage ### + version: v2.11.1 + diff --git a/3-prometheus/prometheus-roleBindingConfig.yaml b/3-prometheus/prometheus-roleBindingConfig.yaml new file mode 100644 index 0000000..ec0129d --- /dev/null +++ b/3-prometheus/prometheus-roleBindingConfig.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: prometheus-k8s-config + namespace: monitoring +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: prometheus-k8s-config +subjects: +- kind: ServiceAccount + name: prometheus-k8s + namespace: monitoring diff --git a/3-prometheus/prometheus-roleConfig.yaml b/3-prometheus/prometheus-roleConfig.yaml new file mode 100644 index 0000000..e6dd062 --- /dev/null +++ b/3-prometheus/prometheus-roleConfig.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: prometheus-k8s-config + namespace: monitoring +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch diff --git a/3-prometheus/prometheus-rules.yaml b/3-prometheus/prometheus-rules.yaml new file mode 100644 index 0000000..644e0d6 --- /dev/null +++ b/3-prometheus/prometheus-rules.yaml @@ -0,0 +1,1049 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: prometheus-k8s-rules + namespace: monitoring +spec: + groups: + - name: k8s.rules + rules: + - expr: | + sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!=""}[5m])) by (namespace) + record: namespace:container_cpu_usage_seconds_total:sum_rate + - expr: | + sum(container_memory_usage_bytes{job="kubelet", image!="", container!=""}) by (namespace) + record: namespace:container_memory_usage_bytes:sum + - expr: | + sum by (namespace, pod, container, node) ( + rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!=""}[5m]) + ) + record: namespace_pod_container:container_cpu_usage_seconds_total:sum_rate + - expr: | + sum by(namespace) ( + kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} + * on (endpoint, instance, job, namespace, pod, service) + group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1) + ) + record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum + - expr: | + sum by (namespace) ( + kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} + * on (endpoint, instance, job, namespace, pod, service) + group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1) + ) + record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum + - expr: | + sum( + label_replace( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) by (namespace, workload, pod) + labels: + workload_type: deployment + record: mixin_pod_workload + - expr: | + sum( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) by (namespace, workload, pod) + labels: + workload_type: daemonset + record: mixin_pod_workload + - expr: | + sum( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) by (namespace, workload, pod) + labels: + workload_type: statefulset + record: mixin_pod_workload + - name: kube-apiserver.rules + rules: + - expr: | + histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.99" + record: cluster_quantile:apiserver_request_latencies:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.9" + record: cluster_quantile:apiserver_request_latencies:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.5" + record: cluster_quantile:apiserver_request_latencies:histogram_quantile + - name: node.rules + rules: + - expr: sum(min(kube_pod_info) by (node)) + record: ':kube_pod_info_node_count:' + - expr: | + max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod) + record: 'node_namespace_pod:kube_pod_info:' + - expr: | + count by (node) (sum by (node, cpu) ( + node_cpu_seconds_total{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + )) + record: node:node_num_cpu:sum + - expr: | + 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])) + record: :node_cpu_utilisation:avg1m + - expr: | + 1 - avg by (node) ( + rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info:) + record: node:node_cpu_utilisation:avg1m + - expr: | + node:node_cpu_utilisation:avg1m + * + node:node_num_cpu:sum + / + scalar(sum(node:node_num_cpu:sum)) + record: node:cluster_cpu_utilisation:ratio + - expr: | + sum(node_load1{job="node-exporter"}) + / + sum(node:node_num_cpu:sum) + record: ':node_cpu_saturation_load1:' + - expr: | + sum by (node) ( + node_load1{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + / + node:node_num_cpu:sum + record: 'node:node_cpu_saturation_load1:' + - expr: | + 1 - + sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + / + sum(node_memory_MemTotal_bytes{job="node-exporter"}) + record: ':node_memory_utilisation:' + - expr: | + sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + record: :node_memory_MemFreeCachedBuffers_bytes:sum + - expr: | + sum(node_memory_MemTotal_bytes{job="node-exporter"}) + record: :node_memory_MemTotal_bytes:sum + - expr: | + sum by (node) ( + (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_memory_bytes_available:sum + - expr: | + sum by (node) ( + node_memory_MemTotal_bytes{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_memory_bytes_total:sum + - expr: | + (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) + / + node:node_memory_bytes_total:sum + record: node:node_memory_utilisation:ratio + - expr: | + (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) + / + scalar(sum(node:node_memory_bytes_total:sum)) + record: node:cluster_memory_utilisation:ratio + - expr: | + 1e3 * sum( + (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) + + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) + ) + record: :node_memory_swap_io_bytes:sum_rate + - expr: | + 1 - + sum by (node) ( + (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + / + sum by (node) ( + node_memory_MemTotal_bytes{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: 'node:node_memory_utilisation:' + - expr: | + 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum) + record: 'node:node_memory_utilisation_2:' + - expr: | + 1e3 * sum by (node) ( + (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) + + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_memory_swap_io_bytes:sum_rate + - expr: | + avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) + record: :node_disk_utilisation:avg_irate + - expr: | + avg by (node) ( + irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_disk_utilisation:avg_irate + - expr: | + avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) + record: :node_disk_saturation:avg_irate + - expr: | + avg by (node) ( + irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_disk_saturation:avg_irate + - expr: | + max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} + - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) + / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) + record: 'node:node_filesystem_usage:' + - expr: | + max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) + record: 'node:node_filesystem_avail:' + - expr: | + sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + + sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + record: :node_net_utilisation:sum_irate + - expr: | + sum by (node) ( + (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) + + irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_net_utilisation:sum_irate + - expr: | + sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + + sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + record: :node_net_saturation:sum_irate + - expr: | + sum by (node) ( + (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) + + irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_net_saturation:sum_irate + - expr: | + max( + max( + kube_pod_info{job="kube-state-metrics", host_ip!=""} + ) by (node, host_ip) + * on (host_ip) group_right (node) + label_replace( + (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" + ) + ) by (node) + record: 'node:node_inodes_total:' + - expr: | + max( + max( + kube_pod_info{job="kube-state-metrics", host_ip!=""} + ) by (node, host_ip) + * on (host_ip) group_right (node) + label_replace( + (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" + ) + ) by (node) + record: 'node:node_inodes_free:' + - name: kube-prometheus-node-recording.rules + rules: + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY + (instance) + record: instance:node_cpu:rate:sum + - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) + BY (instance) + record: instance:node_filesystem_usage:sum + - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) + record: instance:node_network_receive_bytes:rate:sum + - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) + record: instance:node_network_transmit_bytes:rate:sum + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT + (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) + BY (instance, cpu)) BY (instance) + record: instance:node_cpu:ratio + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) + record: cluster:node_cpu:sum_rate5m + - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) + BY (instance, cpu)) + record: cluster:node_cpu:ratio + - name: kubernetes-absent + rules: + - alert: AlertmanagerDown + annotations: + summary: Alertmanager has disappeared from Prometheus target discovery. + expr: | + absent(up{job="alertmanager-main"} == 1) + for: 15m + labels: + everity: critical + - alert: CoreDNSDown + annotations: + summary: CoreDNS has disappeared from Prometheus target discovery. + expr: | + absent(up{job="kube-dns"} == 1) + for: 15m + labels: + everity: critical + - alert: KubeAPIDown + annotations: + summary: KubeAPI has disappeared from Prometheus target discovery. + expr: | + absent(up{job="apiserver"} == 1) + for: 15m + labels: + everity: critical + - alert: KubeStateMetricsDown + annotations: + summary: KubeStateMetrics has disappeared from Prometheus target discovery. + expr: | + absent(up{job="kube-state-metrics"} == 1) + for: 15m + labels: + everity: critical + - alert: KubeletDown + annotations: + summary: Kubelet has disappeared from Prometheus target discovery. + expr: | + absent(up{job="kubelet"} == 1) + for: 15m + labels: + everity: critical + - alert: NodeExporterDown + annotations: + summary: NodeExporter has disappeared from Prometheus target discovery. + expr: | + absent(up{job="node-exporter"} == 1) + for: 15m + labels: + everity: critical + - alert: PrometheusDown + annotations: + summary: Prometheus has disappeared from Prometheus target discovery. + expr: | + absent(up{job="prometheus-k8s"} == 1) + for: 15m + labels: + everity: critical + - alert: PrometheusOperatorDown + annotations: + summary: PrometheusOperator has disappeared from Prometheus target discovery. + expr: | + absent(up{job="prometheus-operator"} == 1) + for: 15m + labels: + everity: critical + - name: kubernetes-apps + rules: + - alert: KubePodRestart + annotations: + Summary: "pod不断重启持续时间30m" + Description: "pod={{ $labels.namespace }}/{{ $labels.pod }}\t重启次数={{ $value }}" + expr: | + rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0 + for: 30m + labels: + severity: warning + - alert: KubePodNotReady + annotations: + summary: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready + state for longer than an hour. + expr: | + sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) > 0 + for: 1h + labels: + severity: warning + - alert: KubeDeploymentGenerationMismatch + annotations: + summary: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment + }} does not match, this indicates that the Deployment has failed but has + not been rolled back. + expr: | + kube_deployment_status_observed_generation{job="kube-state-metrics"} + != + kube_deployment_metadata_generation{job="kube-state-metrics"} + for: 30m + labels: + everity: critical + - alert: KubeDeploymentReplicasMismatch + annotations: + summary: "Deployment: {{ $labels.namespace }}/{{ $labels.deployment }} 超过10m 已运行Pod数量小于replicas指定数量" + expr: | + kube_deployment_spec_replicas{job="kube-state-metrics"} + != + kube_deployment_status_replicas_available{job="kube-state-metrics"} + for: 10m + labels: + everity: critical + - alert: KubeStatefulSetReplicasMismatch + annotations: + summary: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has + not matched the expected number of replicas for longer than 15 minutes. + expr: | + kube_statefulset_status_replicas_ready{job="kube-state-metrics"} + != + kube_statefulset_status_replicas{job="kube-state-metrics"} + for: 30m + labels: + everity: critical + - alert: KubeStatefulSetGenerationMismatch + annotations: + summary: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset + }} does not match, this indicates that the StatefulSet has failed but has + not been rolled back. + expr: | + kube_statefulset_status_observed_generation{job="kube-state-metrics"} + != + kube_statefulset_metadata_generation{job="kube-state-metrics"} + for: 30m + labels: + everity: critical + - alert: KubeStatefulSetUpdateNotRolledOut + annotations: + summary: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update + has not been rolled out. + expr: | + max without (revision) ( + kube_statefulset_status_current_revision{job="kube-state-metrics"} + unless + kube_statefulset_status_update_revision{job="kube-state-metrics"} + ) + * + ( + kube_statefulset_replicas{job="kube-state-metrics"} + != + kube_statefulset_status_replicas_updated{job="kube-state-metrics"} + ) + for: 30m + labels: + everity: critical + - alert: KubeDaemonSetRolloutStuck + annotations: + summary: Only {{ $value }}% of the desired Pods of DaemonSet {{ $labels.namespace + }}/{{ $labels.daemonset }} are scheduled and ready. + expr: | + kube_daemonset_status_number_ready{job="kube-state-metrics"} + / + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} * 100 < 100 + for: 30m + labels: + everity: critical + - alert: KubeDaemonSetNotScheduled + annotations: + summary: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset + }} are not scheduled.' + expr: | + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} + - + kube_daemonset_status_current_number_scheduled{job="kube-state-metrics"} > 0 + for: 30m + labels: + everity: critical + - alert: KubeDaemonSetMisScheduled + annotations: + summary: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset + }} are running where they are not supposed to run.' + expr: | + kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0 + for: 10m + labels: + severity: warning + - alert: KubeCronJobRunning + annotations: + summary: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more + than 1h to complete. + expr: | + time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600 + for: 1h + labels: + severity: warning + - alert: KubeJobCompletion + annotations: + summary: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more + than one hour to complete. + expr: | + kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0 + for: 1h + labels: + severity: warning + - alert: KubeJobFailed + annotations: + summary: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. + expr: | + kube_job_status_failed{job="kube-state-metrics"} > 0 + for: 1h + labels: + severity: warning + - name: kubernetes-resources + rules: + - alert: KubeMemOvercommit + annotations: + summary: Cluster has overcommitted memory resource requests for Pods and cannot + tolerate node failure. + expr: | + sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum) + / + sum(node_memory_MemTotal_bytes) + > + (count(node:node_num_cpu:sum)-1) + / + count(node:node_num_cpu:sum) + for: 5m + labels: + severity: warning + - alert: KubeCPUOvercommit + annotations: + summary: Cluster has overcommitted CPU resource requests for Namespaces. + expr: | + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) + / + sum(node:node_num_cpu:sum) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeMemOvercommit + annotations: + summary: Cluster has overcommitted memory resource requests for Namespaces. + expr: | + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"}) + / + sum(node_memory_MemTotal_bytes{job="node-exporter"}) + > 1.5 + for: 5m + labels: + severity: warning + - alert: KubeQuotaExceeded + annotations: + summary: Namespace {{ $labels.namespace }} is using {{ printf "%0.0f" $value + }}% of its {{ $labels.resource }} quota. + expr: | + 100 * kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 90 + for: 15m + labels: + severity: warning + - alert: CPUThrottlinghigh + annotations: + summary: '{{ printf "%0.0f" $value }}% throttling of CPU in namespace {{ $labels.namespace + }} for container {{ $labels.container }} in pod {{ $labels.pod + }}.' + expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", + }[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) + by (container, pod, namespace)\n > 90 \n" + for: 15m + labels: + severity: warning + - name: kubernetes-storage + rules: + - alert: KubePersistentVolumeUsagehigh + annotations: + summary: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim + }} in Namespace {{ $labels.namespace }} is only {{ printf "%0.2f" $value + }}% free. + expr: | + 100 * kubelet_volume_stats_available_bytes{job="kubelet"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet"} + < 3 + for: 1m + labels: + everity: critical + - alert: KubePersistentVolumeFullInFourDays + annotations: + summary: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim + }} in Namespace {{ $labels.namespace }} is expected to fill up within four + days. Currently {{ printf "%0.2f" $value }}% is available. + expr: | + 100 * ( + kubelet_volume_stats_available_bytes{job="kubelet"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet"} + ) < 15 + and + predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0 + for: 5m + labels: + everity: critical + - alert: KubePersistentVolumeErrors + annotations: + summary: The persistent volume {{ $labels.persistentvolume }} has status {{ + $labels.phase }}. + expr: | + kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 + for: 5m + labels: + everity: critical + - name: kubernetes-system + rules: + - alert: KubeNodeNotReady + annotations: + summary: '{{ $labels.node }} has been unready for more than an hour.' + expr: | + kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 + for: 1h + labels: + severity: warning + - alert: KubeVersionMismatch + annotations: + summary: There are {{ $value }} different semantic versions of Kubernetes + components running. + expr: | + count(count by (gitVersion) (label_replace(kubernetes_build_info{job!="kube-dns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1 + for: 1h + labels: + severity: warning + - alert: KubeClientErrors + annotations: + summary: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance + }}' is experiencing {{ printf "%0.0f" $value }}% errors.' + expr: | + (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job) + / + sum(rate(rest_client_requests_total[5m])) by (instance, job)) + * 100 > 1 + for: 15m + labels: + severity: warning + - alert: KubeClientErrors + annotations: + summary: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance + }}' is experiencing {{ printf "%0.0f" $value }} errors / second. + expr: | + sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1 + for: 15m + labels: + severity: warning + - alert: KubeletTooManyPods + annotations: + summary: Kubelet {{ $labels.instance }} is running {{ $value }} Pods, close + to the limit of 110. + expr: | + kubelet_running_pod_count{job="kubelet"} > 110 * 0.9 + for: 15m + labels: + severity: warning + - alert: KubeAPILatencyhigh + annotations: + summary: The API server has a 99th percentile latency of {{ $value }} seconds + for {{ $labels.verb }} {{ $labels.resource }}. + expr: | + cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1 + for: 10m + labels: + severity: warning + - alert: KubeAPILatencyhigh + annotations: + summary: The API server has a 99th percentile latency of {{ $value }} seconds + for {{ $labels.verb }} {{ $labels.resource }}. + expr: | + cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4 + for: 10m + labels: + everity: critical + - alert: KubeAPIErrorshigh + annotations: + summary: API server is returning errors for {{ $value }}% of requests. + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3 + for: 10m + labels: + everity: critical + - alert: KubeAPIErrorshigh + annotations: + summary: API server is returning errors for {{ $value }}% of requests. + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1 + for: 10m + labels: + severity: warning + - alert: KubeAPIErrorshigh + annotations: + summary: API server is returning errors for {{ $value }}% of requests for + {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}. + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10 + for: 10m + labels: + everity: critical + - alert: KubeAPIErrorshigh + annotations: + summary: API server is returning errors for {{ $value }}% of requests for + {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}. + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5 + for: 10m + labels: + severity: warning + - alert: KubeClientCertificateExpiration + annotations: + summary: A client certificate used to authenticate to the apiserver is expiring + in less than 7.0 days. + expr: | + apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 + labels: + severity: warning + - alert: KubeClientCertificateExpiration + annotations: + summary: A client certificate used to authenticate to the apiserver is expiring + in less than 24.0 hours. + expr: | + apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 + labels: + everity: critical + - name: alertmanager.rules + rules: + - alert: AlertmanagerConfigInconsistent + annotations: + summary: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` + are out of sync. + expr: | + count_values("config_hash", alertmanager_config_hash{job="alertmanager-main"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator",controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1 + for: 5m + labels: + everity: critical + - alert: AlertmanagerFailedReload + annotations: + summary: Reloading Alertmanager's configuration has failed for {{ $labels.namespace + }}/{{ $labels.pod}}. + expr: | + alertmanager_config_last_reload_successful{job="alertmanager-main"} == 0 + for: 10m + labels: + severity: warning + - alert: AlertmanagerMembersInconsistent + annotations: + summary: Alertmanager has not found all other members of the cluster. + expr: | + alertmanager_cluster_members{job="alertmanager-main"} + != on (service) GROUP_LEFT() + count by (service) (alertmanager_cluster_members{job="alertmanager-main"}) + for: 5m + labels: + everity: critical + - name: general.rules + rules: + - alert: TargetDown + annotations: + summary: '{{ $value }}% of the {{ $labels.job }} targets are down.' + expr: 100 * (count(up{job!="jmx-exporter"} == 0) BY (service,instance,namespace) / count(up{job="jmx-exporter"}) BY (service,instance,namespace)) > 10 + for: 10m + labels: + severity: warning + #- alert: Watchdog + # annotations: + # summary: | + # This is an alert meant to ensure that the entire alerting pipeline is functional. + # This alert is always firing, therefore it should always be firing in Alertmanager + # and always fire against a receiver. There are integrations with various notification + # mechanisms that send a notification when this alert is not firing. For example the + # "DeadMansSnitch" integration in PagerDuty. + # expr: vector(1) + # labels: + # severity: none + - name: kube-prometheus-node-alerting.rules + rules: + - alert: NodeDiskRunningFull + annotations: + summary: Device {{ $labels.device }} of node-exporter {{ $labels.namespace + }}/{{ $labels.pod }} will be full within the next 24 hours. + expr: | + (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0) + for: 30m + labels: + severity: warning + - alert: NodeDiskRunningFull + annotations: + summary: Device {{ $labels.device }} of node-exporter {{ $labels.namespace + }}/{{ $labels.pod }} will be full within the next 2 hours. + expr: | + (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0) + for: 10m + labels: + everity: critical + - name: node-time + rules: + - alert: ClockSkewDetected + annotations: + Summary: "本地时间出现偏差" + Description: "服务器={{ $labels.instance }}\t时间偏移量={{ $value }}" + expr: | + ( node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0 ) or ( node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0 ) + for: 2m + labels: + severity: warning + - name: node-network + rules: + - alert: NetworkReceiveErrors + annotations: + summary: Network interface "{{ $labels.device }}" showing receive errors on + node-exporter {{ $labels.namespace }}/{{ $labels.pod }}" + expr: | + rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0 + for: 2m + labels: + severity: warning + - alert: NetworkTransmitErrors + annotations: + summary: Network interface "{{ $labels.device }}" showing transmit errors + on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}" + expr: | + rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0 + for: 2m + labels: + severity: warning + - alert: NodeNetworkInterfaceFlapping + annotations: + summary: Network interface "{{ $labels.device }}" changing it's up status + often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}" + expr: | + changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 + for: 2m + labels: + severity: warning + - name: prometheus.rules + rules: + - alert: PrometheusConfigReloadFailed + annotations: + description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}} + summary: Reloading Prometheus' configuration failed + expr: | + prometheus_config_last_reload_successful{job="prometheus-k8s"} == 0 + for: 10m + labels: + severity: warning + - alert: PrometheusNotificationQueueRunningFull + annotations: + description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{ + $labels.pod}} + summary: Prometheus' alert notification queue is running full + expr: | + predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="prometheus-k8s"} + for: 10m + labels: + severity: warning + - alert: PrometheusErrorSendingAlerts + annotations: + description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ + $labels.pod}} to Alertmanager {{$labels.Alertmanager}} + summary: Errors while sending alert from Prometheus + expr: | + rate(prometheus_notifications_errors_total{job="prometheus-k8s"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s"}[5m]) > 0.01 + for: 10m + labels: + severity: warning + - alert: PrometheusErrorSendingAlerts + annotations: + description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ + $labels.pod}} to Alertmanager {{$labels.Alertmanager}} + summary: Errors while sending alerts from Prometheus + expr: | + rate(prometheus_notifications_errors_total{job="prometheus-k8s"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s"}[5m]) > 0.03 + for: 10m + labels: + everity: critical + - alert: PrometheusNotConnectedToAlertmanagers + annotations: + description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected + to any Alertmanagers + summary: Prometheus is not connected to any Alertmanagers + expr: | + prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s"} < 1 + for: 10m + labels: + severity: warning + - alert: PrometheusTSDBReloadsFailing + annotations: + description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} + reload failures over the last four hours.' + summary: Prometheus has issues reloading data blocks from disk + expr: | + increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s"}[2h]) > 0 + for: 12h + labels: + severity: warning + - alert: PrometheusTSDBCompactionsFailing + annotations: + description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} + compaction failures over the last four hours.' + summary: Prometheus has issues compacting sample blocks + expr: | + increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s"}[2h]) > 0 + for: 12h + labels: + severity: warning + - alert: PrometheusTSDBWALCorruptions + annotations: + description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead + log (WAL).' + summary: Prometheus write-ahead log is corrupted + expr: | + prometheus_tsdb_wal_corruptions_total{job="prometheus-k8s"} > 0 + for: 4h + labels: + severity: warning + - alert: PrometheusNotIngestingSamples + annotations: + description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting + samples. + summary: Prometheus isn't ingesting samples + expr: | + rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s"}[5m]) <= 0 + for: 10m + labels: + severity: warning + - alert: PrometheusTargetScrapesDuplicate + annotations: + description: '{{$labels.namespace}}/{{$labels.pod}} has many samples rejected + due to duplicate timestamps but different values' + summary: Prometheus has many samples rejected + expr: | + increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s"}[5m]) > 0 + for: 10m + labels: + severity: warning + - name: prometheus-operator + rules: + - alert: PrometheusOperatorReconcileErrors + annotations: + summary: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace + }} Namespace. + expr: | + rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1 + for: 10m + labels: + severity: warning + - alert: PrometheusOperatorNodeLookupErrors + annotations: + summary: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace. + expr: | + rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator"}[5m]) > 0.1 + for: 10m + labels: + severity: warning + - alert: TooManyMessagesInQueue + expr: rabbitmq_queue_messages_ready{queue="my-queue"} > 100000 + for: 5m + labels: + severity: warning + annotations: + summary: "Too many messages in queue (instance {{ $labels.instance }})" + description: "Queue is filling up (> 1000 msgs)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" + - alert: SlowQueueConsuming + expr: time() - rabbitmq_queue_head_message_timestamp{queue="my-queue"} > 60 + for: 5m + labels: + severity: warning + annotations: + summary: "Slow queue consuming (instance {{ $labels.instance }})" + description: "Queue messages are consumed slowly (> 60s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" +##node + - name: user-defined-alarm-rule + rules: + - alert: NodeLoad + expr: node_load5 > count without(cpu, mode) (node_cpu_seconds_total{mode="idle"}) * 2.5 + for: 15m + labels: + severity: critical + annotations: + summary: "服务器负载过高" + description: "服务器={{ $labels.instance }}\t负载值={{ $value }}\n" + - alert: nodeTcpCurrEstab + expr: node_netstat_Tcp_CurrEstab > 10000 + for: 5m + labels: + severity: warning + annotations: + summary: "TCP已建立连接数持续5m超出10000" + description: "服务器={{ $labels.instance }}\tTCP已建立连接数={{ $value }}\n" + - alert: NodeMemoryUsage + expr: | + ceil((node_memory_MemTotal_bytes - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes)) / node_memory_MemTotal_bytes * 100) >= 95 + for: 30m + labels: + severity: critical + annotations: + summary: "服务器内存使用率持续30m超过95%" + description: "服务器={{$labels.instance}}\t内存使用率={{ $value }}%\n" + - alert: highCpuLoad + expr: ceil(100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)) > 90 + for: 10m + labels: + severity: critical + annotations: + summary: "服务器CPU使用率持续10m超出90%" + description: "服务器={{$labels.instance}}\tCPU使用率={{ $value }}%\n" + - alert: FilesystemUsage + expr: | + ceil(100-(node_filesystem_free_bytes{fstype=~"ext4|xfs"}/node_filesystem_size_bytes {fstype=~"ext4|xfs"}*100)) > 82 + for: 10m + labels: + severity: critical + annotations: + summary: "服务器磁盘使用率持续10m超出82%" + description: "服务器={{$labels.instance}}\t挂载点={{$labels.mountpoint}}\t磁盘使用率={{ $value }}%\n" + - alert: node-up + expr: avg(up{job!~"^(jmx-exporter|spring-boot-actuator)$"}) by (service,instance,namespace) == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "命名空间{{ $labels.namespace }}下采集对象{{ $labels.service }}持续2m收集不到数据!" + - alert: node-reboot + expr: time()-node_boot_time_seconds{} < 600 + for: 30s + labels: + severity: critical + annotations: + summary: "服务器重启" + description: "服务器={{ $labels.instance }} 已重启\n" + - alert: containerCpuTooHigh + expr: ceil(sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{pod!~".*-exporter-.*"}) by (container, pod, namespace, node) / sum(kube_pod_container_resource_limits_cpu_cores{pod!~".*-exporter-.*"}) by (container, pod, namespace, node) * 100) > 90 + for: 10m + labels: + severity: critical + annotations: + summary: "容器CPU使用率持续10m超出90%" + description: "所在节点={{ $labels.node }}\tPod名称={{ $labels.pod }}\tContainer名称={{ $labels.container }}\t命名空间={{ $labels.namespace }}\tCPU使用率={{ $value }}%\n" + - alert: containerMemoryTooHigh + expr: ceil( sum(container_memory_working_set_bytes{pod!~".*exporter.*"}) by (container, namespace, pod, node) / sum(kube_pod_container_resource_limits_memory_bytes{pod!~".*exporter.*"}) by (container, namespace, pod, node) * 100) > 95 + for: 30m + labels: + severity: warning + annotations: + summary: "容器内存使用率持续30m超出95%" + description: "所在节点={{ $labels.node }}\tPod名称={{ $labels.pod }}\tContainer名称={{ $labels.container }}\t命名空间={{ $labels.namespace }}\t内存使用率={{ $value }}%\n" diff --git a/3-prometheus/prometheus-service.yaml b/3-prometheus/prometheus-service.yaml new file mode 100644 index 0000000..0bc662c --- /dev/null +++ b/3-prometheus/prometheus-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + prometheus: k8s + name: prometheus-k8s + namespace: monitoring +spec: + type: NodePort + ports: + - name: web + port: 9090 + targetPort: web + nodePort: 30900 + selector: + app: prometheus + prometheus: k8s + sessionAffinity: ClientIP diff --git a/3-prometheus/prometheus-serviceAccount.yaml b/3-prometheus/prometheus-serviceAccount.yaml new file mode 100644 index 0000000..3e55fad --- /dev/null +++ b/3-prometheus/prometheus-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-k8s + namespace: monitoring diff --git a/4-prometheus-adapter/prometheus-adapter-apiService.yaml b/4-prometheus-adapter/prometheus-adapter-apiService.yaml new file mode 100644 index 0000000..a215efe --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-apiService.yaml @@ -0,0 +1,13 @@ +apiVersion: apiregistration.k8s.io/v1 +kind: APIService +metadata: + name: v1beta1.metrics.k8s.io +spec: + group: metrics.k8s.io + groupPriorityMinimum: 100 + insecureSkipTLSVerify: true + service: + name: prometheus-adapter + namespace: monitoring + version: v1beta1 + versionPriority: 100 diff --git a/4-prometheus-adapter/prometheus-adapter-clusterRole.yaml b/4-prometheus-adapter/prometheus-adapter-clusterRole.yaml new file mode 100644 index 0000000..7af0982 --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-clusterRole.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-adapter +rules: + - apiGroups: + - "" + resources: + - nodes + - namespaces + - pods + - services + verbs: + - get + - list + - watch diff --git a/4-prometheus-adapter/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml b/4-prometheus-adapter/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml new file mode 100644 index 0000000..dba897c --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + rbac.authorization.k8s.io/aggregate-to-admin: "true" + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-view: "true" + name: system:aggregated-metrics-reader +rules: + - apiGroups: + - metrics.k8s.io + resources: + - pods + verbs: + - get + - list + - watch diff --git a/4-prometheus-adapter/prometheus-adapter-clusterRoleBinding.yaml b/4-prometheus-adapter/prometheus-adapter-clusterRoleBinding.yaml new file mode 100644 index 0000000..ae4cf53 --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-clusterRoleBinding.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus-adapter + namespace: monitoring +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-adapter +subjects: + - kind: ServiceAccount + name: prometheus-adapter + namespace: monitoring diff --git a/4-prometheus-adapter/prometheus-adapter-clusterRoleBindingDelegator.yaml b/4-prometheus-adapter/prometheus-adapter-clusterRoleBindingDelegator.yaml new file mode 100644 index 0000000..58742aa --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-clusterRoleBindingDelegator.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: resource-metrics:system:auth-delegator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:auth-delegator +subjects: + - kind: ServiceAccount + name: prometheus-adapter + namespace: monitoring diff --git a/4-prometheus-adapter/prometheus-adapter-clusterRoleServerResources.yaml b/4-prometheus-adapter/prometheus-adapter-clusterRoleServerResources.yaml new file mode 100644 index 0000000..80d1fc1 --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-clusterRoleServerResources.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: resource-metrics-server-resources +rules: + - apiGroups: + - metrics.k8s.io + resources: + - '*' + verbs: + - '*' diff --git a/4-prometheus-adapter/prometheus-adapter-configMap.yaml b/4-prometheus-adapter/prometheus-adapter-configMap.yaml new file mode 100644 index 0000000..1826d41 --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-configMap.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +data: + config.yaml: | + resourceRules: + cpu: + containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[1m])) by (<<.GroupBy>>) + nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod: + resource: pod + containerLabel: container + memory: + containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>) + nodeQuery: sum(node:node_memory_bytes_total:sum{<<.LabelMatchers>>} - node:node_memory_bytes_available:sum{<<.LabelMatchers>>}) by (<<.GroupBy>>) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod: + resource: pod + containerLabel: container + window: 1m +kind: ConfigMap +metadata: + name: adapter-config + namespace: monitoring diff --git a/4-prometheus-adapter/prometheus-adapter-deployment.yaml b/4-prometheus-adapter/prometheus-adapter-deployment.yaml new file mode 100644 index 0000000..04c4ddb --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-deployment.yaml @@ -0,0 +1,53 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus-adapter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + name: prometheus-adapter + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + name: prometheus-adapter + spec: + containers: + - args: + - --cert-dir=/var/run/serving-cert + - --config=/etc/adapter/config.yaml + - --logtostderr=true + - --metrics-relist-interval=1m + - --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/ + - --secure-port=6443 + # image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.4.1 + image: gaven/k8s-prometheus-adapter-amd64:v0.4.1 + name: prometheus-adapter + ports: + - containerPort: 6443 + volumeMounts: + - mountPath: /tmp + name: tmpfs + readOnly: false + - mountPath: /var/run/serving-cert + name: volume-serving-cert + readOnly: false + - mountPath: /etc/adapter + name: config + readOnly: false + nodeSelector: + beta.kubernetes.io/os: linux + serviceAccountName: prometheus-adapter + volumes: + - emptyDir: {} + name: tmpfs + - emptyDir: {} + name: volume-serving-cert + - configMap: + name: adapter-config + name: config diff --git a/4-prometheus-adapter/prometheus-adapter-roleBindingAuthReader.yaml b/4-prometheus-adapter/prometheus-adapter-roleBindingAuthReader.yaml new file mode 100644 index 0000000..00da03c --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-roleBindingAuthReader.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: resource-metrics-auth-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: + - kind: ServiceAccount + name: prometheus-adapter + namespace: monitoring diff --git a/4-prometheus-adapter/prometheus-adapter-service.yaml b/4-prometheus-adapter/prometheus-adapter-service.yaml new file mode 100644 index 0000000..a374c7f --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + name: prometheus-adapter + name: prometheus-adapter + namespace: monitoring +spec: + ports: + - name: https + port: 443 + targetPort: 6443 + selector: + name: prometheus-adapter diff --git a/4-prometheus-adapter/prometheus-adapter-serviceAccount.yaml b/4-prometheus-adapter/prometheus-adapter-serviceAccount.yaml new file mode 100644 index 0000000..d7e7050 --- /dev/null +++ b/4-prometheus-adapter/prometheus-adapter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-adapter + namespace: monitoring diff --git a/5-alertmanager/alertmanager-alertmanager.yaml b/5-alertmanager/alertmanager-alertmanager.yaml new file mode 100644 index 0000000..b334ee0 --- /dev/null +++ b/5-alertmanager/alertmanager-alertmanager.yaml @@ -0,0 +1,18 @@ +apiVersion: monitoring.coreos.com/v1 +kind: Alertmanager +metadata: + labels: + alertmanager: main + name: main + namespace: monitoring +spec: + baseImage: registry.cn-hangzhou.aliyuncs.com/clab-docker/alertmanager + nodeSelector: + beta.kubernetes.io/os: linux + replicas: 1 + securityContext: + fsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + serviceAccountName: alertmanager-main + version: v0.17.0 diff --git a/5-alertmanager/alertmanager-clusterRole.yaml b/5-alertmanager/alertmanager-clusterRole.yaml new file mode 100644 index 0000000..1c41a58 --- /dev/null +++ b/5-alertmanager/alertmanager-clusterRole.yaml @@ -0,0 +1,10 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: alertmanager-main +rules: +- nonResourceURLs: + - /dingtalk/* + verbs: + - get + - create diff --git a/5-alertmanager/alertmanager-clusterRoleBinding.yaml b/5-alertmanager/alertmanager-clusterRoleBinding.yaml new file mode 100644 index 0000000..78d916b --- /dev/null +++ b/5-alertmanager/alertmanager-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: alertmanager-main +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: alertmanager-main +subjects: +- kind: ServiceAccount + name: alertmanager-main + namespace: monitoring diff --git a/5-alertmanager/alertmanager-secret.yaml b/5-alertmanager/alertmanager-secret.yaml new file mode 100644 index 0000000..af35891 --- /dev/null +++ b/5-alertmanager/alertmanager-secret.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +data: + # default dingtalk + alertmanager.yaml: Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0KcmVjZWl2ZXJzOgotIG5hbWU6IGRpbmd0YWxrCiAgd2ViaG9va19jb25maWdzOgogICAgLSB1cmw6IGh0dHBzOi8vcHJvbWV0aGV1cy13ZWJob29rLWRpbmd0YWxrOjgwNjAvZGluZ3RhbGsvb3BzL3NlbmQKICAgICAgaHR0cF9jb25maWc6CiAgICAgICAgYmVhcmVyX3Rva2VuX2ZpbGU6IC92YXIvcnVuL3NlY3JldHMva3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC90b2tlbgogICAgICAgIHRsc19jb25maWc6CiAgICAgICAgICBpbnNlY3VyZV9za2lwX3ZlcmlmeTogdHJ1ZQojICAgIC0gdXJsOiBodHRwczovL3NyZS5kbWh1Yi5jbi9tb25pdG9yV2ViaG9vay94eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4CiMgICAgICBodHRwX2NvbmZpZzoKIyAgICAgICAgdGxzX2NvbmZpZzoKIyAgICAgICAgICBpbnNlY3VyZV9za2lwX3ZlcmlmeTogdHJ1ZQoKcm91dGU6CiAgZ3JvdXBfYnk6CiAgICAtIGFsZXJ0bmFtZQogIGdyb3VwX3dhaXQ6ICIxNXMiCiAgZ3JvdXBfaW50ZXJ2YWw6ICI1bSIKICByZXBlYXRfaW50ZXJ2YWw6ICIyNGgiCiAgcmVjZWl2ZXI6ICJkaW5ndGFsayIKICByb3V0ZXM6CiAgICAtIHJlY2VpdmVyOiAnZGluZ3RhbGsnCiAgICAgIGdyb3VwX3dhaXQ6ICIxNXMiCiAgICAgIGdyb3VwX2ludGVydmFsOiAiNW0iCiAgICAgIHJlcGVhdF9pbnRlcnZhbDogIjZoIgogICAgICBtYXRjaF9yZToKICAgICAgICBzZXZlcml0eTogbWVkaXVtfHdhcm5pbmcKICAgIC0gcmVjZWl2ZXI6ICJkaW5ndGFsayIKICAgICAgZ3JvdXBfd2FpdDogIjE1cyIKICAgICAgZ3JvdXBfaW50ZXJ2YWw6ICI1bSIKICAgICAgcmVwZWF0X2ludGVydmFsOiAiMTBzIgogICAgICBtYXRjaF9yZToKICAgICAgICBzZXZlcml0eTogaGlnaHxjcml0aWNhbAo= +kind: Secret +metadata: + name: alertmanager-main + namespace: monitoring diff --git a/5-alertmanager/alertmanager-service.yaml b/5-alertmanager/alertmanager-service.yaml new file mode 100644 index 0000000..0aa134f --- /dev/null +++ b/5-alertmanager/alertmanager-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + alertmanager: main + name: alertmanager-main + namespace: monitoring +spec: + type: NodePort + ports: + - name: web + port: 9093 + targetPort: web + nodePort: 30093 + selector: + alertmanager: main + app: alertmanager + sessionAffinity: ClientIP diff --git a/5-alertmanager/alertmanager-serviceAccount.yaml b/5-alertmanager/alertmanager-serviceAccount.yaml new file mode 100644 index 0000000..5c06d5e --- /dev/null +++ b/5-alertmanager/alertmanager-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: alertmanager-main + namespace: monitoring diff --git a/6-exporter/alertmanager/alertmanager-serviceMonitor.yaml b/6-exporter/alertmanager/alertmanager-serviceMonitor.yaml new file mode 100644 index 0000000..548af0d --- /dev/null +++ b/6-exporter/alertmanager/alertmanager-serviceMonitor.yaml @@ -0,0 +1,14 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: alertmanager + name: alertmanager + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: web + selector: + matchLabels: + alertmanager: main diff --git a/6-exporter/apiserver/apiserver-serviceMonitor.yaml b/6-exporter/apiserver/apiserver-serviceMonitor.yaml new file mode 100644 index 0000000..ee69924 --- /dev/null +++ b/6-exporter/apiserver/apiserver-serviceMonitor.yaml @@ -0,0 +1,37 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: apiserver + name: kube-apiserver + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + metricRelabelings: + - action: drop + regex: etcd_(debugging|disk|request|server).* + sourceLabels: + - __name__ + - action: drop + regex: apiserver_admission_controller_admission_latencies_seconds_.* + sourceLabels: + - __name__ + - action: drop + regex: apiserver_admission_step_admission_latencies_seconds_.* + sourceLabels: + - __name__ + port: https + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + jobLabel: component + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + component: apiserver + provider: kubernetes diff --git a/6-exporter/coredns/coredns-service.yaml b/6-exporter/coredns/coredns-service.yaml new file mode 100644 index 0000000..682e0ba --- /dev/null +++ b/6-exporter/coredns/coredns-service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + namespace: kube-system + name: coredns-prometheus + labels: + k8s-app: kube-dns +spec: + selector: + k8s-app: kube-dns + type: ClusterIP + clusterIP: None + ports: + - name: metrics + port: 9153 + targetPort: metrics + protocol: TCP diff --git a/6-exporter/coredns/coredns-serviceMonitor.yaml b/6-exporter/coredns/coredns-serviceMonitor.yaml new file mode 100644 index 0000000..7d8bb4b --- /dev/null +++ b/6-exporter/coredns/coredns-serviceMonitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: coredns + name: coredns + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 15s + port: metrics + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kube-dns diff --git a/6-exporter/grafana/grafana-serviceMonitor.yaml b/6-exporter/grafana/grafana-serviceMonitor.yaml new file mode 100644 index 0000000..f450160 --- /dev/null +++ b/6-exporter/grafana/grafana-serviceMonitor.yaml @@ -0,0 +1,12 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: grafana + namespace: monitoring +spec: + endpoints: + - interval: 15s + port: http + selector: + matchLabels: + app: grafana diff --git a/6-exporter/jmx-exporter/jmx-exporter-prometheusRule.yaml b/6-exporter/jmx-exporter/jmx-exporter-prometheusRule.yaml new file mode 100644 index 0000000..920ba74 --- /dev/null +++ b/6-exporter/jmx-exporter/jmx-exporter-prometheusRule.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: jmx-exporter-rules + namespace: monitoring +spec: + groups: + - name: jmx.alerts + rules: + - alert: TooMuchOldGenerationGCTime + expr: ceil(increase(jvm_gc_collection_seconds_sum{gc=~"(PS MarkSweep|G1 Old Generation)"}[5m])) > 20 + labels: + severity: critical + annotations: + summary: "FullGC耗时过长,5分钟内GC时间超过20s" + description: "服务={{ $labels.service }}\t容器={{ $labels.pod }}\t5分钟内GC时间={{ printf \"%.2f\" $value }}s\n" diff --git a/6-exporter/jmx-exporter/jmx-exporter-serviceMonitor.yaml b/6-exporter/jmx-exporter/jmx-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..930890c --- /dev/null +++ b/6-exporter/jmx-exporter/jmx-exporter-serviceMonitor.yaml @@ -0,0 +1,26 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + prometheus-exporter: jmx-exporter + name: jmx-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + #port: "7777" + scheme: http + relabelings: + - sourceLabels: [__address__, __meta_kubernetes_service_label_jmx_scrape_port] + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + targetLabel: __address__ + - sourceLabels: [job] + replacement: 'jmx-exporter' + targetLabel: job + selector: + matchLabels: + jmx_should_be_scraped: "true" + namespaceSelector: + any: true diff --git a/6-exporter/kube-state-metrics/kube-state-metrics-clusterRole.yaml b/6-exporter/kube-state-metrics/kube-state-metrics-clusterRole.yaml new file mode 100644 index 0000000..2b52059 --- /dev/null +++ b/6-exporter/kube-state-metrics/kube-state-metrics-clusterRole.yaml @@ -0,0 +1,91 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kube-state-metrics +rules: + - apiGroups: + - "" + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: + - list + - watch + - apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + - ingresses + verbs: + - list + - watch + - apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch + - apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - list + - watch + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create + - apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - list + - watch + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - list + - watch + - apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - list + - watch \ No newline at end of file diff --git a/6-exporter/kube-state-metrics/kube-state-metrics-clusterRoleBinding.yaml b/6-exporter/kube-state-metrics/kube-state-metrics-clusterRoleBinding.yaml new file mode 100644 index 0000000..82e27e9 --- /dev/null +++ b/6-exporter/kube-state-metrics/kube-state-metrics-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: + - kind: ServiceAccount + name: kube-state-metrics + namespace: monitoring diff --git a/6-exporter/kube-state-metrics/kube-state-metrics-deployment.yaml b/6-exporter/kube-state-metrics/kube-state-metrics-deployment.yaml new file mode 100644 index 0000000..b2a5093 --- /dev/null +++ b/6-exporter/kube-state-metrics/kube-state-metrics-deployment.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: kube-state-metrics + name: kube-state-metrics + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: kube-state-metrics + template: + metadata: + labels: + app: kube-state-metrics + spec: + containers: + - args: + - --logtostderr + - --secure-listen-address=:8443 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:8081/ + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy-main + ports: + - containerPort: 8443 + name: https-main + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + - args: + - --logtostderr + - --secure-listen-address=:9443 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:8082/ + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy-self + ports: + - containerPort: 9443 + name: https-self + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + - args: + - --host=127.0.0.1 + - --port=8081 + - --telemetry-host=127.0.0.1 + - --telemetry-port=8082 + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-state-metrics:v1.7.2 + name: kube-state-metrics + resources: + limits: + cpu: 100m + memory: 150Mi + requests: + cpu: 100m + memory: 150Mi + - command: + - /pod_nanny + - --container=kube-state-metrics + - --cpu=100m + - --extra-cpu=2m + - --memory=150Mi + - --extra-memory=30Mi + - --threshold=5 + - --deployment=kube-state-metrics + env: + - name: MY_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/addon-resizer:1.8.4 + name: addon-resizer + resources: + limits: + cpu: 50m + memory: 30Mi + requests: + cpu: 10m + memory: 30Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: kube-state-metrics diff --git a/6-exporter/kube-state-metrics/kube-state-metrics-role.yaml b/6-exporter/kube-state-metrics/kube-state-metrics-role.yaml new file mode 100644 index 0000000..3c68f2e --- /dev/null +++ b/6-exporter/kube-state-metrics/kube-state-metrics-role.yaml @@ -0,0 +1,30 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: kube-state-metrics + namespace: monitoring +rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - apiGroups: + - extensions + resourceNames: + - kube-state-metrics + resources: + - deployments + verbs: + - get + - update + - apiGroups: + - apps + resourceNames: + - kube-state-metrics + resources: + - deployments + verbs: + - get + - update diff --git a/6-exporter/kube-state-metrics/kube-state-metrics-roleBinding.yaml b/6-exporter/kube-state-metrics/kube-state-metrics-roleBinding.yaml new file mode 100644 index 0000000..004a83a --- /dev/null +++ b/6-exporter/kube-state-metrics/kube-state-metrics-roleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: kube-state-metrics + namespace: monitoring +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kube-state-metrics +subjects: + - kind: ServiceAccount + name: kube-state-metrics diff --git a/6-exporter/kube-state-metrics/kube-state-metrics-service.yaml b/6-exporter/kube-state-metrics/kube-state-metrics-service.yaml new file mode 100644 index 0000000..5cfeec7 --- /dev/null +++ b/6-exporter/kube-state-metrics/kube-state-metrics-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: kube-state-metrics + name: kube-state-metrics + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https-main + port: 8443 + targetPort: https-main + - name: https-self + port: 9443 + targetPort: https-self + selector: + app: kube-state-metrics diff --git a/6-exporter/kube-state-metrics/kube-state-metrics-serviceAccount.yaml b/6-exporter/kube-state-metrics/kube-state-metrics-serviceAccount.yaml new file mode 100644 index 0000000..fff1028 --- /dev/null +++ b/6-exporter/kube-state-metrics/kube-state-metrics-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: monitoring diff --git a/6-exporter/kube-state-metrics/kube-state-metrics-serviceMonitor.yaml b/6-exporter/kube-state-metrics/kube-state-metrics-serviceMonitor.yaml new file mode 100644 index 0000000..c110f4f --- /dev/null +++ b/6-exporter/kube-state-metrics/kube-state-metrics-serviceMonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kube-state-metrics + name: kube-state-metrics + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 30s + port: https-main + scheme: https + scrapeTimeout: 30s + tlsConfig: + insecureSkipVerify: true + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https-self + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: kube-state-metrics diff --git a/6-exporter/kubelet/kubelet-serviceMonitor.yaml b/6-exporter/kubelet/kubelet-serviceMonitor.yaml new file mode 100644 index 0000000..10151b0 --- /dev/null +++ b/6-exporter/kubelet/kubelet-serviceMonitor.yaml @@ -0,0 +1,51 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kubelet + name: kubelet + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 30s + port: https-metrics + scheme: https + # port: http-metrics + # scheme: http + tlsConfig: + insecureSkipVerify: true + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 30s + metricRelabelings: + metricRelabelings: + - sourceLabels: [pod_name, pod] + regex: ';?(.*);?' + targetLabel: pod + action: replace + - sourceLabels: [container_name, container] + regex: ';?(.*);?' + targetLabel: container + action: replace + - regex: (pod|container)_name + action: labeldrop + - action: drop + regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + sourceLabels: + - __name__ + path: /metrics/cadvisor + port: https-metrics + scheme: https + # port: http-metrics + # scheme: http + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kubelet diff --git a/6-exporter/maxwell/maxwell-prometheusRule.yaml b/6-exporter/maxwell/maxwell-prometheusRule.yaml new file mode 100644 index 0000000..e338a9f --- /dev/null +++ b/6-exporter/maxwell/maxwell-prometheusRule.yaml @@ -0,0 +1,61 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: maxwell-rules + namespace: monitoring +spec: + groups: + - name: maxwell.alerts + rules: + - alert: maxwellUp + expr: | + absent(MaxwellMetrics_messages_succeeded) + for: 10m + labels: + severity: warning + annotations: + summary: "prometheus未能成功抓取maxwell指标超过10分钟,请留意" + - alert: maxwellReplicationQueueTime + expr: | + MaxwellMetrics_replication_queue_time{quantile="0.99"} > 1 * 1000 + labels: + severity: warning + annotations: + summary: "maxwell binlog等待处理时间P99超过1秒,请留意" + description: "the time it took to enqueue a given binlog event for processing, in milliseconds" + - alert: maxwellReplicationLagNegative + expr: | + MaxwellMetrics_replication_lag < 0 + for: 5m + labels: + severity: warning + annotations: + summary: "maxwell binlog同步延迟报告为负数,持续超过5分钟,可能同步有中断需要重启,请注意" + description: "the time elapsed between the database transaction commit and the time it was processed by Maxwell, in milliseconds" + - alert: maxwellReplicationLag + expr: | + MaxwellMetrics_replication_lag > 5 * 1000 + labels: + severity: warning + annotations: + summary: "maxwell binlog同步延迟超过5秒,可能有大事务正在执行,请留意" + description: "the time elapsed between the database transaction commit and the time it was processed by Maxwell, in milliseconds" + - alert: maxwellFailedMessage + expr: | + round(delta(MaxwellMetrics_messages_failed[5m])) > 0 + labels: + severity: warning + annotations: + summary: "maxwell 5分钟内有{{ $value }}条消息发送到kafka失败,后续请留意" + description: "count of messages that failed to send to Kafka" + - alert: maxwellMessagePublishAge + expr: | + MaxwellMetrics_message_publish_age{quantile="0.99"} > 3 * 1000 + labels: + severity: warning + annotations: + summary: "maxwell消息同步P99延迟超过3秒,请留意" + description: "the time between an event occurring on the DB and being published to kafka, in milliseconds. Note: since MySQL timestamps are accurate to the second, this is only accurate to +/- 500ms." diff --git a/6-exporter/maxwell/maxwell-serviceMonitor.yaml b/6-exporter/maxwell/maxwell-serviceMonitor.yaml new file mode 100644 index 0000000..2ca542f --- /dev/null +++ b/6-exporter/maxwell/maxwell-serviceMonitor.yaml @@ -0,0 +1,18 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app: maxwell + name: maxwell + namespace: monitoring +spec: + endpoints: + - interval: 30s + scheme: http + port: web + path: /prometheus + selector: + matchLabels: + app: maxwell + namespaceSelector: + any: true diff --git a/6-exporter/node-exporter/node-exporter-clusterRole.yaml b/6-exporter/node-exporter/node-exporter-clusterRole.yaml new file mode 100644 index 0000000..89d532d --- /dev/null +++ b/6-exporter/node-exporter/node-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: node-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/6-exporter/node-exporter/node-exporter-clusterRoleBinding.yaml b/6-exporter/node-exporter/node-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..9a994cd --- /dev/null +++ b/6-exporter/node-exporter/node-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: node-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: node-exporter +subjects: + - kind: ServiceAccount + name: node-exporter + namespace: monitoring diff --git a/6-exporter/node-exporter/node-exporter-daemonset.yaml b/6-exporter/node-exporter/node-exporter-daemonset.yaml new file mode 100644 index 0000000..6c932b6 --- /dev/null +++ b/6-exporter/node-exporter/node-exporter-daemonset.yaml @@ -0,0 +1,87 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app: node-exporter + name: node-exporter + namespace: monitoring +spec: + selector: + matchLabels: + app: node-exporter + template: + metadata: + labels: + app: node-exporter + spec: + containers: + - args: + - --web.listen-address=127.0.0.1:9100 + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/) + - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$ + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/node-exporter:v1.3.1 + name: node-exporter + resources: + limits: + cpu: 250m + memory: 180Mi + requests: + cpu: 102m + memory: 180Mi + volumeMounts: + - mountPath: /host/proc + name: proc + readOnly: false + - mountPath: /host/sys + name: sys + readOnly: false + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + - args: + - --logtostderr + - --secure-listen-address=$(IP):9100 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9100/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 9100 + hostPort: 9100 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + hostNetwork: true + hostPID: true + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + runAsNonRoot: true + runAsUser: 65534 + serviceAccountName: node-exporter + tolerations: + - operator: Exists + volumes: + - hostPath: + path: /proc + name: proc + - hostPath: + path: /sys + name: sys + - hostPath: + path: / + name: root diff --git a/6-exporter/node-exporter/node-exporter-service.yaml b/6-exporter/node-exporter/node-exporter-service.yaml new file mode 100644 index 0000000..f710cd2 --- /dev/null +++ b/6-exporter/node-exporter/node-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: node-exporter + name: node-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9100 + targetPort: https + selector: + app: node-exporter diff --git a/6-exporter/node-exporter/node-exporter-serviceAccount.yaml b/6-exporter/node-exporter/node-exporter-serviceAccount.yaml new file mode 100644 index 0000000..8a03ac1 --- /dev/null +++ b/6-exporter/node-exporter/node-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-exporter + namespace: monitoring diff --git a/6-exporter/node-exporter/node-exporter-serviceMonitor.yaml b/6-exporter/node-exporter/node-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..b614999 --- /dev/null +++ b/6-exporter/node-exporter/node-exporter-serviceMonitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: node-exporter + name: node-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: node-exporter diff --git a/6-exporter/prometheus-operator/prometheus-operator-serviceMonitor.yaml b/6-exporter/prometheus-operator/prometheus-operator-serviceMonitor.yaml new file mode 100644 index 0000000..f4aa9d5 --- /dev/null +++ b/6-exporter/prometheus-operator/prometheus-operator-serviceMonitor.yaml @@ -0,0 +1,18 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator + apps.kubernetes.io/version: v0.30.0 + name: prometheus-operator + namespace: monitoring +spec: + endpoints: + - honorLabels: true + port: http + selector: + matchLabels: + apps.kubernetes.io/component: controller + apps.kubernetes.io/name: prometheus-operator + apps.kubernetes.io/version: v0.30.0 diff --git a/6-exporter/prometheus/prometheus-serviceMonitor.yaml b/6-exporter/prometheus/prometheus-serviceMonitor.yaml new file mode 100644 index 0000000..23d1270 --- /dev/null +++ b/6-exporter/prometheus/prometheus-serviceMonitor.yaml @@ -0,0 +1,14 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: prometheus + name: prometheus + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: web + selector: + matchLabels: + prometheus: k8s \ No newline at end of file diff --git a/6-exporter/reactflow/reactflow-serviceMonitor.yaml b/6-exporter/reactflow/reactflow-serviceMonitor.yaml new file mode 100644 index 0000000..acc55d9 --- /dev/null +++ b/6-exporter/reactflow/reactflow-serviceMonitor.yaml @@ -0,0 +1,17 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app: reactflow + name: reactflow + namespace: monitoring +spec: + endpoints: + - interval: 30s + scheme: http + port: "9095" + selector: + matchLabels: + app: reactflow + namespaceSelector: + any: true diff --git a/6-exporter/spring-boot-actuator/spring-boot-actuator-serviceMonitor.yaml b/6-exporter/spring-boot-actuator/spring-boot-actuator-serviceMonitor.yaml new file mode 100644 index 0000000..333dc79 --- /dev/null +++ b/6-exporter/spring-boot-actuator/spring-boot-actuator-serviceMonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + prometheus-exporter: spring-boot-actuator + name: spring-boot-actuator + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + #port: "9999" + path: /actuator/prometheus + scheme: http + relabelings: + - sourceLabels: [__address__, __meta_kubernetes_service_label_actuator_scrape_port] + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + targetLabel: __address__ + - sourceLabels: [job] + replacement: 'spring-boot-actuator' + targetLabel: job + selector: + matchLabels: + actuator_should_be_scraped: "true" + namespaceSelector: + any: true diff --git a/7-grafana/grafana-config.yaml b/7-grafana/grafana-config.yaml new file mode 100644 index 0000000..b8e79c1 --- /dev/null +++ b/7-grafana/grafana-config.yaml @@ -0,0 +1,525 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-config + namespace: monitoring +data: + grafana.ini: | + ##################### Grafana Configuration Example ##################### + # + # Everything has defaults so you only need to uncomment things you want to + # change + + # possible values : production, development + ;app_mode = production + + # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty + ;instance_name = ${HOSTNAME} + + #################################### Paths #################################### + [paths] + # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) + ;data = /var/lib/grafana + + # Temporary files in `data` directory older than given duration will be removed + ;temp_data_lifetime = 24h + + # Directory where grafana can store logs + ;logs = /var/log/grafana + + # Directory where grafana will automatically scan and look for plugins + ;plugins = /var/lib/grafana/plugins + + # folder that contains provisioning config files that grafana will apply on startup and while running. + ;provisioning = conf/provisioning + + #################################### Server #################################### + [server] + # Protocol (http, https, socket) + ;protocol = http + + # The ip address to bind to, empty will bind to all interfaces + ;http_addr = + + # The http port to use + ;http_port = 3000 + + # The public facing domain name used to access grafana from a browser + ;domain = localhost + + # Redirect to correct domain if host header does not match domain + # Prevents DNS rebinding attacks + ;enforce_domain = false + + # The full public facing url you use in browser, used for redirects and emails + # If you use reverse proxy and sub path specify full url (with sub path) + ;root_url = http://localhost:3000/grafana/ + #;root_url = %(protocol)s://%(domain)s:%(http_port)s/grafana + ;serve_from_sub_path = true + # Log web requests + ;router_logging = false + + # the path relative working path + ;static_root_path = public + + # enable gzip + ;enable_gzip = false + + # https certs & key file + ;cert_file = + ;cert_key = + + # Unix socket path + ;socket = + + #################################### Database #################################### + [database] + # You can configure the database connection by specifying type, host, name, user and password + # as separate properties or as on string using the url properties. + + # Either "mysql", "postgres" or "sqlite3", it's your choice + ;type = sqlite3 + ;host = 127.0.0.1:3306 + ;name = grafana + ;user = root + # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" + ;password = + + # Use either URL or the previous fields to configure the database + # Example: mysql://user:secret@host:port/database + ;url = + + # For "postgres" only, either "disable", "require" or "verify-full" + ;ssl_mode = disable + + # For "sqlite3" only, path relative to data_path setting + ;path = grafana.db + + # Max idle conn setting default is 2 + ;max_idle_conn = 2 + + # Max conn setting default is 0 (mean not set) + ;max_open_conn = + + # Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours) + ;conn_max_lifetime = 14400 + + # Set to true to log the sql calls and execution times. + log_queries = + + # For "sqlite3" only. cache mode setting used for connecting to the database. (private, shared) + ;cache_mode = private + + #################################### Session #################################### + [session] + # Either "memory", "file", "redis", "mysql", "postgres", default is "file" + ;provider = file + + # Provider config options + # memory: not have any config yet + # file: session dir path, is relative to grafana data_path + # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` + # mysql: go-sql-driver/mysql dsn config string, e.g. `user:password@tcp(127.0.0.1:3306)/database_name` + # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable + ;provider_config = sessions + + # Session cookie name + ;cookie_name = grafana_sess + + # If you use session in https only, default is false + ;cookie_secure = false + + # Session life time, default is 86400 + ;session_life_time = 86400 + + #################################### Data proxy ########################### + [dataproxy] + + # This enables data proxy logging, default is false + ;logging = false + + # How long the data proxy should wait before timing out default is 30 (seconds) + ;timeout = 30 + + #################################### Analytics #################################### + [analytics] + # Server reporting, sends usage counters to stats.grafana.org every 24 hours. + # No ip addresses are being tracked, only simple counters to track + # running instances, dashboard and error counts. It is very helpful to us. + # Change this option to false to disable reporting. + ;reporting_enabled = true + + # Set to false to disable all checks to https://grafana.net + # for new vesions (grafana itself and plugins), check is used + # in some UI views to notify that grafana or plugin update exists + # This option does not cause any auto updates, nor send any information + # only a GET request to http://grafana.com to get latest versions + ;check_for_updates = true + + # Google Analytics universal tracking code, only enabled if you specify an id here + ;google_analytics_ua_id = + + # Google Tag Manager ID, only enabled if you specify an id here + ;google_tag_manager_id = + + #################################### Security #################################### + [security] + # default admin user, created on startup + ;admin_user = admin + + # default admin password, can be changed before first start of grafana, or in profile settings + ;admin_password = admin + + # used for signing + ;secret_key = SW2YcwTIb9zpOOhoPsMm + + # disable gravatar profile images + ;disable_gravatar = false + + # data source proxy whitelist (ip_or_domain:port separated by spaces) + ;data_source_proxy_whitelist = + + # disable protection against brute force login attempts + ;disable_brute_force_login_protection = false + + # set to true if you host Grafana behind HTTPS. default is false. + ;cookie_secure = false + + # set cookie SameSite attribute. defaults to `lax`. can be set to "lax", "strict" and "none" + ;cookie_samesite = lax + + #################################### Snapshots ########################### + [snapshots] + # snapshot sharing options + ;external_enabled = true + ;external_snapshot_url = https://snapshots-origin.raintank.io + ;external_snapshot_name = Publish to snapshot.raintank.io + + # remove expired snapshot + ;snapshot_remove_expired = true + + #################################### Dashboards History ################## + [dashboards] + # Number dashboard versions to keep (per dashboard). Default: 20, Minimum: 1 + ;versions_to_keep = 20 + + #################################### Users ############################### + [users] + # disable user signup / registration + ;allow_sign_up = true + + # Allow non admin users to create organizations + ;allow_org_create = true + + # Set to true to automatically assign new users to the default organization (id 1) + ;auto_assign_org = true + + # Default role new users will be automatically assigned (if disabled above is set to true) + ;auto_assign_org_role = Viewer + + # Background text for the user field on the login page + ;login_hint = email or username + + # Default UI theme ("dark" or "light") + ;default_theme = dark + + # External user management, these options affect the organization users view + ;external_manage_link_url = + ;external_manage_link_name = + ;external_manage_info = + + # Viewers can edit/inspect dashboard settings in the browser. But not save the dashboard. + ;viewers_can_edit = false + + [auth] + # Login cookie name + ;login_cookie_name = grafana_session + + # The lifetime (days) an authenticated user can be inactive before being required to login at next visit. Default is 7 days, + ;login_maximum_inactive_lifetime_days = 7 + + # The maximum lifetime (days) an authenticated user can be logged in since login time before being required to login. Default is 30 days. + ;login_maximum_lifetime_days = 30 + + # How often should auth tokens be rotated for authenticated users when being active. The default is each 10 minutes. + ;token_rotation_interval_minutes = 10 + + # Set to true to disable (hide) the login form, useful if you use OAuth, defaults to false + ;disable_login_form = false + + # Set to true to disable the signout link in the side menu. useful if you use auth.proxy, defaults to false + ;disable_signout_menu = false + + # URL to redirect the user to after sign out + ;signout_redirect_url = + + # Set to true to attempt login with OAuth automatically, skipping the login screen. + # This setting is ignored if multiple OAuth providers are configured. + ;oauth_auto_login = false + + #################################### Anonymous Auth ###################### + [auth.anonymous] + # enable anonymous access + ;enabled = true + + # specify organization name that should be used for unauthenticated users + ;org_name = Main Org. + + # specify role for unauthenticated users + ;org_role = Admin + + #################################### Github Auth ########################## + [auth.github] + ;enabled = false + ;allow_sign_up = true + ;client_id = some_id + ;client_secret = some_secret + ;scopes = user:email,read:org + ;auth_url = https://github.com/login/oauth/authorize + ;token_url = https://github.com/login/oauth/access_token + ;api_url = https://api.github.com/user + ;team_ids = + ;allowed_organizations = + + #################################### Google Auth ########################## + [auth.google] + ;enabled = false + ;allow_sign_up = true + ;client_id = some_client_id + ;client_secret = some_client_secret + ;scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email + ;auth_url = https://accounts.google.com/o/oauth2/auth + ;token_url = https://accounts.google.com/o/oauth2/token + ;api_url = https://www.googleapis.com/oauth2/v1/userinfo + ;allowed_domains = + + #################################### Generic OAuth ########################## + [auth.generic_oauth] + ;enabled = false + ;name = OAuth + ;allow_sign_up = true + ;client_id = some_id + ;client_secret = some_secret + ;scopes = user:email,read:org + ;auth_url = https://foo.bar/login/oauth/authorize + ;token_url = https://foo.bar/login/oauth/access_token + ;api_url = https://foo.bar/user + ;team_ids = + ;allowed_organizations = + ;tls_skip_verify_insecure = false + ;tls_client_cert = + ;tls_client_key = + ;tls_client_ca = + + ; Set to true to enable sending client_id and client_secret via POST body instead of Basic authentication HTTP header + ; This might be required if the OAuth provider is not RFC6749 compliant, only supporting credentials passed via POST payload + ;send_client_credentials_via_post = false + + #################################### Grafana.com Auth #################### + [auth.grafana_com] + ;enabled = false + ;allow_sign_up = true + ;client_id = some_id + ;client_secret = some_secret + ;scopes = user:email + ;allowed_organizations = + + #################################### Auth Proxy ########################## + [auth.proxy] + ;enabled = false + ;header_name = X-WEBAUTH-USER + ;header_property = username + ;auto_sign_up = true + ;ldap_sync_ttl = 60 + ;whitelist = 192.168.1.1, 192.168.2.1 + ;headers = Email:X-User-Email, Name:X-User-Name + + #################################### Basic Auth ########################## + [auth.basic] + ;enabled = true + + #################################### Auth LDAP ########################## + [auth.ldap] + ;enabled = false + ;config_file = /etc/grafana/ldap.toml + ;allow_sign_up = true + + #################################### SMTP / Emailing ########################## + [smtp] + ;enabled = false + ;host = localhost:25 + ;user = + # If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;""" + ;password = + ;cert_file = + ;key_file = + ;skip_verify = false + ;from_address = admin@grafana.localhost + ;from_name = Grafana + # EHLO identity in SMTP dialog (defaults to instance_name) + ;ehlo_identity = dashboard.example.com + + [emails] + ;welcome_email_on_sign_up = false + + #################################### Logging ########################## + [log] + # Either "console", "file", "syslog". Default is console and file + # Use space to separate multiple modes, e.g. "console file" + ;mode = console file + + # Either "debug", "info", "warn", "error", "critical", default is "info" + ;level = info + + # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug + ;filters = + + # For "console" mode only + [log.console] + ;level = + + # log line format, valid options are text, console and json + ;format = console + + # For "file" mode only + [log.file] + ;level = + + # log line format, valid options are text, console and json + ;format = text + + # This enables automated log rotate(switch of following options), default is true + ;log_rotate = true + + # Max line number of single file, default is 1000000 + ;max_lines = 1000000 + + # Max size shift of single file, default is 28 means 1 << 28, 256MB + ;max_size_shift = 28 + + # Segment log daily, default is true + ;daily_rotate = true + + # Expired days of log file(delete after max days), default is 7 + ;max_days = 7 + + [log.syslog] + ;level = + + # log line format, valid options are text, console and json + ;format = text + + # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. + ;network = + ;address = + + # Syslog facility. user, daemon and local0 through local7 are valid. + ;facility = + + # Syslog tag. By default, the process' argv[0] is used. + ;tag = + + #################################### Alerting ############################ + [alerting] + # Disable alerting engine & UI features + ;enabled = true + # Makes it possible to turn off alert rule execution but alerting UI is visible + ;execute_alerts = true + + # Default setting for new alert rules. Defaults to categorize error and timeouts as alerting. (alerting, keep_state) + ;error_or_timeout = alerting + + # Default setting for how Grafana handles nodata or null values in alerting. (alerting, no_data, keep_state, ok) + ;nodata_or_nullvalues = no_data + + # Alert notifications can include images, but rendering many images at the same time can overload the server + # This limit will protect the server from render overloading and make sure notifications are sent out quickly + ;concurrent_render_limit = 5 + + #################################### Explore ############################# + [explore] + # Enable the Explore section + ;enabled = true + + #################################### Internal Grafana Metrics ########################## + # Metrics available at HTTP API Url /metrics + [metrics] + # Disable / Enable internal metrics + ;enabled = true + + # Publish interval + ;interval_seconds = 10 + + # Send internal metrics to Graphite + [metrics.graphite] + # Enable by setting the address setting (ex localhost:2003) + ;address = + ;prefix = prod.grafana.%(instance_name)s. + + #################################### Distributed tracing ############ + [tracing.jaeger] + # Enable by setting the address sending traces to jaeger (ex localhost:6831) + ;address = localhost:6831 + # Tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2) + ;always_included_tag = tag1:value1 + # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote + ;sampler_type = const + # jaeger samplerconfig param + # for "const" sampler, 0 or 1 for always false/true respectively + # for "probabilistic" sampler, a probability between 0 and 1 + # for "rateLimiting" sampler, the number of spans per second + # for "remote" sampler, param is the same as for "probabilistic" + # and indicates the initial sampling rate before the actual one + # is received from the mothership + ;sampler_param = 1 + + #################################### Grafana.com integration ########################## + # Url used to import dashboards directly from Grafana.com + [grafana_com] + ;url = https://grafana.com + + #################################### External image storage ########################## + [external_image_storage] + # Used for uploading images to public servers so they can be included in slack/email messages. + # you can choose between (s3, webdav, gcs, azure_blob, local) + ;provider = + + [external_image_storage.s3] + ;bucket = + ;region = + ;path = + ;access_key = + ;secret_key = + + [external_image_storage.webdav] + ;url = + ;public_url = + ;username = + ;password = + + [external_image_storage.gcs] + ;key_file = + ;bucket = + ;path = + + [external_image_storage.azure_blob] + ;account_name = + ;account_key = + ;container_name = + + [external_image_storage.local] + # does not require any configuration + + [rendering] + # Options to configure external image rendering server like https://github.com/grafana/grafana-image-renderer + ;server_url = + ;callback_url = + + [enterprise] + # Path to a valid Grafana Enterprise license.jwt file + ;license_path = + + [panels] + ;enable_alpha = false + # If set to true Grafana will allow script tags in text panels. Not recommended as it enable XSS vulnerabilities. + ;disable_sanitize_html = false diff --git a/7-grafana/grafana-customDashboardDefinitions.yaml b/7-grafana/grafana-customDashboardDefinitions.yaml new file mode 100644 index 0000000..279c181 --- /dev/null +++ b/7-grafana/grafana-customDashboardDefinitions.yaml @@ -0,0 +1,6845 @@ +apiVersion: v1 +items: + - apiVersion: v1 + data: + kafka.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 12, + "iteration": 1596792225598, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 53, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 55, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kafka_consumergroup_current_offset_sum{kafka_cluster_name=\"$cluster\"}[$interval]) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ topic }} ({{ consumergroup }})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Consume Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 56, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_consumergroup_lag_sum{kafka_cluster_name=\"$cluster\"} > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ topic }} ({{ consumergroup }})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Consume Lag", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 8, + "panels": [], + "title": "Consumer Group", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 11, + "x": 0, + "y": 22 + }, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(kafka_consumergroup_current_offset_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"}[$interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{consumergroup}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Consumer Rate for $topic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 10, + "x": 11, + "y": 22 + }, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_consumergroup_lag_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{consumergroup}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Current Lag for $topic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 22 + }, + "id": 44, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(kafka_consumergroup_current_offset_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Selected Groups", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 26 + }, + "id": 45, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kafka_consumergroup_members{consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Total Consumers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "datasource": "prometheus", + "gridPos": { + "h": 7, + "w": 11, + "x": 0, + "y": 30 + }, + "id": 61, + "options": { + "displayMode": "basic", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ], + "defaults": { + "decimals": 2, + "mappings": [ + { + "from": "-10", + "id": 1, + "operator": "", + "text": "Done", + "to": "0", + "type": 2, + "value": "-10" + } + ], + "max": 100, + "min": 0, + "thresholds": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1800 + }, + { + "color": "red", + "value": 3600 + } + ], + "title": "", + "unit": "s" + }, + "override": {}, + "values": false + }, + "orientation": "horizontal" + }, + "pluginVersion": "6.4.2", + "targets": [ + { + "expr": "kafka_consumergroup_lag_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"} / rate(kafka_consumergroup_current_offset_sum{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"}[5m])", + "legendFormat": "{{ consumergroup }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ETA", + "type": "bargauge" + }, + { + "columns": [], + "datasource": "prometheus", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 13, + "x": 11, + "y": 30 + }, + "id": 59, + "options": {}, + "pageSize": null, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Group", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "consumergroup", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Partition", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "partition", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Lag", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "kafka_consumergroup_lag{topic=~\"$topic\",consumergroup=~\"$consumergroup\",kafka_cluster_name=\"$cluster\"}", + "format": "table", + "instant": true, + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Lag By Partition ($topic)", + "transform": "table", + "type": "table" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 2, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 18 + }, + "id": 50, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(kafka_cluster_Partition_Value{topic=~\"$topic\",name=\"ReplicasCount\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Partitions", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": { + "message in topic": "#1f78c1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 4, + "y": 18 + }, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kafka_log_Log_Value{topic=~\"$topic\",name=\"LogEndOffset\"}) - sum(kafka_log_Log_Value{topic=~\"$topic\",name=\"LogStartOffset\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "message in {{ topic }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Messages", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "message in topic": "#1f78c1", + "size": "#fceaca" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 18 + }, + "id": 48, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kafka_log_Log_Value{topic=~\"$topic\",name=\"Size\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "size", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 24 + }, + "id": 51, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "min(kafka_cluster_Partition_Value{topic=~\"$topic\",name=\"ReplicasCount\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Replicas", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": { + "in": "#ea6460", + "out": "#806eb7", + "produce rate (message/s)": "semi-dark-purple" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 4, + "y": 24 + }, + "id": 62, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(kafka_log_Log_Value{topic=\"$topic\",name=\"LogEndOffset\"}[$interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "produce rate (message/s)", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Produce Rate for $topic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 24 + }, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_server_BrokerTopicMetrics_MeanRate{name=\"TotalProduceRequestsPerSec\",topic=\"$topic\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{ instance }} - Produce", + "refId": "B" + }, + { + "expr": "kafka_server_BrokerTopicMetrics_MeanRate{name=\"TotalFetchRequestsPerSec\",topic=\"$topic\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ instance}} - Fetch", + "refId": "A" + }, + { + "expr": "sum(kafka_server_BrokerTopicMetrics_MeanRate{name=\"TotalProduceRequestsPerSec\",topic=\"$topic\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Produce - Total", + "refId": "D" + }, + { + "expr": "sum(kafka_server_BrokerTopicMetrics_MeanRate{name=\"TotalFetchRequestsPerSec\",topic=\"$topic\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Fetch - Total", + "refId": "C" + }, + { + "expr": "sum(kafka_server_BrokerTopicMetrics_MeanRate{name=\"FailedProduceRequestsPerSec\",topic=\"$topic\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Produce - Failed", + "refId": "E" + }, + { + "expr": "sum(kafka_server_BrokerTopicMetrics_MeanRate{name=\"FailedFetchRequestsPerSec\",topic=\"$topic\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Fetch - Failed", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Topic-Level Requests Per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Topic", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 16, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 19 + }, + "id": 35, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "java_lang_OperatingSystem_ProcessCpuLoad{job=\"$job\",instance=~\"$instance\"} * 100", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "70,85", + "title": "Process CPU Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 19 + }, + "id": 36, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "java_lang_OperatingSystem_SystemCpuLoad{job=\"$job\",instance=~\"$instance\"} * 100", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "70,85", + "title": "System CPU Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 19 + }, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "process_open_fds{job=\"$job\",instance=~\"$instance\"} / process_max_fds{job=\"$job\",instance=~\"$instance\"} * 100", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "80,90", + "title": "File Description Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 19 + }, + "id": 33, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "jvm_memory_bytes_used{area=\"heap\",instance=\"$instance\",job=\"$job\"} / jvm_memory_bytes_max{area=\"heap\",instance=\"$instance\",job=\"$job\"} * 100", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "80,90", + "title": "Heap Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 25 + }, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_network_RequestMetrics_MeanRate{name=\"RequestsPerSec\",request=\"Produce\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "A" + }, + { + "expr": "kafka_network_RequestMetrics_MeanRate{name=\"RequestsPerSec\",request=\"FetchConsumer\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "B" + }, + { + "expr": "kafka_network_RequestMetrics_MeanRate{name=\"RequestsPerSec\",request=\"FetchFollower\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests Per Sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.110.246 - FetchConsumer": "#5195ce", + "192.168.110.246 - FetchFollower": "#3f6833", + "192.168.110.246 - Produce": "#eab839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 25 + }, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_network_RequestMetrics_Mean{name=\"TotalTimeMs\",request=\"Produce\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "A" + }, + { + "expr": "kafka_network_RequestMetrics_Mean{name=\"TotalTimeMs\",request=\"FetchConsumer\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "B" + }, + { + "expr": "kafka_network_RequestMetrics_Mean{name=\"TotalTimeMs\",request=\"FetchFollower\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - {{ request }}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total Time to Server Request", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "ResponseQueueSize": "#447ebc", + "kafka_network_RequestChannel_Value{instance=\"192.168.110.245\",job=\"kafka-jmx\",name=\"RequestQueueSize\"}": "#9ac48a", + "kafka_network_RequestChannel_Value{instance=\"192.168.110.245\",job=\"kafka-jmx\",name=\"ResponseQueueSize\"}": "#447ebc" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 0, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 25 + }, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_network_RequestChannel_Value{name=\"ResponseQueueSize\",instance=~\"$instance\",processor=\"\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "refId": "A" + }, + { + "expr": "kafka_network_RequestChannel_Value{name=\"RequestQueueSize\",instance=~\"$instance\",processor=\"\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Queue Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 25 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_resident_memory_bytes{job=\"$job\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Process Memory (RSS)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Broker", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 28, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 40 + }, + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "kafka_brokers{kafka_cluster_name=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Brokers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 40 + }, + "id": 39, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(kafka_topic_partitions{kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Topics", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 40 + }, + "id": 41, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kafka_topic_partitions{kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Partitions", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 40 + }, + "id": 42, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(kafka_consumergroup_members{kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Consumers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 40 + }, + "id": 40, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(kafka_consumergroup_members{kafka_cluster_name=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Consumer Groups", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "prometheus", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 40 + }, + "id": 26, + "interval": null, + "legend": { + "header": "", + "percentage": false, + "show": false, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "options": {}, + "pieType": "pie", + "strokeWidth": "1", + "targets": [ + { + "expr": "kafka_server_ReplicaManager_Value{name=\"PartitionCount\",kafka_cluster_name=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "title": "Partition Distribution", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "title": "Cluster", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 11, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Sum$/i", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_gc_collection_seconds_count{gc=\"G1 Young Generation\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - Count", + "refId": "A" + }, + { + "expr": "jvm_gc_collection_seconds_sum{gc=\"G1 Young Generation\",instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }} - Sum", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GC - G1 Young Generation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "count", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 0, + "format": "none", + "label": "sum (seconds)", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "paceLength": 10, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_pool_bytes_used{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}} - {{pool}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Pool Used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 0, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "JVM", + "type": "row" + } + ], + "refresh": "30s", + "schemaVersion": 20, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "3m", + "value": "3m" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": true, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,3m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": { + "text": "kafka-jmx-exporter", + "value": "kafka-jmx-exporter" + }, + "hide": 2, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [ + { + "selected": true, + "text": "kafka-jmx-exporter", + "value": "kafka-jmx-exporter" + } + ], + "query": "kafka-jmx-exporter", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "new", + "value": "new" + }, + "datasource": "prometheus", + "definition": "label_values(kafka_brokers, kafka_cluster_name)", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(kafka_brokers, kafka_cluster_name)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "text": "192.168.11.176:9898", + "value": "192.168.11.176:9898" + }, + "datasource": "prometheus", + "definition": "label_values(jvm_gc_collection_seconds_count{job=\"$job\"}, instance)", + "hide": 0, + "includeAll": false, + "label": "Broker", + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(jvm_gc_collection_seconds_count{job=\"$job\"}, instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "text": "flow-stat", + "value": "flow-stat" + }, + "datasource": "prometheus", + "definition": "label_values(kafka_consumergroup_current_offset{kafka_cluster_name=\"$cluster\"}, topic)", + "hide": 0, + "includeAll": false, + "label": "Topic", + "multi": false, + "name": "topic", + "options": [], + "query": "label_values(kafka_consumergroup_current_offset{kafka_cluster_name=\"$cluster\"}, topic)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": "", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "prometheus", + "definition": "label_values(kafka_consumergroup_current_offset{topic=~\"$topic\",kafka_cluster_name=\"$cluster\"}, consumergroup)", + "hide": 0, + "includeAll": true, + "label": "Consumer Group", + "multi": true, + "name": "consumergroup", + "options": [], + "query": "label_values(kafka_consumergroup_current_offset{topic=~\"$topic\",kafka_cluster_name=\"$cluster\"}, consumergroup)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka", + "uid": "Nf0UIbQiz", + "version": 12 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-kafka + namespace: monitoring + - apiVersion: v1 + data: + yarn.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 17, + "iteration": 1608004407828, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 11, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "yarn_up", + "instant": true, + "refId": "A" + } + ], + "thresholds": "0,1", + "timeFrom": null, + "timeShift": null, + "title": "API Status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "UP", + "value": "1" + }, + { + "op": "=", + "text": "DOWN", + "value": "0" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "yarn_info", + "instant": true, + "legendFormat": "{{ state }}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Yarn State", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "name" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "dateTimeAsIso", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "yarn_start_time", + "instant": true, + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Start Time", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "yarn_nodes_total", + "instant": true, + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total Nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "yarn_virtual_cores_total", + "instant": true, + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total Cores", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "mbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "yarn_memory_total_megabytes", + "instant": true, + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Total Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "yarn_apps", + "legendFormat": "{{ status }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Applications", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "yarn_containers", + "legendFormat": "{{ status }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Containers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 23, + "panels": [], + "title": "Group Info - $name", + "type": "row" + }, + { + "aliasColors": { + "application_1590245889957_32126": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 16 + }, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (name) (yarn_app_allocated_memory_megabytes{name=\"$name\"})", + "legendFormat": "{{ name }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Allocated Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "mbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "application_1590245889957_32126": "purple" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 16 + }, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (name) (yarn_app_allocated_virtual_cores{name=\"$name\"})", + "legendFormat": "{{ name }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Allocated Virtual Cores", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "application_1590245889957_32126": "dark-green" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 16 + }, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(yarn_app_elapsed_time_milliseconds{name=\"$name\"})", + "legendFormat": "{{ name }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Elapsed Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "application_1590245889957_32126": "orange" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 16 + }, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (name) (yarn_app_cluster_usage_percentage{name=\"$name\"})", + "legendFormat": "{{ name }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cluster Usage Percentage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 16, + "panels": [ + { + "aliasColors": { + "application_1590245889957_32126": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 9 + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "yarn_app_allocated_memory_megabytes{name=\"$name\", id=\"$id\"}", + "legendFormat": "{{ id }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Allocated Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "mbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "application_1590245889957_32126": "purple" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 9 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "yarn_app_allocated_virtual_cores{name=\"$name\", id=\"$id\"}", + "legendFormat": "{{ id }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Allocated Virtual Cores", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "application_1590245889957_32126": "dark-green" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 9 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "yarn_app_elapsed_time_milliseconds{name=\"$name\", id=\"$id\"}", + "legendFormat": "{{ id }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Elapsed Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "application_1590245889957_32126": "orange" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 9 + }, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "yarn_app_cluster_usage_percentage{name=\"$name\", id=\"$id\"}", + "legendFormat": "{{ id }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cluster Usage Percentage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Application Info - $id", + "type": "row" + } + ], + "schemaVersion": 20, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "spark-list-operation-prod", + "value": "spark-list-operation-prod" + }, + "datasource": "prometheus", + "definition": "label_values(yarn_app_elapsed_time_milliseconds, name)", + "hide": 0, + "includeAll": false, + "label": "Name", + "multi": false, + "name": "name", + "options": [], + "query": "label_values(yarn_app_elapsed_time_milliseconds, name)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "text": "application_1590245889957_32145", + "value": "application_1590245889957_32145" + }, + "datasource": "prometheus", + "definition": "label_values(yarn_app_elapsed_time_milliseconds{name=\"$name\"}, id)", + "hide": 0, + "includeAll": false, + "label": "ID", + "multi": false, + "name": "id", + "options": [], + "query": "label_values(yarn_app_elapsed_time_milliseconds{name=\"$name\"}, id)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Yarn", + "uid": "Rel2gYJGz", + "version": 3 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-yarn + namespace: monitoring + - apiVersion: v1 + data: + maxwell.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 16, + "iteration": 1607496083290, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 11, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "description": "The time elapsed between the database transaction commit and the time it was processed by Maxwell, in milliseconds", + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "MaxwellMetrics_replication_lag{namespace=\"$namespace\"}", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "2000,5000", + "timeFrom": null, + "timeShift": null, + "title": "Current Replication Lag", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "description": "the time between an event occurring on the DB and being published to kafka, in milliseconds. Note: since MySQL timestamps are accurate to the second, this is only accurate to +/- 500ms.", + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "MaxwellMetrics_message_publish_age{quantile=\"0.99\",namespace=\"$namespace\"}", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Message Publish Age P99", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "description": "The time it took to send a given record to Kafka, in milliseconds", + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "MaxwellMetrics_message_publish_time{quantile=\"0.99\",namespace=\"$namespace\"}", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Message Publish Time P99", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "description": "the time it took to enqueue a given binlog event for processing, in milliseconds", + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "MaxwellMetrics_replication_queue_time{quantile=\"0.99\",namespace=\"$namespace\"}", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Replication Queue Time P99", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "description": "A count of rows that have been processed from the binlog. note that not every row results in a message being sent to Kafka.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 4 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "MaxwellMetrics_row_count{namespace=\"$namespace\"}", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Processed Rows", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "description": "the number of messages that are currently in-flight (awaiting acknowledgement from the destination, or ahead of messages which are)\n", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 4 + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "MaxwellMetrics_inflightmessages_count{namespace=\"$namespace\"}", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Inflight Messages", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "description": "Count of messages that were successfully sent to Kafka", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 12, + "y": 4 + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "MaxwellMetrics_messages_succeeded{namespace=\"$namespace\"}", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Successful Kafka Messages", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "description": "Count of messages that were successfully sent to Kafka", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 18, + "y": 4 + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "MaxwellMetrics_messages_failed{namespace=\"$namespace\"}", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Failed Kafka Messages", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 13, + "panels": [], + "title": "Graph", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 2, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(MaxwellMetrics_row_count{namespace=\"$namespace\"}[5m])", + "legendFormat": "processed", + "refId": "A" + }, + { + "expr": "rate(MaxwellMetrics_messages_succeeded{namespace=\"$namespace\"}[5m])", + "legendFormat": "success", + "refId": "B" + }, + { + "expr": "rate(MaxwellMetrics_messages_failed{namespace=\"$namespace\"}[5m])", + "legendFormat": "failed", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Processing Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": "msg/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "description": "The time elapsed between the database transaction commit and the time it was processed by Maxwell, in milliseconds\n", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 8, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "MaxwellMetrics_replication_lag{namespace=\"$namespace\"}", + "legendFormat": "lag time", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Replication Lag", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "description": "the number of messages that are currently in-flight (awaiting acknowledgement from the destination, or ahead of messages which are)\n", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 9, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "MaxwellMetrics_inflightmessages_count{namespace=\"$namespace\"}", + "legendFormat": "messages", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Inflight Messages", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": "msg", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 20, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "prometheus", + "definition": "label_values(MaxwellMetrics_row_count, namespace)", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(MaxwellMetrics_row_count, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Maxwell", + "uid": "BvFu87JA2", + "version": 2 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-maxwell + namespace: monitoring + - apiVersion: v1 + data: + jvm.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Some Information obtained from JMX exporter", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 5, + "iteration": 1603692392913, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 12, + "panels": [], + "title": "JVM", + "type": "row" + }, + { + "aliasColors": { + "heap": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 0, + "fillGradient": 5, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_bytes_used{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "legendFormat": "{{ area }}", + "refId": "A" + }, + { + "expr": "jvm_memory_bytes_max{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\", area=\"heap\"}", + "legendFormat": "max heap", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Heap", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 10, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_memory_pool_bytes_used{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "legendFormat": "{{ pool }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Memory Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "heap": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 0, + "fillGradient": 5, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/time.*/i", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(jvm_gc_collection_seconds_count{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}[1m])", + "hide": false, + "legendFormat": "count - {{ gc }}", + "refId": "A" + }, + { + "expr": "increase(jvm_gc_collection_seconds_sum{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}[1m])", + "hide": false, + "legendFormat": "time - {{ gc }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "GC Occurring", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "current": "semi-dark-blue", + "daemon": "semi-dark-green", + "deadlocked": "dark-red", + "peak": "semi-dark-orange" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jvm_threads_current{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "legendFormat": "current", + "refId": "A" + }, + { + "expr": "jvm_threads_daemon{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "hide": false, + "legendFormat": "daemon", + "refId": "B" + }, + { + "expr": "jvm_threads_deadlocked{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "hide": false, + "legendFormat": "deadlocked", + "refId": "C" + }, + { + "expr": "jvm_threads_peak{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "hide": false, + "legendFormat": "peak", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "JVM Threads", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "CPU": "blue", + "Memory (RSS)": "green", + "app-5c556c54bc-jmp2j": "semi-dark-blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Memory (RSS)", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(process_cpu_seconds_total{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}[1m]) * 100", + "hide": false, + "legendFormat": "CPU", + "refId": "A" + }, + { + "expr": "process_resident_memory_bytes{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "hide": false, + "legendFormat": "Memory (RSS)", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Process CPU & Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": true, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 7, + "x": 12, + "y": 13 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "jvm_info{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "instant": false, + "legendFormat": "{{ vendor }}

{{runtime}}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "JVM Runtime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "name" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 19, + "y": 13 + }, + "id": 22, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "jvm_info{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "instant": false, + "legendFormat": "{{ version }}", + "refId": "A" + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "JVM Version", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "name" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 4, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 14, + "x": 0, + "y": 2 + }, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_producer_producer_topic_metrics_record_send_rate{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}", + "legendFormat": "{{ topic }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kafka Producer Rate", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "prometheus", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 10, + "x": 14, + "y": 2 + }, + "id": 6, + "options": {}, + "pageSize": null, + "pluginVersion": "6.4.2", + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Topic", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Metric", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Message(s)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "expr": "increase(kafka_producer_producer_topic_metrics_record_send_total{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\"}[$__range]) > 0", + "instant": true, + "legendFormat": "{{ topic }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka Total Produced During Current Time Range", + "transform": "timeseries_to_rows", + "type": "table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 14, + "x": 0, + "y": 9 + }, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\", topic!=\"\"}) by (namespace, service, topic)", + "legendFormat": "{{ topic }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kafka Consumer Rate", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "prometheus", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 10, + "x": 14, + "y": 9 + }, + "id": 8, + "options": {}, + "pageSize": null, + "pluginVersion": "6.4.2", + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Topic", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Metric", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Message(s)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "expr": "sum(increase(kafka_consumer_consumer_fetch_manager_metrics_records_consumed_total{namespace=\"$namespace\", service=\"$service\", pod=\"$pod\", topic!=\"\"}[$__range])) by (namespace, service, topic) > 0", + "instant": true, + "legendFormat": "{{ topic }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka Total Consumed During Current Time Range", + "transform": "timeseries_to_rows", + "type": "table" + } + ], + "title": "Kafka", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 20, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "prometheus", + "definition": "label_values(jmx_exporter_build_info, namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(jmx_exporter_build_info, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "reactflow", + "value": "reactflow" + }, + "datasource": "prometheus", + "definition": "label_values(jmx_exporter_build_info{namespace=\"$namespace\"}, service)", + "hide": 0, + "includeAll": false, + "label": "Service", + "multi": false, + "name": "service", + "options": [], + "query": "label_values(jmx_exporter_build_info{namespace=\"$namespace\"}, service)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "reactflow-0", + "value": "reactflow-0" + }, + "datasource": "prometheus", + "definition": "label_values(jmx_exporter_build_info{namespace=\"$namespace\", service=\"$service\"}, pod)", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [], + "query": "label_values(jmx_exporter_build_info{namespace=\"$namespace\", service=\"$service\"}, pod)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "JMX", + "uid": "DfcvP0qZk", + "version": 1 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-jvm + namespace: monitoring +kind: ConfigMapList diff --git a/7-grafana/grafana-dashboardDatasources.yaml b/7-grafana/grafana-dashboardDatasources.yaml new file mode 100644 index 0000000..b92c16d --- /dev/null +++ b/7-grafana/grafana-dashboardDatasources.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +data: + datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0= +kind: Secret +metadata: + name: grafana-datasources + namespace: monitoring +type: Opaque diff --git a/7-grafana/grafana-dashboardDefinitions.yaml b/7-grafana/grafana-dashboardDefinitions.yaml new file mode 100644 index 0000000..85a0f00 --- /dev/null +++ b/7-grafana/grafana-dashboardDefinitions.yaml @@ -0,0 +1,13183 @@ +apiVersion: v1 +items: + - apiVersion: v1 + data: + k8s-cluster-rsrc-use.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:cluster_cpu_utilisation:ratio{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\"} / scalar(sum(min(kube_pod_info{cluster=\"$cluster\"}) by (node)))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Saturation (Load1)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:cluster_memory_utilisation:ratio{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Swap I/O)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Saturation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Utilisation (Transmitted)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Saturation (Dropped)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"} - node_filesystem_avail_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\"}\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{node}}", + "legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Capacity", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Storage", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_node_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / USE Method / Cluster", + "uid": "a6e7d1362e1ddbb79db21d5bb40d7137", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-cluster-rsrc-use + namespace: monitoring + - apiVersion: v1 + data: + k8s-node-rsrc-use.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_cpu_utilisation:avg1m{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Saturation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Saturation (Load1)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_memory_utilisation:{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Memory", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Swap IO", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Saturation (Swap I/O)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Saturation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk IO Saturation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Utilisation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Utilisation (Transmitted)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Saturation", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Net Saturation (Dropped)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Net", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node:node_filesystem_usage:{cluster=\"$cluster\"}\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\", node=\"$node\"}\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_node_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "node", + "multi": false, + "name": "node", + "options": [ + + ], + "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / USE Method / Node", + "uid": "4ac4f123aae0ff6dbaf4f4f66120033b", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-node-rsrc-use + namespace: monitoring + - apiVersion: v1 + data: + k8s-resources-cluster.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(node:node_num_cpu:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Requests Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(node:node_num_cpu:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "CPU Limits Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilisation", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Requests Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "percentunit", + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Memory Limits Commitment", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to workloads", + "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{namespace}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workloads", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": true, + "linkTooltip": "Drill down to workloads", + "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down to pods", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", + "pattern": "namespace", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Requests by Namespace", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Requests", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(node_cpu_seconds_total, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-cluster + namespace: monitoring + - apiVersion: v1 + data: + k8s-resources-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(label_replace(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(label_replace(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(label_replace(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage (w/o cache)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Namespace (Pods)", + "uid": "85a562078cdf77779eaa1add43ccec1e", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-namespace + namespace: monitoring + - apiVersion: v1 + data: + k8s-resources-pod.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(label_replace(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}, \"container\", \"$1\", \"container\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(label_replace(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, \"container\", \"$1\", \"container\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(label_replace(namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, \"container\", \"$1\", \"container\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}} (RSS)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}} (Cache)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container}} (Swap)", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Container", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "container", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}, \"container\", \"$1\", \"container\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, \"container\", \"$1\", \"container\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}, \"container\", \"$1\", \"container\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}, \"container\", \"$1\", \"container\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}, \"container\", \"$1\", \"container\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}, \"container\", \"$1\", \"container\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "pod", + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Pod", + "uid": "6581e46e4e5c7ba40a07646395ef7b23", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-pod + namespace: monitoring + - apiVersion: v1 + data: + k8s-resources-workload.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n label_replace(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n label_replace(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Pod", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", + "pattern": "pod", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n ) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "workload", + "multi": false, + "name": "workload", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "type", + "multi": false, + "name": "type", + "options": [ + + ], + "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Workload", + "uid": "a164a7f0339f99e89cea5cb47e9be617", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-workload + namespace: monitoring + - apiVersion: v1 + data: + k8s-resources-workloads-namespace.json: |- + { + "annotations": { + "list": [ + + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n label_replace(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "CPU Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "CPU Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "CPU Quota", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 10, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{workload}} - {{workload_type}}", + "legendLink": null, + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Usage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Running Pods", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory Usage", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #B", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #C", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Requests %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #D", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Memory Limits", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #E", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Limits %", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "percentunit" + }, + { + "alias": "Workload", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTooltip": "Drill down", + "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2", + "pattern": "workload", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Workload Type", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "workload_type", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}) by (workload, workload_type)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "expr": "sum(\n label_replace(\n container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\"\n ) * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n ) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod) group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memory Quota", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Compute Resources / Namespace (Workloads)", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-k8s-resources-workloads-namespace + namespace: monitoring + - apiVersion: v1 + data: + nodes.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "【English version】Update 2020.10.10, add the overall resource overview! Support Grafana6&7,Support Node Exporter v0.16 and above.Optimize the main metrics display. Includes: CPU, memory, disk IO, network, temperature and other monitoring metrics。https://github.com/starsliao/Prometheus", + "editable": true, + "gnetId": 11074, + "graphTooltip": 0, + "id": 19, + "iteration": 1642141291540, + "links": [ + { + "icon": "bolt", + "tags": [], + "targetBlank": true, + "title": "Update", + "tooltip": "Update dashboard", + "type": "link", + "url": "https://grafana.com/grafana/dashboards/11074" + }, + { + "icon": "question", + "tags": [], + "targetBlank": true, + "title": "GitHub", + "tooltip": "more dashboard", + "type": "link", + "url": "https://github.com/starsliao" + }, + { + "asDropdown": true, + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": "prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 187, + "panels": [], + "title": "Resource Overview (associated JOB),Host:$show_hostname,Instance:$node", + "type": "row" + }, + { + "columns": [], + "datasource": "prometheus", + "description": "Partition utilization, disk read, disk write, download bandwidth, upload bandwidth, if there are multiple network cards or multiple partitions, it is the value of the network card or partition with the highest utilization rate collected.\n\nCurrEstab: The number of TCP connections whose current status is ESTABLISHED or CLOSE-WAIT.", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "80%", + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 185, + "pageSize": null, + "showHeader": true, + "sort": { + "col": 31, + "desc": false + }, + "styles": [ + { + "alias": "Hostname", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "link": false, + "linkTooltip": "", + "linkUrl": "", + "mappingType": 1, + "pattern": "nodename", + "thresholds": [], + "type": "string", + "unit": "bytes" + }, + { + "alias": "IP(Link to details)", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "Browse host details", + "linkUrl": "d/xfpJB9FGz/node-exporter?orgId=1&var-job=${job}&var-hostname=All&var-node=${__cell}&var-device=All&var-origin_prometheus=$origin_prometheus", + "mappingType": 1, + "pattern": "instance", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Memory", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "mappingType": 1, + "pattern": "Value #B", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "CPU Cores", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": null, + "mappingType": 1, + "pattern": "Value #C", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": " Uptime", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #D", + "thresholds": [], + "type": "number", + "unit": "s" + }, + { + "alias": "Partition used%*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #E", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percent" + }, + { + "alias": "CPU used%", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #F", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percent" + }, + { + "alias": "Memory used%", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #G", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percent" + }, + { + "alias": "Disk read*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #H", + "thresholds": [ + "10485760", + "20485760" + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Disk write*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #I", + "thresholds": [ + "10485760", + "20485760" + ], + "type": "number", + "unit": "Bps" + }, + { + "alias": "Download*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #J", + "thresholds": [ + "30485760", + "104857600" + ], + "type": "number", + "unit": "bps" + }, + { + "alias": "Upload*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #K", + "thresholds": [ + "30485760", + "104857600" + ], + "type": "number", + "unit": "bps" + }, + { + "alias": "5m load", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #L", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "CurrEstab", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #M", + "thresholds": [ + "1000", + "1500" + ], + "type": "string", + "unit": "short" + }, + { + "alias": "TCP_tw", + "align": "center", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": null, + "mappingType": 1, + "pattern": "Value #N", + "thresholds": [ + "5000", + "20000" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "align": "right", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - 0", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "主机名", + "refId": "A" + }, + { + "expr": "sum(time() - node_boot_time_seconds{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})by(instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "运行时间", + "refId": "D" + }, + { + "expr": "node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总内存", + "refId": "B" + }, + { + "expr": "count(node_cpu_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",mode='system'}) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总核数", + "refId": "C" + }, + { + "expr": "node_load5{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "5分钟负载", + "refId": "L" + }, + { + "expr": "(1 - avg(rate(node_cpu_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",mode=\"idle\"}[$interval])) by (instance)) * 100", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "CPU使用率", + "refId": "F" + }, + { + "expr": "(1 - (node_memory_MemAvailable_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} / (node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})))* 100", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存使用率", + "refId": "G" + }, + { + "expr": "max((node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"}-node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"}) *100/(node_filesystem_avail_bytes {origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"}+(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"}-node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"})))by(instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "分区使用率", + "refId": "E" + }, + { + "expr": "max(rate(node_disk_read_bytes_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval])) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "最大读取", + "refId": "H" + }, + { + "expr": "max(rate(node_disk_written_bytes_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval])) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "最大写入", + "refId": "I" + }, + { + "expr": "node_netstat_Tcp_CurrEstab{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "连接数", + "refId": "M" + }, + { + "expr": "node_sockstat_TCP_tw{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "TIME_WAIT", + "refId": "N" + }, + { + "expr": "max(rate(node_network_receive_bytes_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval])*8) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "下载带宽", + "refId": "J" + }, + { + "expr": "max(rate(node_network_transmit_bytes_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval])*8) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "上传带宽", + "refId": "K" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Server Resource Overview【JOB:$job,Total:$total】", + "transform": "table", + "type": "table-old" + }, + { + "aliasColors": { + "192.168.200.241:9100_Total": "dark-red", + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "sdb_每秒I/O操作%": "#d683ce", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE", + "磁盘花费在I/O操作占比": "#ba43a9" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": null, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 191, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "Overall average used%", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + }, + { + "alias": "CPU Cores", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(node_cpu_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\", mode='system'})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "CPU Cores", + "refId": "B", + "step": 240 + }, + { + "expr": "sum(node_load5{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})", + "format": "time_series", + "hide": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Total 5m load", + "refId": "A", + "step": 240 + }, + { + "expr": "avg(1 - avg(rate(node_cpu_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",mode=\"idle\"}[$interval])) by (instance)) * 100", + "format": "time_series", + "hide": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Overall average used%", + "refId": "F", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$job:Overall total 5m load & average CPU used%", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": "Total 5m load", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 0, + "format": "percent", + "label": "Overall average used%", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.200.241:9100_总内存": "dark-red", + "内存_Avaliable": "#6ED0E0", + "内存_Cached": "#EF843C", + "内存_Free": "#629E51", + "内存_Total": "#6d1f62", + "内存_Used": "#eab839", + "可用": "#9ac48a", + "总内存": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 1, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 8 + }, + "height": "300", + "hiddenSeries": false, + "id": 195, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Total", + "color": "#C4162A", + "fill": 0 + }, + { + "alias": "Overall Average Used%", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "A", + "step": 4 + }, + { + "expr": "sum(node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - node_memory_MemAvailable_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})", + "format": "time_series", + "hide": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Total Used", + "refId": "B", + "step": 4 + }, + { + "expr": "(sum(node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - node_memory_MemAvailable_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}) / sum(node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}))*100", + "format": "time_series", + "hide": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Overall Average Used%", + "refId": "H" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$job:Overall total memory & average memory used%", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "bytes", + "label": "Total", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": null, + "format": "percent", + "label": "Overall Average Used%", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 1, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 8 + }, + "hiddenSeries": false, + "id": 197, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Overall Average Used%", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + }, + { + "alias": "Total", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(avg(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))", + "format": "time_series", + "instant": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "E" + }, + { + "expr": "sum(avg(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))", + "format": "time_series", + "instant": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Total Used", + "refId": "C" + }, + { + "expr": "(sum(avg(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))) *100/(sum(avg(node_filesystem_avail_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))+(sum(avg(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))))", + "format": "time_series", + "instant": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Overall Average Used%", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$job:Overall total disk & average disk used%", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "bytes", + "label": "Total", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": null, + "format": "percent", + "label": "Overall Average Used%", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 189, + "panels": [], + "title": "Resource Details:【$show_hostname】", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 0, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 3 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 0, + "y": 16 + }, + "hideTimeOverride": true, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7.2.0", + "targets": [ + { + "expr": "avg(time() - node_boot_time_seconds{instance=~\"$node\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 40 + } + ], + "title": "Uptime", + "type": "stat" + }, + { + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": {}, + "decimals": 1, + "mappings": [ + { + "from": "", + "id": 1, + "operator": "", + "text": "N/A", + "to": "", + "type": 1, + "value": "0" + } + ], + "max": 100, + "min": 0.1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 2, + "y": 16 + }, + "id": 177, + "options": { + "displayMode": "lcd", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "7.2.0", + "targets": [ + { + "expr": "100 - (avg(rate(node_cpu_seconds_total{instance=~\"$node\",mode=\"idle\"}[$interval])) * 100)", + "instant": true, + "interval": "", + "legendFormat": "CPU Busy", + "refId": "A" + }, + { + "expr": "avg(rate(node_cpu_seconds_total{instance=~\"$node\",mode=\"iowait\"}[$interval])) * 100", + "hide": true, + "instant": true, + "interval": "", + "legendFormat": "IOwait使用率", + "refId": "C" + }, + { + "expr": "(1 - (node_memory_MemAvailable_bytes{instance=~\"$node\"} / (node_memory_MemTotal_bytes{instance=~\"$node\"})))* 100", + "instant": true, + "interval": "", + "legendFormat": "Used RAM Memory", + "refId": "B" + }, + { + "expr": "(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"})*100 /(node_filesystem_avail_bytes {instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}+(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}))", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Used Max Mount($maxmount)", + "refId": "D" + }, + { + "expr": "(1 - ((node_memory_SwapFree_bytes{instance=~\"$node\"} + 1)/ (node_memory_SwapTotal_bytes{instance=~\"$node\"} + 1))) * 100", + "instant": true, + "interval": "", + "legendFormat": "Used SWAP", + "refId": "F" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "", + "transformations": [], + "type": "bargauge" + }, + { + "columns": [], + "datasource": "prometheus", + "description": "In this kanban: the total disk, usage, available, and usage rate are consistent with the values of the Size, Used, Avail, and Use% columns of the df command, and the value of Use% will be rounded to one decimal place, which will be more accurate .\n\nNote: The Use% algorithm in df is: (size-free) * 100 / (avail + (size-free)), the result is divisible by this value, non-divisible by this value is +1, and the unit of the result is %.\nRefer to the df command source code:", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "80%", + "gridPos": { + "h": 6, + "w": 10, + "x": 5, + "y": 16 + }, + "id": 181, + "links": [ + { + "targetBlank": true, + "title": "https://github.com/coreutils/coreutils/blob/master/src/df.c", + "url": "https://github.com/coreutils/coreutils/blob/master/src/df.c" + } + ], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 6, + "desc": false + }, + "styles": [ + { + "alias": "Mounted on", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "mountpoint", + "thresholds": [ + "" + ], + "type": "string", + "unit": "bytes" + }, + { + "alias": "Avail", + "align": "auto", + "colorMode": "value", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "mappingType": 1, + "pattern": "Value #A", + "thresholds": [ + "10000000000", + "20000000000" + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Used", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "mappingType": 1, + "pattern": "Value #B", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percent" + }, + { + "alias": "Size", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "mappingType": 1, + "pattern": "Value #C", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Filesystem", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "mappingType": 1, + "pattern": "fstype", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Device", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "mappingType": 1, + "pattern": "device", + "preserveFormat": false, + "sanitize": false, + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "preserveFormat": true, + "sanitize": false, + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总量", + "refId": "C" + }, + { + "expr": "node_filesystem_avail_bytes {instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-0", + "format": "table", + "hide": false, + "instant": true, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }, + { + "expr": "(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}) *100/(node_filesystem_avail_bytes {instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}+(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}))", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + } + ], + "title": "【$show_hostname】:Disk Space Used Basic(EXT?/XFS)", + "transform": "table", + "type": "table-old" + }, + { + "cacheTimeout": null, + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 2, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "connected", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 20 + }, + { + "color": "#d44a3a", + "value": 50 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 16 + }, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7.2.0", + "targets": [ + { + "expr": "avg(rate(node_cpu_seconds_total{instance=~\"$node\",mode=\"iowait\"}[$interval])) * 100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU iowait", + "type": "stat" + }, + { + "aliasColors": { + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_cni0_in": "light-red", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_cni0_in下载": "green", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_cni0_out上传": "yellow", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_eth0_in下载": "purple", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_eth0_out": "purple", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_eth0_out上传": "blue" + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 7, + "x": 17, + "y": 16 + }, + "hiddenSeries": false, + "id": 183, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "show": false, + "sort": "current", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": false, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 1, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*_transmit$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(node_network_receive_bytes_total{instance=~\"$node\",device=~\"$device\"}[60m])", + "interval": "60m", + "intervalFactor": 1, + "legendFormat": "{{device}}_receive", + "metric": "", + "refId": "A", + "step": 600, + "target": "" + }, + { + "expr": "increase(node_network_transmit_bytes_total{instance=~\"$node\",device=~\"$device\"}[60m])", + "hide": false, + "interval": "60m", + "intervalFactor": 1, + "legendFormat": "{{device}}_transmit", + "refId": "B", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Internet traffic per hour $device", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "transmit(-)/receive(+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 2 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 0, + "y": 18 + }, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "7.2.0", + "targets": [ + { + "expr": "count(node_cpu_seconds_total{instance=~\"$node\", mode='system'})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "title": "CPU Cores", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 100000 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 1000000 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 18 + }, + "id": 179, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7.2.0", + "targets": [ + { + "expr": "avg(node_filesystem_files_free{instance=~\"$node\",mountpoint=\"$maxmount\",fstype=~\"ext.?|xfs\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "title": "Free inodes:$maxmount ", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": 0, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 3 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 0, + "y": 20 + }, + "id": 75, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7.2.0", + "targets": [ + { + "expr": "sum(node_memory_MemTotal_bytes{instance=~\"$node\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 20 + } + ], + "title": "Total RAM", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "mappings": [ + { + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" + } + ], + "nullValueMode": "null", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1024 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 10000 + } + ] + }, + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 20 + }, + "id": 178, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "7.2.0", + "targets": [ + { + "expr": "avg(node_filefd_maximum{instance=~\"$node\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "title": "Total filefd", + "type": "stat" + }, + { + "aliasColors": { + "192.168.200.241:9100_Total": "dark-red", + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "sdb_每秒I/O操作%": "#d683ce", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE", + "磁盘花费在I/O操作占比": "#ba43a9" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Total/", + "color": "#C4162A", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(rate(node_cpu_seconds_total{instance=~\"$node\",mode=\"system\"}[$interval])) by (instance) *100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "System", + "refId": "A", + "step": 20 + }, + { + "expr": "avg(rate(node_cpu_seconds_total{instance=~\"$node\",mode=\"user\"}[$interval])) by (instance) *100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "User", + "refId": "B", + "step": 240 + }, + { + "expr": "avg(rate(node_cpu_seconds_total{instance=~\"$node\",mode=\"iowait\"}[$interval])) by (instance) *100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Iowait", + "refId": "D", + "step": 240 + }, + { + "expr": "(1 - avg(rate(node_cpu_seconds_total{instance=~\"$node\",mode=\"idle\"}[$interval])) by (instance))*100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "F", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU% Basic", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "percent", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.200.241:9100_总内存": "dark-red", + "使用率": "yellow", + "内存_Avaliable": "#6ED0E0", + "内存_Cached": "#EF843C", + "内存_Free": "#629E51", + "内存_Total": "#6d1f62", + "内存_Used": "#eab839", + "可用": "#9ac48a", + "总内存": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 22 + }, + "height": "300", + "hiddenSeries": false, + "id": 156, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Total", + "color": "#C4162A", + "fill": 0 + }, + { + "alias": "Used%", + "color": "rgb(0, 209, 255)", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_MemTotal_bytes{instance=~\"$node\"} - node_memory_MemAvailable_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "B", + "step": 4 + }, + { + "expr": "node_memory_MemAvailable_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Avaliable", + "refId": "F", + "step": 4 + }, + { + "expr": "node_memory_Buffers_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "内存_Buffers", + "refId": "D", + "step": 4 + }, + { + "expr": "node_memory_MemFree_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "内存_Free", + "refId": "C", + "step": 4 + }, + { + "expr": "node_memory_Cached_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "内存_Cached", + "refId": "E", + "step": 4 + }, + { + "expr": "node_memory_MemTotal_bytes{instance=~\"$node\"} - (node_memory_Cached_bytes{instance=~\"$node\"} + node_memory_Buffers_bytes{instance=~\"$node\"} + node_memory_MemFree_bytes{instance=~\"$node\"})", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "refId": "G" + }, + { + "expr": "(1 - (node_memory_MemAvailable_bytes{instance=~\"$node\"} / (node_memory_MemTotal_bytes{instance=~\"$node\"})))* 100", + "format": "time_series", + "hide": false, + "interval": "30m", + "intervalFactor": 10, + "legendFormat": "Used%", + "refId": "H" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Basic", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "percent", + "label": "Utilization%", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.10.227:9100_em1_in下载": "super-light-green", + "192.168.10.227:9100_em1_out上传": "dark-blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 22 + }, + "height": "300", + "hiddenSeries": false, + "id": 157, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*_transmit$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_bytes_total{instance=~'$node',device=~\"$device\"}[$interval])*8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_receive", + "refId": "A", + "step": 4 + }, + { + "expr": "rate(node_network_transmit_bytes_total{instance=~'$node',device=~\"$device\"}[$interval])*8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_transmit", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network bandwidth usage per second $device", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "label": "transmit(-)/receive(+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "15分钟": "#6ED0E0", + "1分钟": "#BF1B00", + "5分钟": "#CCA300" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 1, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 30 + }, + "height": "300", + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*CPU cores/", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=~\"$node\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "1m", + "metric": "", + "refId": "A", + "step": 20, + "target": "" + }, + { + "expr": "node_load5{instance=~\"$node\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "5m", + "refId": "B", + "step": 20 + }, + { + "expr": "node_load15{instance=~\"$node\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "15m", + "refId": "C", + "step": 20 + }, + { + "expr": " sum(count(node_cpu_seconds_total{instance=~\"$node\", mode='system'}) by (cpu,instance)) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU cores", + "refId": "D", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "System Load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "vda_write": "#6ED0E0" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "description": "Per second read / write bytes ", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 30 + }, + "height": "300", + "hiddenSeries": false, + "id": 168, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*_Read bytes$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_bytes_total{instance=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_Read bytes", + "refId": "A", + "step": 10 + }, + { + "expr": "rate(node_disk_written_bytes_total{instance=~\"$node\"}[$interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_Written bytes", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk R/W Data", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "Bps", + "label": "Bytes read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 1, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 30 + }, + "hiddenSeries": false, + "id": 174, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Inodes.*/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}) *100/(node_filesystem_avail_bytes {instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}+(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}}", + "refId": "A" + }, + { + "expr": "node_filesystem_files_free{instance=~'$node',fstype=~\"ext.?|xfs\"} / node_filesystem_files{instance=~'$node',fstype=~\"ext.?|xfs\"}", + "hide": true, + "interval": "", + "legendFormat": "Inodes:{{instance}}:{{mountpoint}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Space Used% Basic", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "decimals": 2, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "vda_write": "#6ED0E0" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "description": "Read/write completions per second\n\nWrites completed: 每个磁盘分区每秒写完成次数\n\nIO now 每个磁盘分区每秒正在处理的输入/输出请求数", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 38 + }, + "height": "300", + "hiddenSeries": false, + "id": 161, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*_Reads completed$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_reads_completed_total{instance=~\"$node\"}[$interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_Reads completed", + "refId": "A", + "step": 10 + }, + { + "expr": "rate(node_disk_writes_completed_total{instance=~\"$node\"}[$interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_Writes completed", + "refId": "B", + "step": 10 + }, + { + "expr": "node_disk_io_now{instance=~\"$node\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IOps Completed(IOPS)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "iops", + "label": "IO read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "sdb_每秒I/O操作%": "#d683ce", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE", + "磁盘花费在I/O操作占比": "#ba43a9" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": null, + "description": "The time spent on I/O in the natural time of each second.(wall-clock time)", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 38 + }, + "hiddenSeries": false, + "id": 175, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_io_time_seconds_total{instance=~\"$node\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_ IO time", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Time Spent Doing I/Os", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "vda": "#6ED0E0" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "description": "Time spent on each read/write operation", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 1, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 38 + }, + "height": "300", + "hiddenSeries": false, + "id": 160, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/,*_Read time$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_time_seconds_total{instance=~\"$node\"}[$interval]) / rate(node_disk_reads_completed_total{instance=~\"$node\"}[$interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_Read time", + "refId": "B" + }, + { + "expr": "rate(node_disk_write_time_seconds_total{instance=~\"$node\"}[$interval]) / rate(node_disk_writes_completed_total{instance=~\"$node\"}[$interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_Write time", + "refId": "C" + }, + { + "expr": "rate(node_disk_io_time_seconds_total{instance=~\"$node\"}[$interval])", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10 + }, + { + "expr": "rate(node_disk_io_time_weighted_seconds_total{instance=~\"$node\"}[$interval])", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_加权", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk R/W Time(Reference: less than 100ms)(beta)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "Time read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.200.241:9100_TCP_alloc": "semi-dark-blue", + "TCP": "#6ED0E0", + "TCP_alloc": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "description": "Sockets_used - Sockets currently in use\n\nCurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE- WAIT\n\nTCP_alloc - Allocated sockets\n\nTCP_tw - Sockets wating close\n\nUDP_inuse - Udp sockets currently in use\n\nRetransSegs - TCP retransmission packets\n\nOutSegs - Number of packets sent by TCP\n\nInSegs - Number of packets received by TCP", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 47 + }, + "height": "300", + "hiddenSeries": false, + "id": 158, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Sockets_used/", + "color": "#E02F44", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_netstat_Tcp_CurrEstab{instance=~'$node'}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CurrEstab", + "refId": "A", + "step": 20 + }, + { + "expr": "node_sockstat_TCP_tw{instance=~'$node'}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCP_tw", + "refId": "D" + }, + { + "expr": "node_sockstat_sockets_used{instance=~'$node'}", + "hide": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Sockets_used", + "refId": "B" + }, + { + "expr": "node_sockstat_UDP_inuse{instance=~'$node'}", + "interval": "", + "legendFormat": "UDP_inuse", + "refId": "C" + }, + { + "expr": "node_sockstat_TCP_alloc{instance=~'$node'}", + "interval": "", + "legendFormat": "TCP_alloc", + "refId": "E" + }, + { + "expr": "rate(node_netstat_Tcp_PassiveOpens{instance=~'$node'}[$interval])", + "hide": true, + "interval": "", + "legendFormat": "{{instance}}_Tcp_PassiveOpens", + "refId": "G" + }, + { + "expr": "rate(node_netstat_Tcp_ActiveOpens{instance=~'$node'}[$interval])", + "hide": true, + "interval": "", + "legendFormat": "{{instance}}_Tcp_ActiveOpens", + "refId": "F" + }, + { + "expr": "rate(node_netstat_Tcp_InSegs{instance=~'$node'}[$interval])", + "interval": "", + "legendFormat": "Tcp_InSegs", + "refId": "H" + }, + { + "expr": "rate(node_netstat_Tcp_OutSegs{instance=~'$node'}[$interval])", + "interval": "", + "legendFormat": "Tcp_OutSegs", + "refId": "I" + }, + { + "expr": "rate(node_netstat_Tcp_RetransSegs{instance=~'$node'}[$interval])", + "hide": false, + "interval": "", + "legendFormat": "Tcp_RetransSegs", + "refId": "J" + }, + { + "expr": "rate(node_netstat_TcpExt_ListenDrops{instance=~'$node'}[$interval])", + "hide": true, + "interval": "", + "legendFormat": "", + "refId": "K" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Sockstat", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "transformations": [], + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "Total_Sockets_used", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "filefd_192.168.200.241:9100": "super-light-green", + "switches_192.168.200.241:9100": "semi-dark-red", + "使用的文件描述符_10.118.72.128:9100": "red", + "每秒上下文切换次数_10.118.71.245:9100": "yellow", + "每秒上下文切换次数_10.118.72.128:9100": "yellow" + }, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 47 + }, + "hiddenSeries": false, + "hideTimeOverride": false, + "id": 16, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.2.0", + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "switches", + "color": "#FADE2A", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + }, + { + "alias": "used filefd", + "color": "#F2495C" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filefd_allocated{instance=~\"$node\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 5, + "legendFormat": "used filefd", + "refId": "B" + }, + { + "expr": "rate(node_context_switches_total{instance=~\"$node\"}[$interval])", + "interval": "", + "intervalFactor": 5, + "legendFormat": "switches", + "refId": "A" + }, + { + "expr": " (node_filefd_allocated{instance=~\"$node\"}/node_filefd_maximum{instance=~\"$node\"}) *100", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 5, + "legendFormat": "使用的文件描述符占比_{{instance}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Open File Descriptor(left)/Context switches(right)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "used filefd", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "context_switches", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "", + "schemaVersion": 26, + "style": "dark", + "tags": [ + "Prometheus", + "node_exporter", + "StarsL.cn" + ], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": "prometheus", + "definition": "label_values(origin_prometheus)", + "hide": 0, + "includeAll": false, + "label": "Origin_prom", + "multi": false, + "name": "origin_prometheus", + "options": [], + "query": "label_values(origin_prometheus)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "node-exporter", + "value": "node-exporter" + }, + "datasource": "prometheus", + "definition": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\"}, job)", + "hide": 0, + "includeAll": false, + "label": "JOB", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\"}, job)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "prometheus", + "definition": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}, nodename)", + "hide": 0, + "includeAll": true, + "label": "Host", + "multi": false, + "name": "hostname", + "options": [], + "query": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}, nodename)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allFormat": "glob", + "allValue": null, + "current": { + "selected": false, + "text": "10.0.0.12:9100", + "value": "10.0.0.12:9100" + }, + "datasource": "prometheus", + "definition": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",nodename=~\"$hostname\"},instance)", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": true, + "multiFormat": "regex values", + "name": "node", + "options": [], + "query": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",nodename=~\"$hostname\"},instance)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allFormat": "glob", + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "prometheus", + "definition": "label_values(node_network_info{origin_prometheus=~\"$origin_prometheus\",device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo.*|cni.*'},device)", + "hide": 0, + "includeAll": true, + "label": "NIC", + "multi": true, + "multiFormat": "regex values", + "name": "device", + "options": [], + "query": "label_values(node_network_info{origin_prometheus=~\"$origin_prometheus\",device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo.*|cni.*'},device)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": false, + "auto_count": 100, + "auto_min": "10s", + "current": { + "selected": false, + "text": "2m", + "value": "2m" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": true, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + } + ], + "query": "30s,1m,2m,3m,5m,10m,30m", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "/opt", + "value": "/opt" + }, + "datasource": "prometheus", + "definition": "query_result(topk(1,sort_desc (max(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",instance=~'$node',fstype=~\"ext.?|xfs\",mountpoint!~\".*pods.*\"}) by (mountpoint))))", + "hide": 2, + "includeAll": false, + "label": "maxmount", + "multi": false, + "name": "maxmount", + "options": [], + "query": "query_result(topk(1,sort_desc (max(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",instance=~'$node',fstype=~\"ext.?|xfs\",mountpoint!~\".*pods.*\"}) by (mountpoint))))", + "refresh": 2, + "regex": "/.*\\\"(.*)\\\".*/", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "VM-0-12-centos", + "value": "VM-0-12-centos" + }, + "datasource": "prometheus", + "definition": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",instance=~\"$node\"}, nodename)", + "hide": 2, + "includeAll": false, + "label": "show_hostname", + "multi": false, + "name": "show_hostname", + "options": [], + "query": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",instance=~\"$node\"}, nodename)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "17", + "value": "17" + }, + "datasource": "prometheus", + "definition": "query_result(count(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}))", + "hide": 2, + "includeAll": false, + "label": "total_servers", + "multi": false, + "name": "total", + "options": [], + "query": "query_result(count(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}))", + "refresh": 1, + "regex": "/{} (.*) .*/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "hidden": false, + "now": true, + "refresh_intervals": [ + "15s", + "30s", + "1m", + "5m", + "15m", + "30m" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Hosts", + "uid": "xfpJB9FGz", + "version": 1 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-nodes + namespace: monitoring + - apiVersion: v1 + data: + persistentvolumesusage.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used Space", + "refId": "A" + }, + { + "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Free Space", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume Space Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume Space Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 9, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used inodes", + "refId": "A" + }, + { + "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": " Free inodes", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Volume inodes Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$datasource", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "80, 90", + "title": "Volume inodes Usage", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "PersistentVolumeClaim", + "multi": false, + "name": "volume", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-persistentvolumesusage + namespace: monitoring + - apiVersion: v1 + data: + pods.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "$datasource", + "enable": true, + "expr": "time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m]) > 0)", + "hide": false, + "iconColor": "rgba(215, 44, 44, 1)", + "name": "Restarts", + "showIn": 0, + "tags": [ + "restart" + ], + "type": "rows" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(container) (container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Current: {{ container }}", + "refId": "A" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "refId": "B" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Limit: {{ container }}", + "refId": "C" + }, + { + "expr": "sum by(container) (container_memory_cache{job=\"kubelet\", namespace=\"$namespace\", pod=~\"$pod\", container=~\"$container\", container!=\"POD\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cache: {{ container }}", + "refId": "D" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (container) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[2m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Current: {{ container }}", + "refId": "A" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Requested: {{ container }}", + "refId": "B" + }, + { + "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Limit: {{ container }}", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (pod) (rate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "RX: {{ pod }}", + "refId": "A" + }, + { + "expr": "sort_desc(sum by (pod) (rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "TX: {{ pod }}", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (container) (kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Restarts: {{ container }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Restarts Per Container", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [ + + ], + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Container", + "multi": false, + "name": "container", + "options": [ + + ], + "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / Pods", + "uid": "ab4f13a9892a76a4d21ce8c2445bf4ea", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-pods + namespace: monitoring + - apiVersion: v1 + data: + statefulset.json: |- + { + "__inputs": [ + + ], + "__requires": [ + + ], + "annotations": { + "list": [ + + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ + + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "cores", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "CPU", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 3, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "GB", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Memory", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 4, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "Bps", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Network", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "height": "100px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 5, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Desired Replicas", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 6, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Replicas of current version", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 7, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Observed Generation", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + + }, + "id": 8, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Metadata Generation", + "tooltip": { + "shared": false + }, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas specified", + "refId": "A" + }, + { + "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas created", + "refId": "B" + }, + { + "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "ready", + "refId": "C" + }, + { + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "replicas of current version", + "refId": "D" + }, + { + "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "updated", + "refId": "E" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeShift": null, + "title": "Replicas", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\"}, namespace)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Name", + "multi": false, + "name": "statefulset", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", namespace=\"$namespace\"}, statefulset)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kubernetes / StatefulSets", + "uid": "a31c1f46e6f727cb37c0d731a7245005", + "version": 0 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-statefulset + namespace: monitoring +kind: ConfigMapList diff --git a/7-grafana/grafana-dashboardSources.yaml b/7-grafana/grafana-dashboardSources.yaml new file mode 100644 index 0000000..dbe9e18 --- /dev/null +++ b/7-grafana/grafana-dashboardSources.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +data: + dashboards.yaml: |- + { + "apiVersion": 1, + "providers": [ + { + "folder": "", + "name": "0", + "options": { + "path": "/grafana-dashboard-definitions/0" + }, + "orgId": 1, + "type": "file" + }, + { + "folder": "", + "name": "1", + "options": { + "path": "/grafana-dashboard-definitions/1" + }, + "orgId": 1, + "type": "file", + allowUiUpdates: true + } + ] + } +kind: ConfigMap +metadata: + name: grafana-dashboards + namespace: monitoring diff --git a/7-grafana/grafana-deployment.yaml b/7-grafana/grafana-deployment.yaml new file mode 100644 index 0000000..7b9dbe9 --- /dev/null +++ b/7-grafana/grafana-deployment.yaml @@ -0,0 +1,169 @@ +### comment below if you want to use temp storage ### +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: grafana-storage-pvc + namespace: monitoring +spec: + # 阿里云高效云盘(最小20G) + # storageClassName: alicloud-disk-topology-efficiency + storageClassName: prometheus-local-storage + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi +### comment above if you want to use temp storage ### +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: grafana + name: grafana + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + spec: + containers: + - name: grafana + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/grafana:7.2.0 + ports: + - containerPort: 3000 + name: http + env: + - name: GF_SERVER_SERVE_FROM_SUB_PATH + value: "true" + - name: GF_SERVER_ROOT_URL + value: "%(protocol)s://%(domain)s/grafana" + readinessProbe: + httpGet: + path: /api/health + port: http + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - mountPath: /var/lib/grafana + name: grafana-storage + readOnly: false + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources + readOnly: false + - mountPath: /etc/grafana/provisioning/dashboards + name: grafana-dashboards + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-cluster-rsrc-use + name: grafana-dashboard-k8s-cluster-rsrc-use + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-node-rsrc-use + name: grafana-dashboard-k8s-node-rsrc-use + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster + name: grafana-dashboard-k8s-resources-cluster + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace + name: grafana-dashboard-k8s-resources-namespace + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod + name: grafana-dashboard-k8s-resources-pod + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workload + name: grafana-dashboard-k8s-resources-workload + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace + name: grafana-dashboard-k8s-resources-workloads-namespace + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/nodes + name: grafana-dashboard-nodes + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage + name: grafana-dashboard-persistentvolumesusage + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/pods + name: grafana-dashboard-pods + readOnly: false + - mountPath: /grafana-dashboard-definitions/0/statefulset + name: grafana-dashboard-statefulset + readOnly: false + - mountPath: /grafana-dashboard-definitions/1/kafka + name: grafana-dashboard-kafka + readOnly: false + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: grafana + volumes: + - name: grafana-storage + ### uncomment below if you want to use temp storage ### + # emptyDir: {} + ### uncomment above if you want to use temp storage ### + ### comment below if you want to use temp storage ### + persistentVolumeClaim: + claimName: grafana-storage-pvc + ### comment above if you want to use temp storage ### + - name: grafana-datasources + secret: + secretName: grafana-datasources + - configMap: + name: grafana-dashboards + name: grafana-dashboards + - configMap: + name: grafana-dashboard-k8s-cluster-rsrc-use + name: grafana-dashboard-k8s-cluster-rsrc-use + - configMap: + name: grafana-dashboard-k8s-node-rsrc-use + name: grafana-dashboard-k8s-node-rsrc-use + - configMap: + name: grafana-dashboard-k8s-resources-cluster + name: grafana-dashboard-k8s-resources-cluster + - configMap: + name: grafana-dashboard-k8s-resources-namespace + name: grafana-dashboard-k8s-resources-namespace + - configMap: + name: grafana-dashboard-k8s-resources-pod + name: grafana-dashboard-k8s-resources-pod + - configMap: + name: grafana-dashboard-k8s-resources-workload + name: grafana-dashboard-k8s-resources-workload + - configMap: + name: grafana-dashboard-k8s-resources-workloads-namespace + name: grafana-dashboard-k8s-resources-workloads-namespace + - configMap: + name: grafana-dashboard-nodes + name: grafana-dashboard-nodes + - configMap: + name: grafana-dashboard-persistentvolumesusage + name: grafana-dashboard-persistentvolumesusage + - configMap: + name: grafana-dashboard-pods + name: grafana-dashboard-pods + - configMap: + name: grafana-dashboard-statefulset + name: grafana-dashboard-statefulset + - configMap: + name: grafana-dashboard-kafka + name: grafana-dashboard-kafka + - configMap: + name: grafana-dashboard-yarn + name: grafana-dashboard-yarn + - configMap: + name: grafana-dashboard-maxwell + name: grafana-dashboard-maxwell + - configMap: + name: grafana-dashboard-jvm + name: grafana-dashboard-jvm diff --git a/7-grafana/grafana-service.yaml b/7-grafana/grafana-service.yaml new file mode 100644 index 0000000..df8006c --- /dev/null +++ b/7-grafana/grafana-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app: grafana + name: grafana + namespace: monitoring +spec: + type: NodePort + ports: + - name: http + port: 3000 + targetPort: http + nodePort: 30300 + selector: + app: grafana diff --git a/7-grafana/grafana-serviceAccount.yaml b/7-grafana/grafana-serviceAccount.yaml new file mode 100644 index 0000000..3ed3e03 --- /dev/null +++ b/7-grafana/grafana-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: grafana + namespace: monitoring diff --git a/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-daemonSet.yaml b/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-daemonSet.yaml new file mode 100644 index 0000000..446de6e --- /dev/null +++ b/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-daemonSet.yaml @@ -0,0 +1,43 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app: azure-scheduledevents-exporter + name: azure-scheduledevents-exporter + namespace: monitoring +spec: + selector: + matchLabels: + app: azure-scheduledevents-exporter + template: + metadata: + labels: + app: azure-scheduledevents-exporter + spec: + containers: + - args: + - --scrape-time=15s + - --verbose + - --bind=:9879 + - --api-timeout=120s + name: azure-scheduledevents-exporter + ports: + - containerPort: 9879 + hostPort: 9879 + name: http + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/azure-scheduledevents-exporter:21.01.1 + resources: + limits: + cpu: 100m + memory: 300Mi + requests: + cpu: 10m + memory: 30Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + tolerations: + - operator: Exists diff --git a/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-prometheusRule.yaml b/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-prometheusRule.yaml new file mode 100644 index 0000000..e69eb5f --- /dev/null +++ b/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-prometheusRule.yaml @@ -0,0 +1,28 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: azure-scheduledevents-exporter-rules + namespace: monitoring +spec: + groups: + - name: azure.scheduledevents.alerts + rules: + - alert: AzureScheduledEventsNotification + expr: | + azure_scheduledevent_event{eventStatus="Scheduled"} > 0 + labels: + severity: warning + annotations: + summary: "收到Azure通知,已安排时间进行维护操作,请留意" + description: 事件类型={{ $labels.eventType }},资源类型={{ $labels.resourceType }},资源名称={{ $labels.resource }},预计时间(不早于)={{ $labels.notBefore }},事件ID={{ $labels.eventID }},事件来源={{ $labels.eventSource }} + - alert: AzureStartedEventsNotification + expr: | + azure_scheduledevent_event{eventStatus="Started"} > 0 + labels: + severity: warning + annotations: + summary: "收到Azure通知,维护操作已开始,请留意" + description: 事件类型={{ $labels.eventType }},资源类型={{ $labels.resourceType }},资源名称={{ $labels.resource }},事件ID={{ $labels.eventID }},事件来源={{ $labels.eventSource }} diff --git a/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-service.yaml b/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-service.yaml new file mode 100644 index 0000000..dff9494 --- /dev/null +++ b/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: azure-scheduledevents-exporter + name: azure-scheduledevents-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: http + port: 9879 + targetPort: http + selector: + app: azure-scheduledevents-exporter diff --git a/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-serviceMonitor.yaml b/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..fd6405c --- /dev/null +++ b/8-custom-exporter/azure-scheduledevents-exporter/azure-scheduledevents-exporter-serviceMonitor.yaml @@ -0,0 +1,20 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: azure-scheduledevents-exporter + name: azure-scheduledevents-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 15s + scrapeTimeout: 10s + port: http + scheme: http + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: azure-scheduledevents-exporter diff --git a/8-custom-exporter/azure-scheduledevents-exporter/external-azure-scheduledevents-exporter-endpoints.yaml b/8-custom-exporter/azure-scheduledevents-exporter/external-azure-scheduledevents-exporter-endpoints.yaml new file mode 100644 index 0000000..f3197db --- /dev/null +++ b/8-custom-exporter/azure-scheduledevents-exporter/external-azure-scheduledevents-exporter-endpoints.yaml @@ -0,0 +1,14 @@ +kind: Endpoints +apiVersion: v1 +metadata: + labels: + k8s-app: azure-scheduledevents-exporter + name: external-azure-scheduledevents-exporter + namespace: monitoring +subsets: + - addresses: + - ip: 127.0.0.1 + - ip: 127.0.1.1 + ports: + - port: 9879 + name: http diff --git a/8-custom-exporter/azure-scheduledevents-exporter/external-azure-scheduledevents-exporter-service.yaml b/8-custom-exporter/azure-scheduledevents-exporter/external-azure-scheduledevents-exporter-service.yaml new file mode 100644 index 0000000..3d0ccde --- /dev/null +++ b/8-custom-exporter/azure-scheduledevents-exporter/external-azure-scheduledevents-exporter-service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: azure-scheduledevents-exporter + name: external-azure-scheduledevents-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: http + port: 9879 + targetPort: http diff --git a/8-custom-exporter/cloudera-exporter/cloudera-exporter-clusterRole.yaml b/8-custom-exporter/cloudera-exporter/cloudera-exporter-clusterRole.yaml new file mode 100644 index 0000000..e61eafc --- /dev/null +++ b/8-custom-exporter/cloudera-exporter/cloudera-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cloudera-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/8-custom-exporter/cloudera-exporter/cloudera-exporter-clusterRoleBinding.yaml b/8-custom-exporter/cloudera-exporter/cloudera-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..db21b29 --- /dev/null +++ b/8-custom-exporter/cloudera-exporter/cloudera-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cloudera-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cloudera-exporter +subjects: + - kind: ServiceAccount + name: cloudera-exporter + namespace: monitoring diff --git a/8-custom-exporter/cloudera-exporter/cloudera-exporter-configMap.yaml b/8-custom-exporter/cloudera-exporter/cloudera-exporter-configMap.yaml new file mode 100644 index 0000000..be33c68 --- /dev/null +++ b/8-custom-exporter/cloudera-exporter/cloudera-exporter-configMap.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +data: + config.ini: | + [target] + host = scm-server-ip-here + port = 7180 + + [user] + # User name (Only read permision is required) + username = admin + password = admin + + [modules] + global_status_module = true + host_module = false + hdfs_module = false + impala_module = false + # Yarn metrics module (Still doesn't work) + yarn_module = false + + [system] + num_procs = 8 + deploy_ip = 127.0.0.1 + deploy_port = 9200 + log_level = 1 + +kind: ConfigMap +metadata: + name: cloudera-exporter-config + namespace: monitoring + diff --git a/8-custom-exporter/cloudera-exporter/cloudera-exporter-deployment.yaml b/8-custom-exporter/cloudera-exporter/cloudera-exporter-deployment.yaml new file mode 100644 index 0000000..b13f670 --- /dev/null +++ b/8-custom-exporter/cloudera-exporter/cloudera-exporter-deployment.yaml @@ -0,0 +1,70 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: cloudera-exporter + name: cloudera-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: cloudera-exporter + template: + metadata: + labels: + app: cloudera-exporter + spec: + containers: + - name: cloudera-exporter + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/cloudera_exporter:v0.0.1 + resources: + limits: + cpu: 100m + memory: 500Mi + requests: + cpu: 10m + memory: 50Mi + volumeMounts: + - name: config-volume + mountPath: /config.ini + subPath: config.ini + - args: + - --logtostderr + - --secure-listen-address=$(IP):9200 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9200/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 9200 + hostPort: 9200 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + volumes: + - name: config-volume + configMap: + name: cloudera-exporter-config + items: + - key: config.ini + path: config.ini + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: cloudera-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/cloudera-exporter/cloudera-exporter-prometheusRule.yaml b/8-custom-exporter/cloudera-exporter/cloudera-exporter-prometheusRule.yaml new file mode 100644 index 0000000..7a38372 --- /dev/null +++ b/8-custom-exporter/cloudera-exporter/cloudera-exporter-prometheusRule.yaml @@ -0,0 +1,56 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: cloudera-exporter-rules + namespace: monitoring +spec: + groups: + - name: cloudera.alerts + rules: + - alert: ClouderaExporterNotHealthy + expr: | + kbdi_exporter_up != 1 or absent(kbdi_exporter_up) + for: 5m + labels: + severity: warning + annotations: + summary: "cloudera-exporter未能正常抓取监控数据,请留意" + - alert: ClouderaClusterNotHealthy + expr: | + kbdi_status_cluster_up{maintenance_mode="false"} != 5 + for: 5m + labels: + severity: warning + annotations: + summary: "CDH集群非健康,请注意" + description: 集群名={{ $labels.cluster_name }},集群状态={{ $labels.cluster_state }} + - alert: ClouderaHostNotHealthy + expr: | + kbdi_status_host_up{maintenance_mode="false"} != 5 + for: 5m + labels: + severity: warning + annotations: + summary: "CDH节点非健康,请注意" + description: 节点名={{ $labels.hostname }},健康状态={{ $labels.health_summary }} + - alert: ClouderaServiceNotHealthy + expr: | + kbdi_status_service_up !=5 and kbdi_status_service_up !=3 + for: 5m + labels: + severity: warning + annotations: + summary: "CDH服务非健康,请注意" + description: 服务名={{ $labels.service_name }},服务状态={{ $labels.health_summary }} + - alert: ClouderaRoleNotHealthy + expr: | + kbdi_status_role_up !=5 and kbdi_status_role_up !=3 + for: 5m + labels: + severity: warning + annotations: + summary: "CDH角色非健康,请注意" + description: 角色名={{ $labels.exported_service }}-{{ $labels.role_type }},所在节点={{ $labels.host_name }},服务状态={{ $labels.health_summary }} diff --git a/8-custom-exporter/cloudera-exporter/cloudera-exporter-service.yaml b/8-custom-exporter/cloudera-exporter/cloudera-exporter-service.yaml new file mode 100644 index 0000000..a1e3c96 --- /dev/null +++ b/8-custom-exporter/cloudera-exporter/cloudera-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: cloudera-exporter + name: cloudera-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9200 + targetPort: https + selector: + app: cloudera-exporter diff --git a/8-custom-exporter/cloudera-exporter/cloudera-exporter-serviceAccount.yaml b/8-custom-exporter/cloudera-exporter/cloudera-exporter-serviceAccount.yaml new file mode 100644 index 0000000..55958a9 --- /dev/null +++ b/8-custom-exporter/cloudera-exporter/cloudera-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cloudera-exporter + namespace: monitoring diff --git a/8-custom-exporter/cloudera-exporter/cloudera-exporter-serviceMonitor.yaml b/8-custom-exporter/cloudera-exporter/cloudera-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..4625cd9 --- /dev/null +++ b/8-custom-exporter/cloudera-exporter/cloudera-exporter-serviceMonitor.yaml @@ -0,0 +1,20 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: cloudera-exporter + name: cloudera-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + scrapeTimeout: 20s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: cloudera-exporter diff --git a/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-clusterRole.yaml b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-clusterRole.yaml new file mode 100644 index 0000000..26649ce --- /dev/null +++ b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: elasticsearch-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-clusterRoleBinding.yaml b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..0d0474a --- /dev/null +++ b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: elasticsearch-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: elasticsearch-exporter +subjects: + - kind: ServiceAccount + name: elasticsearch-exporter + namespace: monitoring diff --git a/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-deployment.yaml b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-deployment.yaml new file mode 100644 index 0000000..e77f939 --- /dev/null +++ b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-deployment.yaml @@ -0,0 +1,67 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: elasticsearch-exporter + name: elasticsearch-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: elasticsearch-exporter + template: + metadata: + labels: + app: elasticsearch-exporter + spec: + containers: + - args: + #例 es.uri=http://es账号:es密码@esip地址:9200 + - --es.uri= + - --es.all + - --es.indices + - --web.listen-address=127.0.0.1:9114 + name: elasticsearch-exporter + imagePullPolicy: IfNotPresent + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/elasticsearch_exporter:1.1.0rc1 + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9114 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9114/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + imagePullPolicy: IfNotPresent + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 9114 + hostPort: 9114 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: elasticsearch-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-prometheusRule.yaml b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-prometheusRule.yaml new file mode 100644 index 0000000..31afba3 --- /dev/null +++ b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-prometheusRule.yaml @@ -0,0 +1,67 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: elasticsearch-exporter-rules + namespace: monitoring +spec: + groups: + - name: elasticsearch.alerts + rules: + - alert: ElasticsearchJvmheapPercent-5m + expr: | + ceil(elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"} * 100) > 85 + for: 5m + labels: + severity: low + annotations: + summary: "elasticsearch jvm_heap使用率持续5m超出85%" + description: "elasticsearch服务器={{ $labels.host }} jvm_heap={{ $value }}%" + - alert: ElasticsearchJvmheapPercent-30m + expr: | + ceil(elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"} * 100) > 85 + for: 30m + labels: + severity: warning + annotations: + summary: "elasticsearch jvm_heap使用率持续30m超出85%" + description: "elasticsearch服务器={{ $labels.host }} jvm_heap={{ $value }}%" + - alert: ElasticsearchFilesystemUsage + expr: | + ceil(100 - elasticsearch_filesystem_data_free_bytes / elasticsearch_filesystem_data_size_bytes * 100) > 85 + for: 5m + labels: + severity: critical + annotations: + summary: "elasticsearch磁盘使用率持续5m超出85%" + description: "elasticsearch服务器={{ $labels.host }} 磁盘={{ $labels.mount }} 磁盘使用率={{ $value }}%" + - alert: ElasticsearchStatus-red + expr: | + elasticsearch_cluster_health_status{color="red"} == 1 + for: 10s + labels: + severity: critical + annotations: + summary: "elasticsearch集群不可用" + description: "elasticsearch集群当前健康状态={{ $labels.color }}" + - alert: ElasticsearchStatusyellow + expr: | + elasticsearch_cluster_health_status{color="yellow"} == 1 + for: 10s + labels: + severity: warning + annotations: + summary: "elasticsearch集群状态不良" + description: "elasticsearch集群当前健康状态={{ $labels.color }}" + - alert: ElasticsearchNodesDown + expr: | + count_over_time(elasticsearch_os_mem_used_bytes[2m]) unless elasticsearch_os_mem_used_bytes + for: 1m + labels: + severity: critical + annotations: + summary: "elasticsearch节点故障" + description: "服务器IP={{ $labels.host }} 集群名={{ $labels.cluster }} down" + diff --git a/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-service.yaml b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-service.yaml new file mode 100644 index 0000000..a52d3e4 --- /dev/null +++ b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: elasticsearch-exporter + name: elasticsearch-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9114 + targetPort: https + selector: + app: elasticsearch-exporter diff --git a/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-serviceAccount.yaml b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-serviceAccount.yaml new file mode 100644 index 0000000..c658086 --- /dev/null +++ b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: elasticsearch-exporter + namespace: monitoring diff --git a/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-serviceMonitor.yaml b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..027c95e --- /dev/null +++ b/8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-serviceMonitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: elasticsearch-exporter + name: elasticsearch-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: elasticsearch-exporter diff --git a/8-custom-exporter/elasticsearch-exporter/elasticsearch-log-exporter-deployment.yaml b/8-custom-exporter/elasticsearch-exporter/elasticsearch-log-exporter-deployment.yaml new file mode 100644 index 0000000..daf386d --- /dev/null +++ b/8-custom-exporter/elasticsearch-exporter/elasticsearch-log-exporter-deployment.yaml @@ -0,0 +1,66 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: elasticsearch-exporter-log + name: elasticsearch-exporter-log + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: elasticsearch-exporter-log + template: + metadata: + labels: + app: elasticsearch-exporter-log + spec: + containers: + - args: + - --es.uri=http://10.0.35.206:9200 + - --es.all + - --es.indices + - --web.listen-address=127.0.0.1:9114 + name: elasticsearch-exporter-log + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/elasticsearch_exporter:1.1.0rc1 + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9114 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9114/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + imagePullPolicy: IfNotPresent + name: kube-rbac-proxy + ports: + - containerPort: 9114 + hostPort: 9114 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: elasticsearch-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/elasticsearch-exporter/elasticsearch-log-exporter-service.yaml b/8-custom-exporter/elasticsearch-exporter/elasticsearch-log-exporter-service.yaml new file mode 100644 index 0000000..9c085ba --- /dev/null +++ b/8-custom-exporter/elasticsearch-exporter/elasticsearch-log-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: elasticsearch-exporter + name: elasticsearch-exporter-log + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9114 + targetPort: https + selector: + app: elasticsearch-exporter-log diff --git a/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-configMap.yaml b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-configMap.yaml new file mode 100644 index 0000000..76bfe30 --- /dev/null +++ b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-configMap.yaml @@ -0,0 +1,96 @@ +apiVersion: v1 +data: + log.cfg: | + [DEFAULT] + QueryIntervalSecs = 15 + QueryTimeoutSecs = 10 + QueryIndices = _all + QueryOnError = drop + QueryOnMissing = drop + + [query_log_nginx] + QueryIntervalSecs = 300 + QueryTimeoutSecs = 30 + QueryIndices = + QueryOnError = drop + QueryOnMissing = drop + QueryJson = { + "size": 0, + "query": { + "range": { + "@timestamp": { + "gte": "now-5m", + "lte": "now", + "format": "strict_date_optional_time" + } + } + }, + "aggs": { + "host": { + "terms": { + "field": "http_host.keyword", + "size": 50 + }, + "aggs": { + "status": { + "terms": { + "field": "status.keyword", + "size": 50 + } + } + } + } + }, + "timeout": "30000ms" + } + + [query_log] + QueryIntervalSecs = 300 + QueryTimeoutSecs = 30 + QueryIndices = + QueryOnError = drop + QueryOnMissing = drop + QueryJson = { + "size": 0, + "query": { + "range": { + "@timestamp": { + "gte": "now-5m", + "lte": "now", + "format": "strict_date_optional_time" + } + } + }, + "aggs": { + "application": { + "terms": { + "field": "service", + "size": 100 + }, + "aggs": { + "error": { + "filter": { + "term": { + "level": "ERROR" + } + } + }, + "error_percentage": { + "bucket_script": { + "buckets_path": { + "numerator": "error>_count", + "denominator": "_count" + }, + "script": "params.numerator != null && params.denominator != null && params.denominator > 0 ? params.numerator / params.denominator : 0" + } + } + } + } + }, + "timeout": "30000ms" + } + +kind: ConfigMap +metadata: + name: elasticsearch-log-exporter-config + namespace: monitoring diff --git a/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-deployment.yaml b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-deployment.yaml new file mode 100644 index 0000000..569fcf3 --- /dev/null +++ b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-deployment.yaml @@ -0,0 +1,55 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: elasticsearch-log-exporter + name: elasticsearch-log-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: elasticsearch-log-exporter + template: + metadata: + labels: + app: elasticsearch-log-exporter + spec: + containers: + - args: + - --es-cluster=es01,es02,es03 + - --cluster-health-disable + - --nodes-stats-disable + - --indices-aliases-disable + - --indices-mappings-disable + - --indices-stats-disable + - --config-dir=/config + - --verbose + name: elasticsearch-log-exporter + ports: + - containerPort: 9206 + hostPort: 9206 + name: http + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus-es-exporter:0.14.0 + resources: + limits: + cpu: 100m + memory: 500Mi + requests: + cpu: 10m + memory: 50Mi + volumeMounts: + - name: config-volume + mountPath: /config + nodeSelector: + beta.kubernetes.io/os: linux + volumes: + - name: config-volume + configMap: + name: elasticsearch-log-exporter-config + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + tolerations: + - operator: Exists diff --git a/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-prometheusRule.yaml b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-prometheusRule.yaml new file mode 100644 index 0000000..c50536f --- /dev/null +++ b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-prometheusRule.yaml @@ -0,0 +1,39 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: elasticsearch-log-exporter-rules + namespace: monitoring +spec: + groups: + - name: elasticsearch.log.application.alerts + rules: + - alert: ElasticsearchLogApplicationKeepingError + expr: | + log_application_error_percentage_value * 100 > 0 + for: 1h + labels: + severity: warning + annotations: + summary: "ES中服务日志持续有报错,超过1小时,请留意" + description: 服务名={{ $labels.application }},最近5分钟错误率={{ printf "%.2f" $value }}% + - name: elasticsearch.log.nginx.alerts + rules: + - alert: ElasticsearchLogNginxErrorPercentage + expr: | + log_nginx_host_status_doc_count{status=~"(400|499|502|503|504)"} / ignoring(status) group_left log_nginx_host_doc_count * 100 > 1 + labels: + severity: warning + annotations: + summary: "最近5分钟nginx异常状态码超过1%" + description: 域名={{ $labels.host }},状态码={{ $labels.status }},最近5分钟错误率={{ printf "%.2f" $value }}%,最近5分钟错误数={{ printf "log_nginx_host_status_doc_count{host='%s',status='%s'}" .Labels.host .Labels.status | query | first | value }} + - alert: ElasticsearchLogNginxErrorCount + expr: | + log_nginx_host_status_doc_count{status=~"(400|499|502|503|504)"} > 50 + labels: + severity: warning + annotations: + summary: "最近5分钟nginx异常状态码超过50条" + description: 域名={{ $labels.host }},状态码={{ $labels.status }},最近5分钟错误数={{ $value }} diff --git a/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-service.yaml b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-service.yaml new file mode 100644 index 0000000..dcb5170 --- /dev/null +++ b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: elasticsearch-log-exporter + name: elasticsearch-log-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: http + port: 9206 + targetPort: http + selector: + app: elasticsearch-log-exporter diff --git a/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-serviceMonitor.yaml b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..04f6482 --- /dev/null +++ b/8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-serviceMonitor.yaml @@ -0,0 +1,20 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: elasticsearch-log-exporter + name: elasticsearch-log-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 300s + scrapeTimeout: 20s + port: http + scheme: http + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: elasticsearch-log-exporter diff --git a/8-custom-exporter/external-node-exporter/external-node-exporter-endpoints.yaml b/8-custom-exporter/external-node-exporter/external-node-exporter-endpoints.yaml new file mode 100644 index 0000000..923ecd5 --- /dev/null +++ b/8-custom-exporter/external-node-exporter/external-node-exporter-endpoints.yaml @@ -0,0 +1,14 @@ +kind: Endpoints +apiVersion: v1 +metadata: + labels: + k8s-app: node-exporter + name: external-node-exporter + namespace: monitoring +subsets: + - addresses: + - ip: 127.0.0.1 + - ip: 127.0.1.1 + ports: + - port: 9100 + name: http diff --git a/8-custom-exporter/external-node-exporter/external-node-exporter-service.yaml b/8-custom-exporter/external-node-exporter/external-node-exporter-service.yaml new file mode 100644 index 0000000..d175e03 --- /dev/null +++ b/8-custom-exporter/external-node-exporter/external-node-exporter-service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: node-exporter + name: external-node-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: http + port: 9100 + targetPort: http diff --git a/8-custom-exporter/external-node-exporter/external-node-exporter-serviceMonitor.yaml b/8-custom-exporter/external-node-exporter/external-node-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..7d7d00a --- /dev/null +++ b/8-custom-exporter/external-node-exporter/external-node-exporter-serviceMonitor.yaml @@ -0,0 +1,16 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: node-exporter + name: external-node-exporter + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: http + scheme: http + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: node-exporter diff --git a/8-custom-exporter/ingress/nginx-ingress-controller-serviceMonitor.yaml b/8-custom-exporter/ingress/nginx-ingress-controller-serviceMonitor.yaml new file mode 100644 index 0000000..f3067fa --- /dev/null +++ b/8-custom-exporter/ingress/nginx-ingress-controller-serviceMonitor.yaml @@ -0,0 +1,40 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: ingress + name: nginx-ingress-controller + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 10s + port: metrics + scheme: http + relabelings: + - sourceLabels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + regex: "true" + action: keep + - sourceLabels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme] + regex: (https?) + action: replace + targetLabel: __scheme__ + - sourceLabels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + regex: (.+) + action: replace + targetLabel: __metrics_path__ + - sourceLabels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + regex: ([^:]+)(?::\d+)?;(\d+) + action: replace + replacement: $1:$2 + targetLabel: __address__ + - sourceLabels: [__meta_kubernetes_service_name] + regex: prometheus-server + action: drop + jobLabel: k8s-app + namespaceSelector: + matchNames: + - ingress-nginx + selector: + matchLabels: + k8s-app: ingress \ No newline at end of file diff --git a/8-custom-exporter/ingress/nginx-ingress-discovery-service.yaml b/8-custom-exporter/ingress/nginx-ingress-discovery-service.yaml new file mode 100644 index 0000000..9c37fc7 --- /dev/null +++ b/8-custom-exporter/ingress/nginx-ingress-discovery-service.yaml @@ -0,0 +1,17 @@ +kind: Service +apiVersion: v1 +metadata: + namespace: ingress-nginx + name: nginx-ingress-discovery-service + labels: + k8s-app: ingress +spec: + type: ClusterIP + clusterIP: None + selector: + app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/part-of: ingress-nginx + ports: + - name: metrics + port: 10254 + protocol: TCP \ No newline at end of file diff --git a/8-custom-exporter/kafka-exporter/kafka-exporter-clusterRole.yaml b/8-custom-exporter/kafka-exporter/kafka-exporter-clusterRole.yaml new file mode 100644 index 0000000..66ba016 --- /dev/null +++ b/8-custom-exporter/kafka-exporter/kafka-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kafka-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/8-custom-exporter/kafka-exporter/kafka-exporter-clusterRoleBinding.yaml b/8-custom-exporter/kafka-exporter/kafka-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..534be41 --- /dev/null +++ b/8-custom-exporter/kafka-exporter/kafka-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kafka-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kafka-exporter +subjects: + - kind: ServiceAccount + name: kafka-exporter + namespace: monitoring diff --git a/8-custom-exporter/kafka-exporter/kafka-exporter-deployment.yaml b/8-custom-exporter/kafka-exporter/kafka-exporter-deployment.yaml new file mode 100644 index 0000000..1662fcc --- /dev/null +++ b/8-custom-exporter/kafka-exporter/kafka-exporter-deployment.yaml @@ -0,0 +1,70 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: kafka-exporter + name: kafka-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: kafka-exporter + template: + metadata: + labels: + app: kafka-exporter + spec: + containers: + - args: + - --kafka.server=broker1:9092 + - --kafka.server=broker2:9092 + - --kafka.server=broker3:9092 + - --kafka.version=1.0.0 + - --web.listen-address=127.0.0.1:9308 + # 如果需要排除topic或group,修改内容并取消下两行注释 + # - --filter.exclude + # - --topic.filter="(add_list_member).*" + name: kafka-exporter + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kafka_exporter:v0.0.2 + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: 2 + memory: 1Gi + requests: + cpu: 100m + memory: 100Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9308 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9308/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + imagePullPolicy: IfNotPresent + name: kube-rbac-proxy + ports: + - containerPort: 9308 + hostPort: 9308 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: kafka-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/kafka-exporter/kafka-exporter-prometheusRule.yaml b/8-custom-exporter/kafka-exporter/kafka-exporter-prometheusRule.yaml new file mode 100644 index 0000000..06e1a59 --- /dev/null +++ b/8-custom-exporter/kafka-exporter/kafka-exporter-prometheusRule.yaml @@ -0,0 +1,47 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: kafka-exporter-rules + namespace: monitoring +spec: + groups: + - name: kafka.alerts + rules: + - alert: KafkaSlowConsuming + expr: | + sum by (consumergroup, topic, kafka_cluster_name) (kafka_consumergroup_lag_sum > 0) / sum by (consumergroup, topic, kafka_cluster_name) (rate(kafka_consumergroup_current_offset_sum[5m])) / 60 > 30 + for: 5m + labels: + severity: warning + annotations: + summary: "kafka topic消费慢" + description: consumergroup={{ $labels.consumergroup }} topic={{ $labels.topic }} ETA={{ printf "%.2f" $value }}minutes + - alert: KafkaSlowConsuming-critical2 + expr: | + sum by (consumergroup, topic, kafka_cluster_name) (kafka_consumergroup_lag_sum > 0) / sum by (consumergroup, topic, kafka_cluster_name) (rate(kafka_consumergroup_current_offset_sum[5m])) / 60 > 30 + for: 15m + labels: + severity: critical + annotations: + summary: "kafka topic消费慢" + description: consumergroup={{ $labels.consumergroup }} topic={{ $labels.topic }} ETA={{ printf "%.2f" $value }}minutes + - alert: KafkaSlowConsuming-critical + expr: | + sum by (consumergroup, topic, kafka_cluster_name) (kafka_consumergroup_lag_sum{consumergroup=~"wechat_qrcode_actions_group|etlv2-collectorsession-events-group|wechat-asyn-callback-message-group",topic=~"wechat_qrcode_actions|customer_event_from_collectorsession|wechat-asyn-callback-message"} > 0) / sum by (consumergroup, topic, kafka_cluster_name) (rate(kafka_consumergroup_current_offset_sum{consumergroup=~"wechat_qrcode_actions_group|etlv2-collectorsession-events-group|wechat-asyn-callback-message-group",topic=~"wechat_qrcode_actions|customer_event_from_collectorsession|wechat-asyn-callback-message"}[5m])) / 60 > 3 + for: 2m + labels: + severity: critical + annotations: + summary: "kafka topic消费慢" + description: consumergroup={{ $labels.consumergroup }} topic={{ $labels.topic }} ETA={{ printf "%.2f" $value }}minutes + - alert: kafka-brokers + expr: kafka-brokers < 3 + for: 30s + labels: + severity: critical + annotations: + summary: "kafka节点小于3台" + description: "当前kafka节点数={{ $value }}" diff --git a/8-custom-exporter/kafka-exporter/kafka-exporter-service.yaml b/8-custom-exporter/kafka-exporter/kafka-exporter-service.yaml new file mode 100644 index 0000000..101c5b0 --- /dev/null +++ b/8-custom-exporter/kafka-exporter/kafka-exporter-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: kafka-exporter + kafka-cluster-name: cluster-1 + name: kafka-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9308 + targetPort: https + selector: + app: kafka-exporter diff --git a/8-custom-exporter/kafka-exporter/kafka-exporter-serviceAccount.yaml b/8-custom-exporter/kafka-exporter/kafka-exporter-serviceAccount.yaml new file mode 100644 index 0000000..a8ca931 --- /dev/null +++ b/8-custom-exporter/kafka-exporter/kafka-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kafka-exporter + namespace: monitoring diff --git a/8-custom-exporter/kafka-exporter/kafka-exporter-serviceMonitor.yaml b/8-custom-exporter/kafka-exporter/kafka-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..679a0bf --- /dev/null +++ b/8-custom-exporter/kafka-exporter/kafka-exporter-serviceMonitor.yaml @@ -0,0 +1,22 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kafka-exporter + name: kafka-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + scrapeTimeout: 20s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + targetLabels: + - kafka-cluster-name + selector: + matchLabels: + k8s-app: kafka-exporter diff --git a/8-custom-exporter/kube-controller-manager/kube-controller-manager-prometheusRule.yaml b/8-custom-exporter/kube-controller-manager/kube-controller-manager-prometheusRule.yaml new file mode 100644 index 0000000..72e8efb --- /dev/null +++ b/8-custom-exporter/kube-controller-manager/kube-controller-manager-prometheusRule.yaml @@ -0,0 +1,20 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: kube-controller-rules + namespace: monitoring +spec: + groups: + - name: kube-controller.alerts + rules: + - alert: KubeControllerManagerDown + annotations: + message: KubeControllerManager has disappeared from Prometheus target discovery. + expr: | + absent(up{job="kube-controller-manager"} == 1) + for: 15m + labels: + severity: critical diff --git a/8-custom-exporter/kube-controller-manager/kube-controller-manager-service.yaml b/8-custom-exporter/kube-controller-manager/kube-controller-manager-service.yaml new file mode 100644 index 0000000..0da9542 --- /dev/null +++ b/8-custom-exporter/kube-controller-manager/kube-controller-manager-service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + namespace: kube-system + name: kube-controller-manager + labels: + k8s-app: kube-controller-manager +spec: + selector: + component: kube-controller-manager + type: ClusterIP + clusterIP: None + ports: + - name: http-metrics + port: 10252 + targetPort: 10252 + protocol: TCP \ No newline at end of file diff --git a/8-custom-exporter/kube-controller-manager/kube-controller-manager-serviceMonitor.yaml b/8-custom-exporter/kube-controller-manager/kube-controller-manager-serviceMonitor.yaml new file mode 100644 index 0000000..fe96ca1 --- /dev/null +++ b/8-custom-exporter/kube-controller-manager/kube-controller-manager-serviceMonitor.yaml @@ -0,0 +1,23 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kube-controller-manager + name: kube-controller-manager + namespace: monitoring +spec: + endpoints: + - interval: 30s + metricRelabelings: + - action: drop + regex: etcd_(debugging|disk|request|server).* + sourceLabels: + - __name__ + port: http-metrics + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kube-controller-manager diff --git a/8-custom-exporter/kube-scheduler/kube-scheduler-manager-prometheusRule.yaml b/8-custom-exporter/kube-scheduler/kube-scheduler-manager-prometheusRule.yaml new file mode 100644 index 0000000..bd587a5 --- /dev/null +++ b/8-custom-exporter/kube-scheduler/kube-scheduler-manager-prometheusRule.yaml @@ -0,0 +1,67 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: kube-scheduler-rules + namespace: monitoring +spec: + groups: + - name: kube-scheduler.alerts + rules: + - alert: KubeSchedulerDown + annotations: + message: KubeScheduler has disappeared from Prometheus target discovery. + expr: | + absent(up{job="kube-scheduler"} == 1) + for: 15m + labels: + severity: critical + - name: kube-scheduler.rules + rules: + - expr: | + histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.99" + record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile + - expr: | + histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.99" + record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile + - expr: | + histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.99" + record: cluster_quantile:scheduler_binding_latency:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.9" + record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.9" + record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile + - expr: | + histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.9" + record: cluster_quantile:scheduler_binding_latency:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.5" + record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.5" + record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile + - expr: | + histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 + labels: + quantile: "0.5" + record: cluster_quantile:scheduler_binding_latency:histogram_quantile \ No newline at end of file diff --git a/8-custom-exporter/kube-scheduler/kube-scheduler-service.yaml b/8-custom-exporter/kube-scheduler/kube-scheduler-service.yaml new file mode 100644 index 0000000..140e55b --- /dev/null +++ b/8-custom-exporter/kube-scheduler/kube-scheduler-service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + namespace: kube-system + name: kube-scheduler + labels: + k8s-app: kube-scheduler +spec: + selector: + component: kube-scheduler + type: ClusterIP + clusterIP: None + ports: + - name: http-metrics + port: 10251 + targetPort: 10251 + protocol: TCP \ No newline at end of file diff --git a/8-custom-exporter/kube-scheduler/kube-scheduler-serviceMonitor.yaml b/8-custom-exporter/kube-scheduler/kube-scheduler-serviceMonitor.yaml new file mode 100644 index 0000000..5a2d441 --- /dev/null +++ b/8-custom-exporter/kube-scheduler/kube-scheduler-serviceMonitor.yaml @@ -0,0 +1,18 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kube-scheduler + name: kube-scheduler + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: http-metrics + jobLabel: k8s-app + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kube-scheduler diff --git a/8-custom-exporter/kudu-exporter/kudu-exporter-clusterRole.yaml b/8-custom-exporter/kudu-exporter/kudu-exporter-clusterRole.yaml new file mode 100644 index 0000000..ecf1806 --- /dev/null +++ b/8-custom-exporter/kudu-exporter/kudu-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kudu-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/8-custom-exporter/kudu-exporter/kudu-exporter-clusterRoleBinding.yaml b/8-custom-exporter/kudu-exporter/kudu-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..6134ffd --- /dev/null +++ b/8-custom-exporter/kudu-exporter/kudu-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kudu-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kudu-exporter +subjects: + - kind: ServiceAccount + name: kudu-exporter + namespace: monitoring diff --git a/8-custom-exporter/kudu-exporter/kudu-exporter-deployment.yaml b/8-custom-exporter/kudu-exporter/kudu-exporter-deployment.yaml new file mode 100644 index 0000000..b9fd3b6 --- /dev/null +++ b/8-custom-exporter/kudu-exporter/kudu-exporter-deployment.yaml @@ -0,0 +1,63 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: kudu-exporter + name: kudu-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: kudu-exporter + template: + metadata: + labels: + app: kudu-exporter + spec: + containers: + - args: + - --kudu.bootstrap-address=hadoop01:8051 + - --collect.timeout=20 + - --web.listen-address=127.0.0.1:9878 + name: kudu-exporter + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kudu_exporter:v0.0.4 + resources: + limits: + cpu: 100m + memory: 200Mi + requests: + cpu: 10m + memory: 20Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9878 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9878/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 9878 + hostPort: 9878 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: kudu-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/kudu-exporter/kudu-exporter-prometheusRule.yaml b/8-custom-exporter/kudu-exporter/kudu-exporter-prometheusRule.yaml new file mode 100644 index 0000000..6b6b266 --- /dev/null +++ b/8-custom-exporter/kudu-exporter/kudu-exporter-prometheusRule.yaml @@ -0,0 +1,57 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: kudu-exporter-rules + namespace: monitoring +spec: + groups: + - name: kudu.alerts + rules: + - alert: KuduMetricsDOWN + expr: kudu_up == 0 + for: 5m + labels: + severity: warning + annotations: + summary: "未能抓取kudu节点监控数据,已超过5分钟,请留意" + description: "节点={{ $labels.server }},角色={{ $labels.role }}" + - alert: KuduClusterReplicaSkew + expr: kudu_cluster_replica_skew > 100 + for: 5m + labels: + severity: warning + annotations: + summary: "Kudu节点间Replica数不均衡,偏差大于100,已超过5分钟,请留意" + description: "偏斜量={{ $value }}" + - alert: KuduTabletFailed + expr: kudu_tablets_num_failed > 0 + labels: + severity: warning + annotations: + summary: "Kudu节点上有Tablet失败,请留意" + description: "节点={{ $labels.server }},角色={{ $labels.role }},失败Tablet数={{ $value }}" + - alert: KuduDataDirsFailed + expr: kudu_data_dirs_failed > 0 + labels: + severity: warning + annotations: + summary: "Kudu节点上数据目录所在磁盘损坏,请留意" + description: "节点={{ $labels.server }},角色={{ $labels.role }},损坏目录数={{ $value }}" + - alert: KuduDataDirsFull + expr: kudu_data_dirs_full > 0 + labels: + severity: warning + annotations: + summary: "Kudu节点上数据目录所在磁盘已满,请留意" + description: "节点={{ $labels.server }},角色={{ $labels.role }},已满目录数={{ $value }}" + - alert: KuduLogError + expr: kudu_glog_error_messages > 0 and changes(kudu_glog_error_messages[5m]) > 0 + for: 10m + labels: + severity: warning + annotations: + summary: "Kudu节点日志持续报错,已超过10分钟,请留意" + description: "节点={{ $labels.server }},角色={{ $labels.role }},当前累计错误日志数={{ $value }}" diff --git a/8-custom-exporter/kudu-exporter/kudu-exporter-service.yaml b/8-custom-exporter/kudu-exporter/kudu-exporter-service.yaml new file mode 100644 index 0000000..69dff05 --- /dev/null +++ b/8-custom-exporter/kudu-exporter/kudu-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: kudu-exporter + name: kudu-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9878 + targetPort: https + selector: + app: kudu-exporter diff --git a/8-custom-exporter/kudu-exporter/kudu-exporter-serviceAccount.yaml b/8-custom-exporter/kudu-exporter/kudu-exporter-serviceAccount.yaml new file mode 100644 index 0000000..6fa156d --- /dev/null +++ b/8-custom-exporter/kudu-exporter/kudu-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kudu-exporter + namespace: monitoring diff --git a/8-custom-exporter/kudu-exporter/kudu-exporter-serviceMonitor.yaml b/8-custom-exporter/kudu-exporter/kudu-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..30dfe3b --- /dev/null +++ b/8-custom-exporter/kudu-exporter/kudu-exporter-serviceMonitor.yaml @@ -0,0 +1,20 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: kudu-exporter + name: kudu-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 60s + scrapeTimeout: 30s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: kudu-exporter diff --git a/8-custom-exporter/mongodb-exporter/mongodb-exporter-clusterRole.yaml b/8-custom-exporter/mongodb-exporter/mongodb-exporter-clusterRole.yaml new file mode 100644 index 0000000..3e96b01 --- /dev/null +++ b/8-custom-exporter/mongodb-exporter/mongodb-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: mongodb-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/8-custom-exporter/mongodb-exporter/mongodb-exporter-clusterRoleBinding.yaml b/8-custom-exporter/mongodb-exporter/mongodb-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..a058b1b --- /dev/null +++ b/8-custom-exporter/mongodb-exporter/mongodb-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: mongodb-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: mongodb-exporter +subjects: + - kind: ServiceAccount + name: mongodb-exporter + namespace: monitoring diff --git a/8-custom-exporter/mongodb-exporter/mongodb-exporter-deployment.yaml b/8-custom-exporter/mongodb-exporter/mongodb-exporter-deployment.yaml new file mode 100644 index 0000000..1de13d7 --- /dev/null +++ b/8-custom-exporter/mongodb-exporter/mongodb-exporter-deployment.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: mongodb-exporter + name: mongodb-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: mongodb-exporter + template: + metadata: + labels: + app: mongodb-exporter + spec: + containers: + - args: + - --mongodb.uri + - mongodb://root:111111@dds-uf69fad95356dee41.mongodb.rds.aliyuncs.com:3717,dds-uf69fad95356dee42.mongodb.rds.aliyuncs.com:3717/admin + - --web.listen-address=127.0.0.1:9001 + name: mongodb-exporter + imagePullPolicy: IfNotPresent + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/mongodbexporter + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9001 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9001/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + imagePullPolicy: IfNotPresent + name: kube-rbac-proxy + ports: + - containerPort: 9001 + hostPort: 9001 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: mongodb-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/mongodb-exporter/mongodb-exporter-prometheusRule.yaml b/8-custom-exporter/mongodb-exporter/mongodb-exporter-prometheusRule.yaml new file mode 100644 index 0000000..ca9cd5b --- /dev/null +++ b/8-custom-exporter/mongodb-exporter/mongodb-exporter-prometheusRule.yaml @@ -0,0 +1,29 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: mongodb-exporter-rules + namespace: monitoring +spec: + groups: + - name: mongodb.alerts + rules: + - alert: mongodb-members-health + expr: | + mongodb_mongod_replset_member_health !=1 + for: 30s + labels: + severity: critical + annotations: + summary: "mongodb异常" + description: "异常mongodb服务器 name={{ $labels.name }} state={{ $labels.state }}\n" + - alert: MongodbTooManyConnections + expr: mongodb_connections{state="current"} > 500 + for: 5m + labels: + severity: warning + annotations: + summary: "MongoDB连接数持续5m超出500" + description: "MongoDB当前连接数={{ $value }}\n" diff --git a/8-custom-exporter/mongodb-exporter/mongodb-exporter-service.yaml b/8-custom-exporter/mongodb-exporter/mongodb-exporter-service.yaml new file mode 100644 index 0000000..b02a488 --- /dev/null +++ b/8-custom-exporter/mongodb-exporter/mongodb-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: mongodb-exporter + name: mongodb-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9001 + targetPort: https + selector: + app: mongodb-exporter diff --git a/8-custom-exporter/mongodb-exporter/mongodb-exporter-serviceAccount.yaml b/8-custom-exporter/mongodb-exporter/mongodb-exporter-serviceAccount.yaml new file mode 100644 index 0000000..50ad6af --- /dev/null +++ b/8-custom-exporter/mongodb-exporter/mongodb-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: mongodb-exporter + namespace: monitoring diff --git a/8-custom-exporter/mongodb-exporter/mongodb-exporter-serviceMonitor.yaml b/8-custom-exporter/mongodb-exporter/mongodb-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..b0263f5 --- /dev/null +++ b/8-custom-exporter/mongodb-exporter/mongodb-exporter-serviceMonitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: mongodb-exporter + name: mongodb-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: mongodb-exporter diff --git a/8-custom-exporter/mysql-exporter/mysql-exporter-clusterRole.yaml b/8-custom-exporter/mysql-exporter/mysql-exporter-clusterRole.yaml new file mode 100644 index 0000000..e8df506 --- /dev/null +++ b/8-custom-exporter/mysql-exporter/mysql-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: mysql-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/8-custom-exporter/mysql-exporter/mysql-exporter-clusterRoleBinding.yaml b/8-custom-exporter/mysql-exporter/mysql-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..fbe1acd --- /dev/null +++ b/8-custom-exporter/mysql-exporter/mysql-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: mysql-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: mysql-exporter +subjects: + - kind: ServiceAccount + name: mysql-exporter + namespace: monitoring diff --git a/8-custom-exporter/mysql-exporter/mysql-exporter-deployment.yaml b/8-custom-exporter/mysql-exporter/mysql-exporter-deployment.yaml new file mode 100644 index 0000000..ad91349 --- /dev/null +++ b/8-custom-exporter/mysql-exporter/mysql-exporter-deployment.yaml @@ -0,0 +1,73 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: mysql-exporter + name: mysql-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: mysql-exporter + template: + metadata: + labels: + app: mysql-exporter + spec: + containers: + - env: + - name: DATA_SOURCE_NAME + # It's better to use an individual account with a max connection limit for monitoring. + # CREATE USER 'exporter'@'%' IDENTIFIED BY 'password' WITH MAX_USER_CONNECTIONS 3; + # GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'%'; + value: "root:11111@(rm-uf6r86b2q6802883q.mysql.rds.aliyuncs.com)/" + args: + # More optional collector flags could be found on https://github.com/prometheus/mysqld_exporter#collector-flags + - --collect.info_schema.tables + - --collect.info_schema.processlist + - --collect.binlog_size + - --web.listen-address=127.0.0.1:9104 + name: mysql-exporter + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/mysqld-exporter + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: 500m + memory: 500Mi + requests: + cpu: 100m + memory: 100Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9104 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9104/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + imagePullPolicy: IfNotPresent + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 9104 + hostPort: 9104 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: mysql-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/mysql-exporter/mysql-exporter-prometheusRule.yaml b/8-custom-exporter/mysql-exporter/mysql-exporter-prometheusRule.yaml new file mode 100644 index 0000000..5d8abd5 --- /dev/null +++ b/8-custom-exporter/mysql-exporter/mysql-exporter-prometheusRule.yaml @@ -0,0 +1,44 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: mysql-exporter-rules + namespace: monitoring +spec: + groups: + - name: mysql.alerts + rules: + - alert: mysql-up + expr: | + mysql_up !=1 OR absent(mysql_up)==1 + for: 30s + labels: + severity: critical + annotations: + summary: "mysql不可用或者采集器不可用" + - alert: MySQLReplicationNotRunning + expr: mysql_slave_status_slave_io_running == 0 or mysql_slave_status_slave_sql_running == 0 + for: 2m + labels: + severity: critical + annotations: + summary: "mysql从节点(IO or SQL)进程持续2m down" + description: "mysql主节点IP={{ $labels.master_host }} mysql主节点UUID={{ $labels.master_uuid }}\n" + - alert: MySQLUsedConnection + expr: ceil(mysql_global_status_threads_connected{} / mysql_global_variables_max_connections{} * 100) > 80 + for: 5m + labels: + severity: warning + annotations: + summary: "mysql连接数使用率持续5m超出80%" + description: "mysql连接数使用率={{ $value }}%\n" + - alert: MysqlRestarted + expr: mysql_global_status_uptime < 60 + for: 5m + labels: + severity: warning + annotations: + summary: "MySQL重启" + description: "MySQL被重启,当前运行时间={{ $value }}s" diff --git a/8-custom-exporter/mysql-exporter/mysql-exporter-service.yaml b/8-custom-exporter/mysql-exporter/mysql-exporter-service.yaml new file mode 100644 index 0000000..d3c1154 --- /dev/null +++ b/8-custom-exporter/mysql-exporter/mysql-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: mysql-exporter + name: mysql-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9104 + targetPort: https + selector: + app: mysql-exporter diff --git a/8-custom-exporter/mysql-exporter/mysql-exporter-serviceAccount.yaml b/8-custom-exporter/mysql-exporter/mysql-exporter-serviceAccount.yaml new file mode 100644 index 0000000..36cbdc0 --- /dev/null +++ b/8-custom-exporter/mysql-exporter/mysql-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: mysql-exporter + namespace: monitoring diff --git a/8-custom-exporter/mysql-exporter/mysql-exporter-serviceMonitor.yaml b/8-custom-exporter/mysql-exporter/mysql-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..248bfca --- /dev/null +++ b/8-custom-exporter/mysql-exporter/mysql-exporter-serviceMonitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: mysql-exporter + name: mysql-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: mysql-exporter diff --git a/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-clusterRole.yaml b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-clusterRole.yaml new file mode 100644 index 0000000..488edf2 --- /dev/null +++ b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: rabbitmq-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-clusterRoleBinding.yaml b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..ea2f997 --- /dev/null +++ b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: rabbitmq-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: rabbitmq-exporter +subjects: + - kind: ServiceAccount + name: rabbitmq-exporter + namespace: monitoring diff --git a/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-deployment-2.yaml b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-deployment-2.yaml new file mode 100644 index 0000000..58efe77 --- /dev/null +++ b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-deployment-2.yaml @@ -0,0 +1,73 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: rabbitmq-exporter-2 + name: rabbitmq-exporter-2 + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: rabbitmq-exporter-2 + template: + metadata: + labels: + app: rabbitmq-exporter-2 + spec: + containers: + - env: + - name: RABBIT_URL + value: http://192.168.11.178:15672 + - name: RABBIT_USER + value: user + - name: RABBIT_PASSWORD + value: pass + - name: RABBIT_CAPABILITIES + value: bert,no_sort + - name: PUBLISH_ADDR + value: 127.0.0.1 + - name: PUBLISH_PORT + value: "9419" + name: rabbitmq-exporter-2 + image: kbudde/rabbitmq-exporter:latest + resources: + limits: + cpu: 300m + memory: 300Mi + requests: + cpu: 100m + memory: 100Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9419 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9419/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + # image: quay.io/coreos/kube-rbac-proxy:v0.4.1 + image: gaven/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 9419 + hostPort: 9419 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: rabbitmq-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-deployment.yaml b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-deployment.yaml new file mode 100644 index 0000000..d64ac35 --- /dev/null +++ b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-deployment.yaml @@ -0,0 +1,73 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: rabbitmq-exporter + name: rabbitmq-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: rabbitmq-exporter + template: + metadata: + labels: + app: rabbitmq-exporter + spec: + containers: + - env: + - name: RABBIT_URL + value: http://192.168.11.150:15672 + - name: RABBIT_USER + value: user + - name: RABBIT_PASSWORD + value: pass + - name: RABBIT_CAPABILITIES + value: nobert + - name: PUBLISH_ADDR + value: 127.0.0.1 + - name: PUBLISH_PORT + value: "9419" + name: rabbitmq-exporter + image: kbudde/rabbitmq-exporter:latest + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9419 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9419/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + # image: quay.io/coreos/kube-rbac-proxy:v0.4.1 + image: gaven/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 9419 + hostPort: 9419 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: rabbitmq-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-service-2.yaml b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-service-2.yaml new file mode 100644 index 0000000..e7a6f60 --- /dev/null +++ b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-service-2.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: rabbitmq-exporter + name: rabbitmq-exporter-2 + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9419 + targetPort: https + selector: + app: rabbitmq-exporter-2 diff --git a/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-service.yaml b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-service.yaml new file mode 100644 index 0000000..d84c631 --- /dev/null +++ b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: rabbitmq-exporter + name: rabbitmq-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9419 + targetPort: https + selector: + app: rabbitmq-exporter diff --git a/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-serviceAccount.yaml b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-serviceAccount.yaml new file mode 100644 index 0000000..062d83e --- /dev/null +++ b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rabbitmq-exporter + namespace: monitoring diff --git a/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-serviceMonitor.yaml b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..03be972 --- /dev/null +++ b/8-custom-exporter/rabbitmq-exporter/rabbitmq-exporter-serviceMonitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: rabbitmq-exporter + name: rabbitmq-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: rabbitmq-exporter diff --git a/8-custom-exporter/redis-exporter/redis-exporter-clusterRole.yaml b/8-custom-exporter/redis-exporter/redis-exporter-clusterRole.yaml new file mode 100644 index 0000000..e76d551 --- /dev/null +++ b/8-custom-exporter/redis-exporter/redis-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: redis-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/8-custom-exporter/redis-exporter/redis-exporter-clusterRoleBinding.yaml b/8-custom-exporter/redis-exporter/redis-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..b148971 --- /dev/null +++ b/8-custom-exporter/redis-exporter/redis-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: redis-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: redis-exporter +subjects: + - kind: ServiceAccount + name: redis-exporter + namespace: monitoring diff --git a/8-custom-exporter/redis-exporter/redis-exporter-deployment.yaml b/8-custom-exporter/redis-exporter/redis-exporter-deployment.yaml new file mode 100644 index 0000000..abd91bb --- /dev/null +++ b/8-custom-exporter/redis-exporter/redis-exporter-deployment.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: redis-exporter + name: redis-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: redis-exporter + template: + metadata: + labels: + app: redis-exporter + spec: + containers: + - args: + - --redis.addr=r-uf6utdzqu0bgwdumgn.redis.rds.aliyuncs.com + - --redis.password=111111 + - --web.listen-address=127.0.0.1:9121 + name: redis-exporter + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/redis_exporter + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9121 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9121/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + imagePullPolicy: IfNotPresent + name: kube-rbac-proxy + ports: + - containerPort: 9121 + hostPort: 9121 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: redis-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/redis-exporter/redis-exporter-prometheusRule.yaml b/8-custom-exporter/redis-exporter/redis-exporter-prometheusRule.yaml new file mode 100644 index 0000000..657cc0e --- /dev/null +++ b/8-custom-exporter/redis-exporter/redis-exporter-prometheusRule.yaml @@ -0,0 +1,53 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: redis-exporter-rules + namespace: monitoring +spec: + groups: + - name: redis.alerts + rules: + - alert: redis-memory-usage1m + expr: | + ceil(redis_memory_used_bytes / redis_memory_max_bytes * 100)>85 + for: 1m + labels: + severity: critical + annotations: + summary: "redis内存使用率持续1m超出85%" + description: "redis内存使用率={{ $value }}%" + - alert: redis-up + expr: | + redis_up != 1 OR absent(redis_up) == 1 + for: 1m + labels: + severity: critical + annotations: + summary: "redis不可用或采集器不可用" + - alert: TooManyConnections + expr: ceil(redis_connected_clients / redis_config_maxclients * 100) > 80 + for: 5m + labels: + severity: low + annotations: + summary: "redis连接数使用率持续5m超出80%" + description: "Redis连接数使用率={{ $value }}\n" + - alert: RejectedConnections + expr: increase(redis_rejected_connections_total[1m]) > 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Redis持续5m有被拒绝连接数" + description: "Redis 1分钟新增被拒绝连接数={{ $value }}\n" + - alert: RedisCpuTooHigh + expr: (irate(redis_cpu_sys_seconds_total[2m]) + irate(redis_cpu_user_seconds_total[2m])) * 100 > 80 + for: 5m + labels: + severity: low + annotations: + summary: "Redis CPU使用率持续5m超出80%" + description: "Redis CPU使用率={{ $value }}%\n" diff --git a/8-custom-exporter/redis-exporter/redis-exporter-service.yaml b/8-custom-exporter/redis-exporter/redis-exporter-service.yaml new file mode 100644 index 0000000..b336f74 --- /dev/null +++ b/8-custom-exporter/redis-exporter/redis-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: redis-exporter + name: redis-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9121 + targetPort: https + selector: + app: redis-exporter diff --git a/8-custom-exporter/redis-exporter/redis-exporter-serviceAccount.yaml b/8-custom-exporter/redis-exporter/redis-exporter-serviceAccount.yaml new file mode 100644 index 0000000..982e92f --- /dev/null +++ b/8-custom-exporter/redis-exporter/redis-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: redis-exporter + namespace: monitoring diff --git a/8-custom-exporter/redis-exporter/redis-exporter-serviceMonitor.yaml b/8-custom-exporter/redis-exporter/redis-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..d397b2d --- /dev/null +++ b/8-custom-exporter/redis-exporter/redis-exporter-serviceMonitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: redis-exporter + name: redis-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: redis-exporter diff --git a/8-custom-exporter/statping/statping-configMap.yaml b/8-custom-exporter/statping/statping-configMap.yaml new file mode 100644 index 0000000..b266fdd --- /dev/null +++ b/8-custom-exporter/statping/statping-configMap.yaml @@ -0,0 +1,173 @@ +apiVersion: v1 +data: + .env: | + DESCRIPTION=测试环境 + DINGTALK_URL=https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + services.yml: | + x-httpservice: &httpservice + type: http + method: GET + check_interval: 30 + timeout: 5 + expected_status: 200 + allow_notifications: true + notify_after: 0 + notify_all_changes: false + public: true + redirect: true + + x-external-httpservice: &externalhttpservice + type: http + method: GET + check_interval: 60 + timeout: 5 + expected_status: 200 + allow_notifications: true + notify_after: 0 + notify_all_changes: false + public: true + redirect: true + + services: + - name: app-external + domain: https://app.dmhub.cn/ping + <<: *externalhttpservice + - name: api-external + domain: https://api.dmhub.cn/ping + <<: *externalhttpservice + - name: cbe-external + domain: https://cbe.dmhub.cn/cbe/ping + <<: *externalhttpservice + - name: host-external + domain: https://host.dmhub.cn/ping + <<: *externalhttpservice + - name: plugin-external + domain: https://a.dmhub.cn/ping + <<: *externalhttpservice + + - name: api + domain: http://api.test.svc.cluster.local:8015/ping + <<: *httpservice + - name: apiv2 + domain: http://apiv2.test.svc.cluster.local:8015/ping + <<: *httpservice + - name: app + domain: http://app.test.svc.cluster.local:18011/ping + <<: *httpservice + - name: appui + domain: http://appui.test.svc.cluster.local:18012/index.html + <<: *httpservice + - name: cms + domain: http://cms.test.svc.cluster.local:8067/ping + <<: *httpservice + - name: collectorentry + domain: http://collectorentry.test.svc.cluster.local:18014/ping + <<: *httpservice + - name: cpadmin + domain: http://cpadmin.test.svc.cluster.local:28008/ping + <<: *httpservice + - name: cdhadaptor + domain: http://cdhadaptor.test.svc.cluster.local:18020/ping + <<: *httpservice + - name: customer + domain: http://customer.test.svc.cluster.local:18009/ping + <<: *httpservice + - name: customerbackend + domain: http://customerbackend.test.svc.cluster.local:18010/ping + <<: *httpservice + - name: datapermission + domain: http://datapermission.test.svc.cluster.local:8012/ping + <<: *httpservice + - name: etlv2 + domain: http://etlv2.test.svc.cluster.local:8027/ping + <<: *httpservice + - name: eventjob + domain: http://eventjob.test.svc.cluster.local:8026/ping + <<: *httpservice + - name: extensionplatform + domain: http://extensionplatform.test.svc.cluster.local:28004/ping + <<: *httpservice + - name: file + domain: http://file.test.svc.cluster.local:8025/ping + <<: *httpservice + - name: flowmanager + domain: http://flowmanager.test.svc.cluster.local:8077/ping + <<: *httpservice + - name: goods + domain: http://goods.test.svc.cluster.local:8069/ping + <<: *httpservice + - name: hosting + domain: http://hosting.test.svc.cluster.local:18003/ping + <<: *httpservice + - name: job + domain: http://job.test.svc.cluster.local:8005/ping + <<: *httpservice + - name: kanglai + domain: http://kanglai.test.svc.cluster.local:8018/ping + <<: *httpservice + - name: loyaltyv2 + domain: http://loyaltyv2.test.svc.cluster.local:8029/ping + <<: *httpservice + - name: maxwell + domain: http://maxwell.test.svc.cluster.local:13300/ping + <<: *httpservice + - name: plugin + domain: http://plugin.test.svc.cluster.local:18004/ping + <<: *httpservice + - name: queryhub + domain: http://queryhub.test.svc.cluster.local:8057/ping + <<: *httpservice + - name: reactflow2 + domain: http://reactflow2.test.svc.cluster.local:8023/ping + <<: *httpservice + - name: scheduler + domain: http://scheduler.test.svc.cluster.local:8088/ping + <<: *httpservice + - name: sms + domain: http://sms.test.svc.cluster.local:8017/ping + <<: *httpservice + - name: urlshortener + domain: http://urlshortener.test.svc.cluster.local:8000/ping + <<: *httpservice + - name: webhook + domain: http://webhook.test.svc.cluster.local:8022/ping + <<: *httpservice + - name: wechat + domain: http://wechat.test.svc.cluster.local:18002/ping + <<: *httpservice + - name: wechatjob + domain: http://wechatjob.test.svc.cluster.local:18022/ping + <<: *httpservice + - name: referrer + domain: http://referrer.test.svc.cluster.local:28014/ping + <<: *httpservice + - name: tms + domain: http://tms.test.svc.cluster.local:8016/tms/ping + <<: *httpservice + - name: hadoopods + domain: http://hadoopods.test.svc.cluster.local:8032/ping + <<: *httpservice + - name: hadoopdts + domain: http://hadoopdts.test.svc.cluster.local:8037/ping + <<: *httpservice + - name: hadoopetl + domain: http://hadoopetl.test.svc.cluster.local:8039/ping + <<: *httpservice + - name: kafka2es + domain: http://kafka2es.test.svc.cluster.local:8041/ping + <<: *httpservice + - name: impala + domain: http://impala.test.svc.cluster.local:20060/ping + <<: *httpservice + - name: data-extractor + domain: http://data-extractor.test.svc.cluster.local:8192/ping + <<: *httpservice + - name: internalapi + domain: http://internalapi.test.svc.cluster.local:8096/ping + <<: *httpservice + +kind: ConfigMap +metadata: + name: statping-config + namespace: monitoring + diff --git a/8-custom-exporter/statping/statping-deployment.yaml b/8-custom-exporter/statping/statping-deployment.yaml new file mode 100644 index 0000000..47162ec --- /dev/null +++ b/8-custom-exporter/statping/statping-deployment.yaml @@ -0,0 +1,74 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: statping + name: statping + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: statping + template: + metadata: + labels: + app: statping + spec: + containers: + - image: registry.cn-hangzhou.aliyuncs.com/clab-docker/statping:v0.0.5 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 30 + successThreshold: 1 + tcpSocket: + port: http + timeoutSeconds: 3 + name: statping + ports: + - containerPort: 8080 + name: http + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /status/health + port: http + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 3 + resources: + limits: + cpu: "1" + memory: 2Gi + requests: + cpu: 100m + memory: 200Mi + volumeMounts: + - name: config-volume + mountPath: /app/.env + subPath: .env + - name: config-volume + mountPath: /app/services.yml + subPath: services.yml + nodeSelector: + beta.kubernetes.io/os: linux + volumes: + - name: config-volume + configMap: + name: statping-config + items: + - key: .env + path: .env + - key: services.yml + path: services.yml + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + tolerations: + - operator: Exists diff --git a/8-custom-exporter/statping/statping-prometheusRule.yaml b/8-custom-exporter/statping/statping-prometheusRule.yaml new file mode 100644 index 0000000..4003290 --- /dev/null +++ b/8-custom-exporter/statping/statping-prometheusRule.yaml @@ -0,0 +1,20 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: statping-rules + namespace: monitoring +spec: + groups: + - name: statping.alerts + rules: + - alert: ServiceDown + expr: statping_service_online == 0 + labels: + severity: critical + annotations: + summary: "服务健康检查不通过,请留意" + description: 服务={{ $labels.exported_service }} + diff --git a/8-custom-exporter/statping/statping-service.yaml b/8-custom-exporter/statping/statping-service.yaml new file mode 100644 index 0000000..7d3453a --- /dev/null +++ b/8-custom-exporter/statping/statping-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: statping + name: statping + namespace: monitoring +spec: + type: NodePort + ports: + - name: http + port: 8080 + targetPort: http + nodePort: 30800 + selector: + app: statping diff --git a/8-custom-exporter/statping/statping-serviceMonitor.yaml b/8-custom-exporter/statping/statping-serviceMonitor.yaml new file mode 100644 index 0000000..24bb5f5 --- /dev/null +++ b/8-custom-exporter/statping/statping-serviceMonitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: statping + name: statping + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 15s + scrapeTimeout: 10s + port: http + scheme: http + path: /status/metrics + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: statping diff --git a/8-custom-exporter/yarn-exporter/yarn-exporter-clusterRole.yaml b/8-custom-exporter/yarn-exporter/yarn-exporter-clusterRole.yaml new file mode 100644 index 0000000..023e046 --- /dev/null +++ b/8-custom-exporter/yarn-exporter/yarn-exporter-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: yarn-exporter +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/8-custom-exporter/yarn-exporter/yarn-exporter-clusterRoleBinding.yaml b/8-custom-exporter/yarn-exporter/yarn-exporter-clusterRoleBinding.yaml new file mode 100644 index 0000000..1114723 --- /dev/null +++ b/8-custom-exporter/yarn-exporter/yarn-exporter-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: yarn-exporter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: yarn-exporter +subjects: + - kind: ServiceAccount + name: yarn-exporter + namespace: monitoring diff --git a/8-custom-exporter/yarn-exporter/yarn-exporter-deployment.yaml b/8-custom-exporter/yarn-exporter/yarn-exporter-deployment.yaml new file mode 100644 index 0000000..ea6ddcd --- /dev/null +++ b/8-custom-exporter/yarn-exporter/yarn-exporter-deployment.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: yarn-exporter + name: yarn-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: yarn-exporter + template: + metadata: + labels: + app: yarn-exporter + spec: + containers: + - args: + - --resource-manager.address=hadoop01:8088 + - --collect.finish-time-limit=60 + - --web.listen-address=127.0.0.1:9877 + name: yarn-exporter + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/yarn_exporter:v0.0.2 + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: 50m + memory: 100Mi + requests: + cpu: 20m + memory: 30Mi + - args: + - --logtostderr + - --secure-listen-address=$(IP):9877 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9877/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + imagePullPolicy: IfNotPresent + name: kube-rbac-proxy + ports: + - containerPort: 9877 + hostPort: 9877 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: yarn-exporter + tolerations: + - operator: Exists diff --git a/8-custom-exporter/yarn-exporter/yarn-exporter-prometheusRule.yaml b/8-custom-exporter/yarn-exporter/yarn-exporter-prometheusRule.yaml new file mode 100644 index 0000000..959b121 --- /dev/null +++ b/8-custom-exporter/yarn-exporter/yarn-exporter-prometheusRule.yaml @@ -0,0 +1,47 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + prometheus: k8s + role: alert-rules + name: yarn-exporter-rules + namespace: monitoring +spec: + groups: + - name: yarn.alerts + rules: + - alert: yarnApplicationLongRunning + expr: | + yarn_app_elapsed_time_milliseconds > 3 * 60 * 60000 + labels: + severity: warning + annotations: + summary: "yarn任务运行时间已超过3小时,请注意" + description: Name={{ $labels.name }},ID={{ $labels.id }} + - alert: yarnClusterAllocatedVirtualCores + expr: | + sum(yarn_virtual_cores{status="allocated"}) > sum(yarn_virtual_cores_total) * 0.85 + for: 3h + labels: + severity: warning + annotations: + summary: "yarn总分配CPU核数超过85%,已持续超过3小时,请注意" + description: "virtual_cores={{ $value }}" + - alert: yarnClusterAllocatedMemory + expr: | + sum(yarn_memory_megabytes{status="allocated"}) > sum(yarn_memory_total_megabytes) * 0.85 + for: 3h + labels: + severity: warning + annotations: + summary: "yarn总分配内存超过85%,已持续超过3小时,请注意" + description: "memory={{ $value }}MB" + - alert: yarnApplicationPending + expr: | + yarn_apps{status="pending"} > 0 + for: 3h + labels: + severity: warning + annotations: + summary: "yarn存在pending任务,已持续超过3小时,请注意" + description: "pending_apps={{ $value }}" \ No newline at end of file diff --git a/8-custom-exporter/yarn-exporter/yarn-exporter-service.yaml b/8-custom-exporter/yarn-exporter/yarn-exporter-service.yaml new file mode 100644 index 0000000..92f2f39 --- /dev/null +++ b/8-custom-exporter/yarn-exporter/yarn-exporter-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: yarn-exporter + name: yarn-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 9877 + targetPort: https + selector: + app: yarn-exporter diff --git a/8-custom-exporter/yarn-exporter/yarn-exporter-serviceAccount.yaml b/8-custom-exporter/yarn-exporter/yarn-exporter-serviceAccount.yaml new file mode 100644 index 0000000..5962bbd --- /dev/null +++ b/8-custom-exporter/yarn-exporter/yarn-exporter-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: yarn-exporter + namespace: monitoring diff --git a/8-custom-exporter/yarn-exporter/yarn-exporter-serviceMonitor.yaml b/8-custom-exporter/yarn-exporter/yarn-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..deca13f --- /dev/null +++ b/8-custom-exporter/yarn-exporter/yarn-exporter-serviceMonitor.yaml @@ -0,0 +1,20 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: yarn-exporter + name: yarn-exporter + namespace: monitoring +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + scrapeTimeout: 20s + port: https + scheme: https + tlsConfig: + insecureSkipVerify: true + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: yarn-exporter diff --git a/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-clusterRole.yaml b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-clusterRole.yaml new file mode 100644 index 0000000..0da6ab6 --- /dev/null +++ b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-webhook-dingtalk +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-clusterRoleBinding.yaml b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-clusterRoleBinding.yaml new file mode 100644 index 0000000..3c16378 --- /dev/null +++ b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus-webhook-dingtalk +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-webhook-dingtalk +subjects: + - kind: ServiceAccount + name: prometheus-webhook-dingtalk + namespace: monitoring diff --git a/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-deployment.yaml b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-deployment.yaml new file mode 100644 index 0000000..b2185a3 --- /dev/null +++ b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-deployment.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: prometheus-webhook-dingtalk + name: prometheus-webhook-dingtalk + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus-webhook-dingtalk + template: + metadata: + labels: + app: prometheus-webhook-dingtalk + spec: + containers: + - args: + - --ding.profile=ops=https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + - --log.level=debug + - --web.listen-address=127.0.0.1:8060 + - --template.file=/config/default.tmpl + name: prometheus-webhook-dingtalk + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus-webhook-dingtalk:1.4.0 + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 50m + memory: 50Mi + volumeMounts: + - mountPath: /config/ + name: template-volume + - args: + - --logtostderr + - --secure-listen-address=$(IP):8060 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:8060/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 8060 + hostPort: 8060 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: prometheus-webhook-dingtalk + tolerations: + - operator: Exists + volumes: + - configMap: + defaultMode: 420 + name: prometheus-webhook-dingtalk-template + name: template-volume diff --git a/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-service.yaml b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-service.yaml new file mode 100644 index 0000000..77aadb9 --- /dev/null +++ b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: prometheus-webhook-dingtalk + name: prometheus-webhook-dingtalk + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 8060 + targetPort: https + selector: + app: prometheus-webhook-dingtalk diff --git a/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-serviceAccount.yaml b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-serviceAccount.yaml new file mode 100644 index 0000000..ea40b0f --- /dev/null +++ b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-webhook-dingtalk + namespace: monitoring diff --git a/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-template.yaml b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-template.yaml new file mode 100644 index 0000000..f73d46e --- /dev/null +++ b/9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-template.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +data: + default.tmpl: |+ + {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }} + {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }} + + {{ define "__text_alert_list" }}{{ range . }} + --- + {{ range .Labels.SortedPairs }}{{ if (eq .Name "instance" "pod") }}> {{ .Name | title }}: {{ .Value | markdown | html }}{{ end }} + {{ end }} + {{ range .Annotations.SortedPairs }}> {{ .Name | title }}: {{ .Value | markdown | html }} + {{ end }} + > Time: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }} + + {{ end }}{{ end }} + + {{ define "ding.link.title" }}{{ template "__subject" . }}{{ end }} + {{ define "ding.link.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}⚠️{{ end }}{{ if eq .Status "resolved" }}✅{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})** + {{ template "__text_alert_list" .Alerts.Firing }} + {{ template "__text_alert_list" .Alerts.Resolved }} + {{ end }} + +kind: ConfigMap +metadata: + name: prometheus-webhook-dingtalk-template + namespace: monitoring \ No newline at end of file diff --git a/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-clusterRole.yaml b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-clusterRole.yaml new file mode 100644 index 0000000..98b80bb --- /dev/null +++ b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-clusterRole.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-webhook-qiyeweixin +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-clusterRoleBinding.yaml b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-clusterRoleBinding.yaml new file mode 100644 index 0000000..721f951 --- /dev/null +++ b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-clusterRoleBinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus-webhook-qiyeweixin +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus-webhook-qiyeweixin +subjects: + - kind: ServiceAccount + name: prometheus-webhook-qiyeweixin + namespace: monitoring diff --git a/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-configMap.yaml b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-configMap.yaml new file mode 100644 index 0000000..4f761e3 --- /dev/null +++ b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-configMap.yaml @@ -0,0 +1,32 @@ +apiVersion: v1 +data: + ding2wechat.yml: | + receivers: + - name: ops + targets: + - url: https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + + default.tmpl: |+ + {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }} + {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }} + + {{ define "__text_alert_list" }}{{ range . }} + --- + {{ range .Labels.SortedPairs }}{{ if (eq .Name "instance" "pod") }}> {{ .Name | title }}: {{ .Value | markdown | html }}{{ end }} + {{ end }} + {{ range .Annotations.SortedPairs }}> {{ .Name | title }}: {{ .Value | markdown | html }} + {{ end }} + > Time: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }} + + {{ end }}{{ end }} + + {{ define "ding.link.title" }}{{ template "__subject" . }}{{ end }} + {{ define "ding.link.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}⚠️{{ end }}{{ if eq .Status "resolved" }}✅{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})** + {{ template "__text_alert_list" .Alerts.Firing }} + {{ template "__text_alert_list" .Alerts.Resolved }} + {{ end }} + +kind: ConfigMap +metadata: + name: prometheus-webhook-qiyeweixin-config + namespace: monitoring \ No newline at end of file diff --git a/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-deployment.yaml b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-deployment.yaml new file mode 100644 index 0000000..288747d --- /dev/null +++ b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-deployment.yaml @@ -0,0 +1,88 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: prometheus-webhook-qiyeweixin + name: prometheus-webhook-qiyeweixin + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus-webhook-qiyeweixin + template: + metadata: + labels: + app: prometheus-webhook-qiyeweixin + spec: + containers: + - args: + - --log.level=debug + - --web.listen-address=127.0.0.1:8080 + - --config.file=/config/ding2wechat.yml + name: ding2wechat + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/ding2wechat:v0.1.5 + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + volumeMounts: + - mountPath: /config/ + name: config-volume + - args: + - --ding.profile=ops=http://localhost:8080/receiver?name=ops + - --log.level=debug + - --web.listen-address=127.0.0.1:8060 + - --template.file=/config/default.tmpl + name: prometheus-webhook-dingtalk + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus-webhook-dingtalk:1.4.0 + resources: + limits: + cpu: 200m + memory: 200Mi + requests: + cpu: 50m + memory: 50Mi + volumeMounts: + - mountPath: /config/ + name: config-volume + - args: + - --logtostderr + - --secure-listen-address=$(IP):8060 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:8060/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 8060 + hostPort: 8060 + name: https + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + nodeSelector: + beta.kubernetes.io/os: linux + securityContext: + fsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + serviceAccountName: prometheus-webhook-qiyeweixin + tolerations: + - operator: Exists + volumes: + - configMap: + defaultMode: 420 + name: prometheus-webhook-qiyeweixin-config + name: config-volume diff --git a/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-service.yaml b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-service.yaml new file mode 100644 index 0000000..532b093 --- /dev/null +++ b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: prometheus-webhook-qiyeweixin + name: prometheus-webhook-dingtalk + namespace: monitoring +spec: + clusterIP: None + ports: + - name: https + port: 8060 + targetPort: https + selector: + app: prometheus-webhook-qiyeweixin diff --git a/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-serviceAccount.yaml b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-serviceAccount.yaml new file mode 100644 index 0000000..edaf85b --- /dev/null +++ b/9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-serviceAccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus-webhook-qiyeweixin + namespace: monitoring diff --git a/README.md b/README.md new file mode 100644 index 0000000..1315057 --- /dev/null +++ b/README.md @@ -0,0 +1,383 @@ +# Prometheus on Kubernetes + + +本项目为Prometheus Operator部署监控相关的Kubernetes定义,快速部署请参考以下Quickstart + + +
镜像列表与离线使用 + +--- + +- registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus-operator:v0.30.0 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/configmap-reload:v0.0.1 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus-config-reloader:v0.30.0 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus:v2.11.1 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/grafana:7.2.0 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/kafka_exporter:v0.0.1 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-rbac-proxy:v0.4.1 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/kube-state-metrics:v1.5.0 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/addon-resizer:1.8.4 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/node-exporter:v1.3.1 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/statping:v0.0.5 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/alertmanager:v0.17.0 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus-webhook-dingtalk:1.4.0 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/ding2wechat:v0.1.5 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/cloudera_exporter:v0.0.1 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/yarn_exporter:v0.0.2 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/kudu_exporter:v0.0.4 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/prometheus-es-exporter:0.14.0 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/azure-scheduledevents-exporter:21.01.1 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/mysqld-exporter +- registry.cn-hangzhou.aliyuncs.com/clab-docker/elasticsearch_exporter:1.1.0rc1 +- registry.cn-hangzhou.aliyuncs.com/clab-docker/mongodbexporter +- registry.cn-hangzhou.aliyuncs.com/clab-docker/redis_exporter + +**Package Ver.20200810** + +[oss://devops-components/prometheus/prometheus_20200810.images.gz](https://oss.console.aliyun.com/bucket/oss-cn-hangzhou/devops-components/object?path=prometheus%2F) + +**Usage** + +``` +gzip -d -c prometheus_20200810.images.gz | docker load +newRepoPrefix=nexus.xsio.cn/test # 替换为本地仓库地址的前缀 +docker images --format "{{.Repository}}:{{.Tag}}" -f reference="registry.cn-hangzhou.aliyuncs.com/clab-docker/*" | while read i;do echo "-> process $i" && docker tag $i ${i/registry.cn-hangzhou.aliyuncs.com\/clab-docker/$newRepoPrefix} && docker push ${i/registry.cn-hangzhou.aliyuncs.com\/clab-docker/$newRepoPrefix};done +``` + +
+ + +## Quickstart + +0. 拉取本项目 + +手动下载 [https://gitlab.cd.xsio.cn/devops/prometheus-operator/-/archive/master/prometheus-operator-master.tar.gz](https://gitlab.cd.xsio.cn/devops/prometheus-operator/-/archive/master/prometheus-operator-master.tar.gz),然后上传到远程机器并解压 + +或者 + +在[这里](https://gitlab.cd.xsio.cn/devops/prometheus-operator/-/settings/repository)添加`Deploy Keys`,然后在远程机器上`git clone ssh://git@gitlab.cd.xsio.cn:22222/devops/prometheus-operator.git` + +> (可选)创建钉钉机器人,关键词至少包括`service`,`FIRING`,`RESOLVED`,获取WEBHOOK URL备用 + +1. 为prometheus/grafana确认机器和数据目录 + +> 需要多节点或高可用,请另行使用NFS、云盘等存储 + +> 如果只想使用临时存储,请参考`3-prometheus/prometheus-prometheus.yaml`和`7-grafana/grafana-deployment.yaml`中的注释进行 + +> 以下为本地单点存储 + +``` +kubectl get nodes # 挑选一台机器 +kubectl label nodes xxxxxxxx prometheus-data=true # 为这台机器打标签 +ssh xxxxxxxx mkdir -p /opt/prometheus/{prometheus,grafana} # !登录选择的机器创建目录 +kubectl apply -f create-local-storage.yaml # 创建对应PV和StorageClass +``` + +2. 启动promethues-operator + +``` +kubectl apply -f create-monitoring-namespace.yaml +kubectl apply -f 1-custom-resource-definition +kubectl apply -f 2-prometheus-operator +kubectl -nmonitoring get pod -l apps.kubernetes.io/name=prometheus-operator # 确认状态为Running +``` + +3. 启动promethues与exporter + +``` +kubectl apply -f 3-prometheus +kubectl -nmonitoring get pod -l app=prometheus -o wide # 确认状态为Running,节点为之前选择的机器 +kubectl apply -f 6-exporter --recursive +# ingress相关报错可忽略,如果是托管版Kubernetes,可以跳过kube-scheduler和kube-controller-manager +``` + +此时可以打开`http://任意节点IP:30900/targets`确认prometheus工作正常,不方便访问的话可以跳过 + +4. 启动grafana + +``` +kubectl apply -f 7-grafana +kubectl -nmonitoring get pod -l app=grafana # 确认状态为Running +``` + +此时可以打开`http://任意节点IP:30300`确认grafana工作正常,默认密码为`admin/admin` + +5. 启动其他监控 + + +- kafka(推荐🌟) + +``` +vi 8-custom-exporter/kafka-exporter/kafka-exporter-deployment.yaml +# 编辑kafka地址,如果只有一个节点,请删除多余行 +kubectl apply -f 8-custom-exporter/kafka-exporter +kubectl -nmonitoring get pod -l app=kafka-exporter # 确认状态为Running +``` + +- statping服务监控(推荐🌟) + +``` +vi 8-custom-exporter/statping/statping-configMap.yaml +# 按需修改services中需要监控的地址,注意批量替换域名和namespace,修改DINGTALK_URL为钉钉机器人地址或企业微信机器人地址 +kubectl apply -f 8-custom-exporter/statping +kubectl -nmonitoring get pod -l app=statping # 确认状态为Running +``` + +此时可以打开`http://任意节点IP:30800`确认statping工作正常,默认密码为`admin/Convertlab0202` + +- 外部机器监控(推荐🌟) + +对Kubernetes之外的机器进行监控,在每台机器上执行 + +``` +sudo yum install -y https://devops-components.oss-cn-hangzhou.aliyuncs.com/prometheus/node_exporter-1.3.1-clab.el7.centos.x86_64.rpm +sudo systemctl start node_exporter +sudo systemctl enable node_exporter +``` + +之后,修改配置并启用 + +``` +vi 8-custom-exporter/external-node-exporter/external-node-exporter-endpoints.yaml +# 修改addresses字段,更新为需要监控的机器IP列表 +kubectl apply -f 8-custom-exporter/external-node-exporter +``` + +> 对外部机器的监控需要访问其9100端口,请注意防火墙开通情况 + + +- CDH(推荐🌟) + +``` +vi 8-custom-exporter/cloudera-exporter/cloudera-exporter-configMap.yaml +# 修改host字段为CDH Manager的IP,按实际修改user,最好新建一个只读用户 +kubectl apply -f 8-custom-exporter/cloudera-exporter +kubectl -nmonitoring get pod -l app=cloudera-exporter # 确认状态为Running +``` + +- Spring Boot Actuator + +需要被监控的应用对应的Kubernetes Service YAML中需要打以下标签 + +``` +actuator_should_be_scraped: "true" +actuator_scrape_port: "9999" +``` + +推荐在发布脚本中配置label,如果不方便,可以手动打label,例如 + +``` +kubectl -ntest label svc app actuator_should_be_scraped=true +kubectl -ntest label svc app actuator_scrape_port=9999 +``` + +需要被监控的应用启用`spring-boot-starter-actuator`和`micrometer-registry-prometheus`依赖,且在`application.yml`进行相关配置,推荐最小配置为 + +``` +management: + server: + port: 9999 + endpoint: + metrics: + enabled: true + prometheus: + enabled: true + endpoints: + enabled-by-default: false + web: + exposure: + include: 'prometheus' +``` + +默认已包含于`6-exporter`,新增监控不需要额外操作,已有监控执行 + +``` +kubectl apply -f 6-exporter/spring-boot-actuator +``` + +- JMX (作为Spring Boot Actuator备用)) + +
点此展开 + +--- + +JMX监控通过prometheus提供的jmx-exporter进行采集,其本身是一个java agent,在应用启动时跟随启动,并暴露供采集的端口。 + +为了最小化对现有应用的影响,监控相关的链路比较复杂,说明如下: + +I. Prometheus Operator端配置ServiceMonitor + +默认已包含于`6-exporter`,不需要额外操作 + +II. 在ConfigMap中配置启用jmx-exporter java agent + +在SRE`全局配置`中搜索`JMX`,设置如下并更新配置 + +`JMX_EXPORTER_ENABLED`: `true` + +`JMX_EXPORTER_BLACKLIST_OBJECT_NAMES`: `["kafka.producer:type=producer-topic-metrics,*","kafka.consumer:type=consumer-fetch-manager-metrics,topic=*,*","kafka.consumer:type=consumer-coordinator-metrics,*"]` + +如果更多需求,具体可参考[这里](https://github.com/xsio/dockerimages/tree/master/alpine-oraclejdk8) + +III. 确认发布脚本中为Service打上标签 + +需要被监控的应用对应的Kubernetes Service YAML中需要打以下标签 + +``` +jmx_should_be_scraped: "true" +jmx_scrape_port: "7777" +``` + +添加后例如 + +``` +apiVersion: v1 +kind: Service +metadata: + namespace: "{{ namespace }}" + name: "{{ service_name }}" + labels: + jmx_should_be_scraped: "true" + jmx_scrape_port: "7777" +spec: + selector: + app: "{{ service_name }}" + ports: + - name: web + targetPort: web + port: {{ server_port }} +{% for port in extra_ports %} + - name: port{{ loop.index }} + targetPort: {{ port }} + port: {{ port }} +{% endfor %} +``` + +请确认当前环境对应的发布脚本中`service.j2`和`reactflow_service.j2`都已相应修改 + +IV. 重新发布服务 + +应用启动后,稍等两分钟,如果prometheus尚未采集到数据,排查如下 + +a. 进入应用容器,`netstat -nltp`确认`7777`端口是否监听 + +b. 如果未监听,确认`/opt/`下是否存在`jmx_prometheus_javaagent`目录,如果不存在,说明基础镜像未更新 + +c. 如果`jmx_prometheus_javaagent`目录存在,确认`start.sh`文件中是否包含`. .init_jmx_exporter.sh`一行,如果不包含,说明应用使用了自定义启动脚本 + +d. 如果`7777`已监听,`kubectl describe svc`确认Service标签是否打上,如果未打上,确认发布脚本工作是否正常 + +e. 如果标签打上,访问Prometheus网页界面,进入`Status`-`Targets`搜索`jmx`确认采集目标是否正常 + +
+ +- Azure计划事件(Azure云推荐🌟,特别地建议部署于Kudu节点) + +用于获取Azure即将发生的维护事件,Kubernetes节点已默认包括,Kubernetes之外的机器需要执行 + +``` +sudo yum install -y https://devops-components.oss-cn-hangzhou.aliyuncs.com/tools/azure-scheduledevents-exporter/21.01.1/azure-scheduledevents-exporter-21.01.1-1.el7.x86_64.rpm +sudo systemctl enable azure-scheduledevents-exporter --now +``` + +之后,修改配置并启用 + +``` +vi 8-custom-exporter/azure-scheduledevents-exporter/external-azure-scheduledevents-exporter-endpoints.yaml +# 修改addresses字段,更新为需要监控的Kubernetes外部机器IP列表 +kubectl apply -f 8-custom-exporter/azure-scheduledevents-exporter +``` + +> 对Kubernetes外部机器的监控需要访问其9879端口,请注意防火墙开通情况 + +- yarn(可选) + +``` +vi 8-custom-exporter/yarn-exporter/yarn-exporter-deployment.yaml +# 修改--resource-manager.address后的yarn resource manager地址,一般为8088端口 +kubectl apply -f 8-custom-exporter/yarn-exporter +kubectl -nmonitoring get pod -l app=yarn-exporter # 确认状态为Running +``` + +- kudu(可选) + +**要求kudu最低版本为1.10** + +``` +vi 8-custom-exporter/kudu-exporter/kudu-exporter-deployment.yaml +# 修改--kudu.bootstrap-address后的kudu master地址,一般为8051端口 +kubectl apply -f 8-custom-exporter/kudu-exporter +kubectl -nmonitoring get pod -l app=kudu-exporter # 确认状态为Running +``` + +- 日志报错监控(可选) + +*5分钟查询一次ES,非绝对准确* + +``` +vi 8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-deployment.yaml +# 修改--es-cluster后的elastic地址,如果为非9200端口,需要添加端口 +vi 8-custom-exporter/elasticsearch-log-exporter/elasticsearch-log-exporter-configMap.yaml +# 修改QueryIndices后的index名称,调整为实际使用的前缀 +kubectl apply -f 8-custom-exporter/elasticsearch-log-exporter +kubectl -nmonitoring get pod -l app=elasticsearch-log-exporter # 确认状态为Running +``` + +- redis(可选) + +``` +vi 8-custom-exporter/redis/redis-exporter-deployment.yaml +# 编辑redis地址,如果有密码,请填写,无则忽略 +kubectl apply -f 8-custom-exporter/redis-exporter +kubectl -nmonitoring get pod -l app=redis-exporter # 确认状态为Running +``` + +- mongodb(可选) + +``` +vi 8-custom-exporter/mongodb-exporter/mongodb-exporter-deployment.yaml +# 编辑mongodb地址,可在demo中进行修改 +kubectl apply -f 8-custom-exporter/mongodb-exporter +kubectl -nmonitoring get pod -l app=mongodb-exporter # 确认状态为Running +``` + +- mysql(可选) + +``` +vi 8-custom-exporter/mysql-exporter/mysql-exporter-deployment.yaml +# 编辑mysql地址,可在demo中进行修改 +kubectl apply -f 8-custom-exporter/mysql-exporter +kubectl -nmonitoring get pod -l app=mysql-exporter # 确认状态为Running +``` + +- elasticsearch(可选) + +``` +vi 8-custom-exporter/elasticsearch-exporter/elasticsearch-exporter-deployment.yaml +# 编辑elasticsearch地址,可在demo中进行修改 +vi 8-custom-exporter/elasticsearch-exporter/elasticsearch-log-exporter-deployment.yaml +# 编辑收集日志的elasticsearch地址,如果elasticsearch共用,请排除此文件执行 +kubectl apply -f 8-custom-exporter/elasticsearch-exporter +kubectl -nmonitoring get pod -l app=elasticsearch-exporter # 确认状态为Running +``` + +6. 启动alertmanager报警(选择其一) + +- 钉钉 + +``` +vi 9-webhook/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk-deployment.yaml +# 修改ding.profile中钉钉机器人地址 +kubectl apply -f 5-alertmanager +kubectl apply -f 9-webhook/prometheus-webhook-dingtalk +``` + +- 企业微信 + +``` +vi 9-webhook/prometheus-webhook-qiyeweixin/prometheus-webhook-qiyeweixin-configMap.yaml +# 修改ding2wechat.yml字段中url定义的企业微信群机器人地址 +kubectl apply -f 5-alertmanager +kubectl apply -f 9-webhook/prometheus-webhook-qiyeweixin +``` diff --git a/alicloud-disk-topology-efficiency.yaml b/alicloud-disk-topology-efficiency.yaml new file mode 100644 index 0000000..5176bcc --- /dev/null +++ b/alicloud-disk-topology-efficiency.yaml @@ -0,0 +1,13 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: alicloud-disk-topology-efficiency +# CSI +provisioner: diskplugin.csi.alibabacloud.com +# Flexvolume +# provisioner: alicloud/disk +parameters: + type: cloud_efficiency +reclaimPolicy: Delete +volumeBindingMode: WaitForFirstConsumer +allowVolumeExpansion: true diff --git a/create-local-storage.yaml b/create-local-storage.yaml new file mode 100644 index 0000000..a28a2d9 --- /dev/null +++ b/create-local-storage.yaml @@ -0,0 +1,60 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: prometheus-pv-001 + labels: + type: local +spec: + storageClassName: prometheus-local-storage + capacity: + storage: 50Gi + volumeMode: Filesystem + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + local: + path: /opt/prometheus/prometheus + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: prometheus-data + operator: In + values: + - "true" + +--- + +apiVersion: v1 +kind: PersistentVolume +metadata: + name: grafana-pv-001 + labels: + type: local +spec: + storageClassName: prometheus-local-storage + capacity: + storage: 5Gi + volumeMode: Filesystem + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + local: + path: /opt/prometheus/grafana + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: prometheus-data + operator: In + values: + - "true" + +--- + +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: prometheus-local-storage +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer diff --git a/create-monitoring-namespace.yaml b/create-monitoring-namespace.yaml new file mode 100644 index 0000000..d325236 --- /dev/null +++ b/create-monitoring-namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring diff --git a/grafana-pv.yaml b/grafana-pv.yaml new file mode 100644 index 0000000..5a9276a --- /dev/null +++ b/grafana-pv.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + annotations: + name: grafana-pv +spec: + accessModes: + - ReadWriteOnce + capacity: + storage: 20Gi + persistentVolumeReclaimPolicy: Retain + nfs: + path: /data/grafana-data + server: 10.0.35.206 diff --git a/prometheus-pv.yaml b/prometheus-pv.yaml new file mode 100644 index 0000000..075995c --- /dev/null +++ b/prometheus-pv.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + annotations: + name: prometheus-pv +spec: + accessModes: + - ReadWriteOnce + capacity: + storage: 50Gi + persistentVolumeReclaimPolicy: Retain + nfs: + path: /data/prometheus-data + server: 10.0.35.206