Skip to content

Commit 74ae94d

Browse files
authored
Adds support for deploying prometheus stack on substrate node (#154)
* Adds support for deploying prometheus stack
1 parent f0aa82f commit 74ae94d

File tree

10 files changed

+130
-20
lines changed

10 files changed

+130
-20
lines changed

operator/pkg/controllers/etcd/pod.go

+7-4
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,14 @@ func podSpecFor(controlPlane *v1alpha1.ControlPlane) *v1.PodSpec {
4545
TopologyKey: "topology.kubernetes.io/zone",
4646
WhenUnsatisfiable: v1.DoNotSchedule,
4747
LabelSelector: &metav1.LabelSelector{
48-
MatchLabels: labelsFor(controlPlane.ClusterName()),
48+
MatchLabels: labels,
4949
},
5050
}, {
5151
MaxSkew: int32(1),
5252
TopologyKey: "kubernetes.io/hostname",
5353
WhenUnsatisfiable: v1.DoNotSchedule,
5454
LabelSelector: &metav1.LabelSelector{
55-
MatchLabels: labelsFor(controlPlane.ClusterName()),
55+
MatchLabels: labels,
5656
},
5757
}},
5858
Containers: []v1.Container{{
@@ -64,6 +64,9 @@ func podSpecFor(controlPlane *v1alpha1.ControlPlane) *v1.PodSpec {
6464
}, {
6565
ContainerPort: 2380,
6666
Name: "etcd-peer",
67+
}, {
68+
ContainerPort: 2381,
69+
Name: "metrics",
6770
}},
6871
VolumeMounts: []v1.VolumeMount{{
6972
Name: "etcd-data",
@@ -89,7 +92,7 @@ func podSpecFor(controlPlane *v1alpha1.ControlPlane) *v1.PodSpec {
8992
"--advertise-client-urls=" + advertizeClusterURL(controlPlane),
9093
"--initial-advertise-peer-urls=" + advertizePeerURL(controlPlane),
9194
"--listen-client-urls=https://$(NODE_IP):2379,https://127.0.0.1:2379",
92-
"--listen-metrics-urls=http://127.0.0.1:2381",
95+
"--listen-metrics-urls=http://$(NODE_IP):2381,http://127.0.0.1:2381",
9396
"--listen-peer-urls=https://$(NODE_IP):2380",
9497
"--name=$(NODE_ID)",
9598
"--peer-cert-file=/etc/kubernetes/pki/etcd/peer/peer.crt",
@@ -212,6 +215,6 @@ func caPeerName(controlPlane *v1alpha1.ControlPlane) string {
212215
}
213216

214217
func nodeSelector(clusterName string) map[string]string {
215-
return functional.UnionStringMaps(labelsFor(clusterName),
218+
return functional.UnionStringMaps(labels,
216219
map[string]string{object.ControlPlaneLabelKey: clusterName, instanceTypeLabelKey: instanceTypeLabelDefaultValue})
217220
}

operator/pkg/controllers/etcd/service.go

+3-7
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ func (c *Controller) reconcileService(ctx context.Context, controlPlane *v1alpha
3131
ObjectMeta: metav1.ObjectMeta{
3232
Name: ServiceNameFor(controlPlane.ClusterName()),
3333
Namespace: controlPlane.Namespace,
34-
Labels: labelsFor(controlPlane.ClusterName()),
34+
Labels: labels,
3535
},
3636
Spec: v1.ServiceSpec{
3737
ClusterIP: v1.ClusterIPNone,
38-
Selector: labelsFor(controlPlane.ClusterName()),
38+
Selector: labels,
3939
Ports: []v1.ServicePort{{
4040
Port: 2380,
4141
Name: serverPortNameFor(controlPlane.ClusterName()),
@@ -63,8 +63,4 @@ func ServiceNameFor(clusterName string) string {
6363
return fmt.Sprintf("%s-etcd", clusterName)
6464
}
6565

66-
func labelsFor(clusterName string) map[string]string {
67-
return map[string]string{
68-
object.AppNameLabelKey: ServiceNameFor(clusterName),
69-
}
70-
}
66+
var labels = map[string]string{object.AppNameLabelKey: "etcd"}

operator/pkg/controllers/etcd/statefulset.go

+2-4
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,11 @@ func (c *Controller) reconcileStatefulSet(ctx context.Context, controlPlane *v1a
5050
Namespace: controlPlane.Namespace,
5151
},
5252
Spec: appsv1.StatefulSetSpec{
53-
Selector: &metav1.LabelSelector{
54-
MatchLabels: labelsFor(controlPlane.ClusterName()),
55-
},
53+
Selector: &metav1.LabelSelector{MatchLabels: labels},
5654
PodManagementPolicy: appsv1.ParallelPodManagement,
5755
ServiceName: ServiceNameFor(controlPlane.ClusterName()),
5856
Replicas: aws.Int32(int32(controlPlane.Spec.Etcd.Replicas)),
59-
Template: v1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: labelsFor(controlPlane.ClusterName())}, Spec: podSpec},
57+
Template: v1.PodTemplateSpec{ObjectMeta: metav1.ObjectMeta{Labels: labels}, Spec: podSpec},
6058
VolumeClaimTemplates: []v1.PersistentVolumeClaim{{ObjectMeta: metav1.ObjectMeta{Name: "etcd-data"}, Spec: persistentVolumeClaimSpec}},
6159
},
6260
}))

operator/pkg/controllers/master/kubeapiserver.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func APIServerDeploymentName(clusterName string) string {
7474

7575
func APIServerLabels(clustername string) map[string]string {
7676
return map[string]string{
77-
object.AppNameLabelKey: APIServerDeploymentName(clustername),
77+
object.AppNameLabelKey: "apiserver",
7878
}
7979
}
8080

@@ -112,6 +112,7 @@ func apiServerPodSpecFor(controlPlane *v1alpha1.ControlPlane) v1.PodSpec {
112112
v1.ResourceCPU: resource.MustParse("1"),
113113
},
114114
},
115+
Ports: []v1.ContainerPort{{ContainerPort: 8080, Name: "metrics"}},
115116
Args: []string{
116117
"--advertise-address=$(NODE_IP)",
117118
"--allow-privileged=true",
@@ -123,7 +124,8 @@ func apiServerPodSpecFor(controlPlane *v1alpha1.ControlPlane) v1.PodSpec {
123124
"--etcd-certfile=/etc/kubernetes/pki/etcd/apiserver-etcd-client.crt",
124125
"--etcd-keyfile=/etc/kubernetes/pki/etcd/apiserver-etcd-client.key",
125126
"--etcd-servers=https://" + etcd.SvcFQDN(controlPlane.ClusterName(), controlPlane.Namespace) + ":2379",
126-
"--insecure-port=0",
127+
"--insecure-port=8080",
128+
"--insecure-bind-address=$(NODE_IP)",
127129
"--kubelet-client-certificate=/etc/kubernetes/pki/kubelet/apiserver-kubelet-client.crt",
128130
"--kubelet-client-key=/etc/kubernetes/pki/kubelet/apiserver-kubelet-client.key",
129131
"--kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname",

substrate/README.md

+13
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,19 @@ spec:
3939
EOF
4040
```
4141

42+
### Accessing metrics from Prometheus and Grafana
43+
44+
```bash
45+
kubectl port-forward svc/prometheus-operated -n monitoring 9090:9090&
46+
kubectl port-forward svc/kube-prometheus-stack-grafana -n monitoring 8080:80&
47+
```
48+
49+
### Allowing API server to trust kubelet endpoints
50+
51+
```bash
52+
kubectl certificate approve $(kubectl get csr | grep "Pending" | awk '{print $1}')
53+
```
54+
4255
### cleanup
4356

4457
- To remove the kubernetes cluster provisioned using kit-operator

substrate/pkg/controller/substrate/cluster/addons/awsloadbalancer.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func (l *AWSLoadBalancer) Create(ctx context.Context, substrate *v1alpha1.Substr
5252
}); err != nil {
5353
return reconcile.Result{}, fmt.Errorf("tagging resources, %w", err)
5454
}
55-
logging.FromContext(ctx).Debug("Tagged subnets with %s=%s", "kubernetes.io/role/elb", "1")
55+
logging.FromContext(ctx).Debugf("Tagged subnets with %s=%s", "kubernetes.io/role/elb", "1")
5656
return reconcile.Result{}, nil
5757
}
5858

substrate/pkg/controller/substrate/cluster/addons/karpenter.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ func (k *Karpenter) Create(ctx context.Context, substrate *v1alpha1.Substrate) (
100100
}); err != nil {
101101
return reconcile.Result{}, fmt.Errorf("tagging resources, %w", err)
102102
}
103-
logging.FromContext(ctx).Debug("Tagged subnets and security groups with %s=%s", "karpenter.sh/discovery", substrate.Name)
103+
logging.FromContext(ctx).Debugf("Tagged subnets and security groups with %s=%s", "karpenter.sh/discovery", substrate.Name)
104104
// Apply Provisioner
105105
if err := client.ApplyYAML(ctx, []byte(fmt.Sprintf(provisioner, substrate.Name))); err != nil {
106106
return reconcile.Result{}, fmt.Errorf("applying provisioner, %w", err)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License.
13+
*/
14+
15+
package addons
16+
17+
import (
18+
"bytes"
19+
"context"
20+
"fmt"
21+
"html/template"
22+
23+
"github.com/awslabs/kubernetes-iteration-toolkit/substrate/pkg/apis/v1alpha1"
24+
"github.com/awslabs/kubernetes-iteration-toolkit/substrate/pkg/utils/helm"
25+
"github.com/awslabs/kubernetes-iteration-toolkit/substrate/pkg/utils/kubectl"
26+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
27+
)
28+
29+
// PrometheusStack is the addon whose Create method installs the
// kube-prometheus-stack Helm chart and the PodMonitor objects defined by
// podMonitorTemplate. It carries no state.
type PrometheusStack struct{}
30+
31+
func (p *PrometheusStack) Create(ctx context.Context, substrate *v1alpha1.Substrate) (reconcile.Result, error) {
32+
if !substrate.Status.IsReady() {
33+
return reconcile.Result{Requeue: true}, nil
34+
}
35+
if err := helm.NewClient(*substrate.Status.Cluster.KubeConfig).Apply(ctx, &helm.Chart{
36+
Namespace: "monitoring",
37+
Name: "kube-prometheus-stack",
38+
Repository: "https://github.com/prometheus-community/helm-charts/releases/download/kube-prometheus-stack-34.0.0/",
39+
Version: "34.0.0",
40+
CreateNamespace: true,
41+
Values: map[string]interface{}{
42+
"coreDns": map[string]interface{}{"enabled": false},
43+
"kubeProxy": map[string]interface{}{"enabled": false},
44+
"kubeEtcd": map[string]interface{}{"enabled": false},
45+
"alertmanager": map[string]interface{}{"enabled": false},
46+
"kubeScheduler": map[string]interface{}{"enabled": false},
47+
"kubeApiServer": map[string]interface{}{"enabled": false},
48+
"kubeStateMetrics": map[string]interface{}{"enabled": false},
49+
"kubeControllerManager": map[string]interface{}{"enabled": false},
50+
"prometheus": map[string]interface{}{"serviceMonitor": map[string]interface{}{"selfMonitor": false}},
51+
"prometheusOperator": map[string]interface{}{"serviceMonitor": map[string]interface{}{"selfMonitor": false}},
52+
},
53+
}); err != nil {
54+
return reconcile.Result{}, fmt.Errorf("applying chart, %w", err)
55+
}
56+
// configure podmonitors
57+
client, err := kubectl.NewClient(*substrate.Status.Cluster.KubeConfig)
58+
if err != nil {
59+
return reconcile.Result{}, fmt.Errorf("initializing client, %w", err)
60+
}
61+
for _, name := range []string{"apiserver", "etcd"} {
62+
var buf bytes.Buffer
63+
tmpl := template.Must(template.New("Text").Parse(podMonitorTemplate))
64+
err := tmpl.Execute(&buf, struct{ ComponentName string }{ComponentName: name})
65+
if err != nil {
66+
return reconcile.Result{}, fmt.Errorf("error when executing template, %w", err)
67+
}
68+
if err := client.ApplyYAML(ctx, buf.Bytes()); err != nil {
69+
return reconcile.Result{}, fmt.Errorf("applying pod monitors, %w", err)
70+
}
71+
}
72+
return reconcile.Result{}, nil
73+
}
74+
75+
func (p *PrometheusStack) Delete(_ context.Context, _ *v1alpha1.Substrate) (reconcile.Result, error) {
76+
return reconcile.Result{}, nil
77+
}
78+
79+
// podMonitorTemplate is the manifest template for a PodMonitor in the
// "monitoring" namespace. It is rendered with a struct exposing a
// ComponentName field; the resulting monitor selects pods in the "default"
// namespace labelled kit.k8s.sh/app=<ComponentName> and scrapes their
// container port named "metrics". The "release: kube-prometheus-stack" label
// is attached to the monitor itself.
var podMonitorTemplate = `
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: {{ .ComponentName }}-pods
  namespace: monitoring
  labels:
    release: kube-prometheus-stack
spec:
  jobLabel: {{ .ComponentName }}-guest
  namespaceSelector:
    matchNames:
    - default
  selector:
    matchLabels:
      kit.k8s.sh/app: {{ .ComponentName }}
  podMetricsEndpoints:
  - port: metrics
`

substrate/pkg/controller/substrate/cluster/config.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ After=docker.service iptables-restore.service
212212
Requires=docker.service
213213
214214
[Service]
215-
ExecStart=/usr/bin/kubelet --hostname-override=%s --pod-manifest-path=/etc/kubernetes/manifests --kubeconfig=/etc/kubernetes/kubelet.conf --cgroup-driver=systemd --container-runtime=docker --network-plugin=cni --pod-infra-container-image=public.ecr.aws/eks-distro/kubernetes/pause:v1.18.9-eks-1-18-1 --node-labels=kit.aws/substrate=control-plane
215+
ExecStart=/usr/bin/kubelet --cluster-dns=10.96.0.10 --cluster-domain=cluster.local --hostname-override=%s --pod-manifest-path=/etc/kubernetes/manifests --kubeconfig=/etc/kubernetes/kubelet.conf --cgroup-driver=systemd --container-runtime=docker --network-plugin=cni --pod-infra-container-image=public.ecr.aws/eks-distro/kubernetes/pause:v1.18.9-eks-1-18-1 --node-labels=kit.aws/substrate=control-plane
216216
Restart=always`, substrate.Name)), 0644); err != nil {
217217
return fmt.Errorf("writing kubelet configuration, %w", err)
218218
}

substrate/pkg/controller/substrate/controller.go

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ func NewController(ctx context.Context) *Controller {
7070
&addons.KubeProxy{},
7171
&addons.RBAC{},
7272
&addons.Tekton{},
73+
&addons.PrometheusStack{},
7374
},
7475
}
7576
}

0 commit comments

Comments
 (0)