Skip to content

Commit aa5dda5

Browse files
committed
feat: thanos querier to thanos sidecar mTLS
1 parent c9424bd commit aa5dda5

File tree

9 files changed

+490
-3
lines changed

9 files changed

+490
-3
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ require (
66
github.com/go-logr/logr v1.4.2
77
github.com/google/go-cmp v0.6.0
88
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999
9+
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8
910
github.com/pkg/errors v0.9.1
1011
github.com/prometheus/common v0.59.1
1112
github.com/rhobs/obo-prometheus-operator v0.77.1-rhobs1
@@ -106,6 +107,7 @@ require (
106107
google.golang.org/protobuf v1.34.2 // indirect
107108
gopkg.in/inf.v0 v0.9.1 // indirect
108109
gopkg.in/yaml.v2 v2.4.0 // indirect
110+
k8s.io/apiserver v0.31.1 // indirect
109111
k8s.io/klog/v2 v2.130.1 // indirect
110112
k8s.io/kube-openapi v0.0.0-20240903163716-9e1beecbcb38 // indirect
111113
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
188188
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
189189
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999 h1:+S998xHiJApsJZjRAO8wyedU9GfqFd8mtwWly6LqHDo=
190190
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999/go.mod h1:CxgbWAlvu2iQB0UmKTtRu1YfepRg1/vJ64n2DlIEVz4=
191+
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8 h1:dKtHGYiOwl0DKZEWBW4MFWFS6IYW02AVD1WSuUAVwEo=
192+
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8/go.mod h1:ePlaOqUiPplRc++6aYdMe+2FmXb2xTNS9Nz5laG2YmI=
191193
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
192194
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
193195
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
@@ -357,6 +359,8 @@ k8s.io/apiextensions-apiserver v0.31.1 h1:L+hwULvXx+nvTYX/MKM3kKMZyei+UiSXQWciX/
357359
k8s.io/apiextensions-apiserver v0.31.1/go.mod h1:tWMPR3sgW+jsl2xm9v7lAyRF1rYEK71i9G5dRtkknoQ=
358360
k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U=
359361
k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
362+
k8s.io/apiserver v0.31.1 h1:Sars5ejQDCRBY5f7R3QFHdqN3s61nhkpaX8/k1iEw1c=
363+
k8s.io/apiserver v0.31.1/go.mod h1:lzDhpeToamVZJmmFlaLwdYZwd7zB+WYRYIboqA1kGxM=
360364
k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0=
361365
k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg=
362366
k8s.io/component-base v0.31.1 h1:UpOepcrX3rQ3ab5NB6g5iP0tvsgJWzxTyAo20sgYSy8=

pkg/assets/certificate_generator.go

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
package assets
2+
3+
import (
4+
"crypto/rand"
5+
"crypto/x509"
6+
"fmt"
7+
"math/big"
8+
"time"
9+
10+
"github.com/go-logr/logr"
11+
"github.com/openshift/library-go/pkg/crypto"
12+
v1 "k8s.io/api/core/v1"
13+
"k8s.io/apimachinery/pkg/util/sets"
14+
"k8s.io/apiserver/pkg/authentication/user"
15+
)
16+
17+
const certificateLifetime = time.Duration(crypto.DefaultCertificateLifetimeInDays) * 24 * time.Hour
18+
const GRPCSecretName = "thanos-grpc-secret"
19+
20+
// needsNewCert reports whether a certificate that is valid from notBefore
// until notAfter is due for rotation. It returns true once the current time
// is strictly after the point at which only the final fifth of the validity
// window remains.
//
// now supplies the current time so that callers (and tests) can inject a
// clock; a nil now falls back to time.Now instead of panicking.
//
// Adapted from
// https://github.com/openshift/library-go/blob/08c2fd1b452520da35ad210930ea9d100545589a/pkg/operator/certrotation/signer.go#L68-L86
// without the refresh time handling.
func needsNewCert(notBefore, notAfter time.Time, now func() time.Time) bool {
	if now == nil {
		now = time.Now
	}

	// Rotation becomes due during the last 1/5 of the validity window.
	maxWait := notAfter.Sub(notBefore) / 5
	latestTime := notAfter.Add(-maxWait)

	return now().After(latestTime)
}
28+
29+
// Taken from
30+
// https://github.com/openshift/cluster-monitoring-operator/blob/765d0b0369b176a5997d787b6710783437172879/pkg/manifests/tls.go#L113
31+
func RotateGRPCSecret(s *v1.Secret, logger logr.Logger) (bool, error) {
32+
var (
33+
curCA, newCA *crypto.CA
34+
curCABytes, crtPresent = s.Data["ca.crt"]
35+
curCAKeyBytes, keyPresent = s.Data["ca.key"]
36+
rotate = !crtPresent || !keyPresent
37+
)
38+
39+
if crtPresent && keyPresent {
40+
var err error
41+
curCA, err = crypto.GetCAFromBytes(curCABytes, curCAKeyBytes)
42+
if err != nil {
43+
logger.Info(fmt.Sprintf("generating a new CA due to error reading CA: %v", err))
44+
rotate = true
45+
} else if needsNewCert(curCA.Config.Certs[0].NotBefore, curCA.Config.Certs[0].NotAfter, time.Now) {
46+
logger.Info("generating new CA, because the current one is older than 1/5 of it validity timestamp")
47+
rotate = true
48+
}
49+
}
50+
51+
if !rotate {
52+
return rotate, nil
53+
}
54+
55+
if curCA == nil {
56+
newCAConfig, err := crypto.MakeSelfSignedCAConfig(
57+
fmt.Sprintf("%s@%d", "openshift-cluster-monitoring", time.Now().Unix()),
58+
crypto.DefaultCertificateLifetimeInDays,
59+
)
60+
if err != nil {
61+
return rotate, fmt.Errorf("error generating self signed CA: %w", err)
62+
}
63+
64+
newCA = &crypto.CA{
65+
SerialGenerator: &crypto.RandomSerialGenerator{},
66+
Config: newCAConfig,
67+
}
68+
} else {
69+
template := curCA.Config.Certs[0]
70+
now := time.Now()
71+
template.NotBefore = now.Add(-1 * time.Second)
72+
template.NotAfter = now.Add(certificateLifetime)
73+
template.SerialNumber = template.SerialNumber.Add(template.SerialNumber, big.NewInt(1))
74+
75+
newCACert, err := createCertificate(template, template, template.PublicKey, curCA.Config.Key)
76+
if err != nil {
77+
return rotate, fmt.Errorf("error rotating CA: %w", err)
78+
}
79+
80+
newCA = &crypto.CA{
81+
SerialGenerator: &crypto.RandomSerialGenerator{},
82+
Config: &crypto.TLSCertificateConfig{
83+
Certs: []*x509.Certificate{newCACert},
84+
Key: curCA.Config.Key,
85+
},
86+
}
87+
}
88+
89+
newCABytes, newCAKeyBytes, err := newCA.Config.GetPEMBytes()
90+
if err != nil {
91+
return rotate, fmt.Errorf("error getting PEM bytes from CA: %w", err)
92+
}
93+
94+
s.Data["ca.crt"] = newCABytes
95+
s.Data["ca.key"] = newCAKeyBytes
96+
97+
{
98+
cfg, err := newCA.MakeClientCertificateForDuration(
99+
&user.DefaultInfo{
100+
Name: "thanos-querier",
101+
},
102+
time.Duration(crypto.DefaultCertificateLifetimeInDays)*24*time.Hour,
103+
)
104+
if err != nil {
105+
return rotate, fmt.Errorf("error making client certificate: %w", err)
106+
}
107+
108+
crt, key, err := cfg.GetPEMBytes()
109+
if err != nil {
110+
return rotate, fmt.Errorf("error getting PEM bytes for thanos querier client certificate: %w", err)
111+
}
112+
s.Data["thanos-querier-client.crt"] = crt
113+
s.Data["thanos-querier-client.key"] = key
114+
}
115+
116+
{
117+
cfg, err := newCA.MakeServerCert(
118+
sets.NewString("prometheus-grpc"),
119+
crypto.DefaultCertificateLifetimeInDays,
120+
)
121+
if err != nil {
122+
return rotate, fmt.Errorf("error making server certificate: %w", err)
123+
}
124+
125+
crt, key, err := cfg.GetPEMBytes()
126+
if err != nil {
127+
return rotate, fmt.Errorf("error getting PEM bytes for prometheus-k8s server certificate: %w", err)
128+
}
129+
s.Data["prometheus-server.crt"] = crt
130+
s.Data["prometheus-server.key"] = key
131+
}
132+
133+
return rotate, nil
134+
}
135+
136+
// createCertificate issues a certificate from template, signed by parent
// using the private key priv, and returns it in parsed x509.Certificate form.
//
// pub is the public key to embed in the new certificate and priv is the
// signer's private key; both are passed straight to x509.CreateCertificate.
// For a self-signed certificate pass the same value as template and parent.
//
// An error is returned when template or parent is nil, when signing fails,
// or when the freshly created DER bytes cannot be parsed back.
func createCertificate(template, parent *x509.Certificate, pub, priv any) (*x509.Certificate, error) {
	// x509.CreateCertificate dereferences both certificates, so reject nil
	// up front instead of panicking.
	if template == nil || parent == nil {
		return nil, fmt.Errorf("error creating certificate: template and parent must not be nil")
	}

	der, err := x509.CreateCertificate(rand.Reader, template, parent, pub, priv)
	if err != nil {
		return nil, fmt.Errorf("error creating certificate: %w", err)
	}

	// The DER blob holds exactly one certificate, so use the
	// single-certificate parser rather than indexing into a slice.
	cert, err := x509.ParseCertificate(der)
	if err != nil {
		return nil, fmt.Errorf("error parsing certificate: %w", err)
	}

	return cert, nil
}

pkg/controllers/monitoring/monitoring-stack/components.go

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package monitoringstack
22

33
import (
4+
"fmt"
45
"reflect"
56

67
monv1 "github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring/v1"
@@ -12,6 +13,7 @@ import (
1213
"k8s.io/utils/ptr"
1314

1415
stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
16+
"github.com/rhobs/observability-operator/pkg/assets"
1517
"github.com/rhobs/observability-operator/pkg/reconciler"
1618
)
1719

@@ -28,6 +30,7 @@ func stackComponentReconcilers(
2830
thanos ThanosConfiguration,
2931
prometheus PrometheusConfiguration,
3032
alertmanager AlertmanagerConfiguration,
33+
tlsHashes map[string]string,
3134
) []reconciler.Reconciler {
3235
prometheusName := ms.Name + "-prometheus"
3336
alertmanagerName := ms.Name + "-alertmanager"
@@ -44,7 +47,7 @@ func stackComponentReconcilers(
4447
reconciler.NewUpdater(newPrometheus(ms, prometheusName,
4548
additionalScrapeConfigsSecretName,
4649
instanceSelectorKey, instanceSelectorValue,
47-
thanos, prometheus), ms),
50+
thanos, prometheus, tlsHashes), ms),
4851
reconciler.NewUpdater(newPrometheusService(ms, instanceSelectorKey, instanceSelectorValue), ms),
4952
reconciler.NewUpdater(newThanosSidecarService(ms, instanceSelectorKey, instanceSelectorValue), ms),
5053
reconciler.NewOptionalUpdater(newPrometheusPDB(ms, instanceSelectorKey, instanceSelectorValue), ms,
@@ -115,6 +118,7 @@ func newPrometheus(
115118
instanceSelectorValue string,
116119
thanosCfg ThanosConfiguration,
117120
prometheusCfg PrometheusConfiguration,
121+
tlsHashes map[string]string,
118122
) *monv1.Prometheus {
119123
prometheusSelector := ms.Spec.ResourceSelector
120124

@@ -193,12 +197,33 @@ func newPrometheus(
193197
}
194198
return []monv1.EnableFeature{}
195199
}(),
200+
Volumes: []corev1.Volume{
201+
{
202+
Name: "thanos-tls-assets",
203+
VolumeSource: corev1.VolumeSource{
204+
Secret: &corev1.SecretVolumeSource{
205+
SecretName: assets.GRPCSecretName,
206+
},
207+
},
208+
},
209+
},
196210
},
197211
Retention: ms.Spec.Retention,
198212
RuleSelector: prometheusSelector,
199213
RuleNamespaceSelector: ms.Spec.NamespaceSelector,
200214
Thanos: &monv1.ThanosSpec{
201215
Image: ptr.To(thanosCfg.Image),
216+
GRPCServerTLSConfig: &monv1.TLSConfig{
217+
CAFile: "/etc/thanos/tls-assets/ca.crt",
218+
CertFile: "/etc/thanos/tls-assets/prometheus-server.crt",
219+
KeyFile: "/etc/thanos/tls-assets/prometheus-server.key",
220+
},
221+
VolumeMounts: []corev1.VolumeMount{
222+
{
223+
Name: "thanos-tls-assets",
224+
MountPath: "/etc/thanos/tls-assets",
225+
},
226+
},
202227
},
203228
},
204229
}
@@ -230,6 +255,14 @@ func newPrometheus(
230255
prometheus.Spec.Secrets = append(prometheus.Spec.Secrets, tlsConfig.CertificateAuthority.Name)
231256
}
232257

258+
if len(tlsHashes) > 0 {
259+
tlsAnnotations := map[string]string{}
260+
for name, hash := range tlsHashes {
261+
tlsAnnotations[fmt.Sprintf("monitoring.openshift.io/%s-hash", name)] = hash
262+
}
263+
prometheus.Spec.CommonPrometheusFields.PodMetadata.Annotations = tlsAnnotations
264+
}
265+
233266
if prometheusCfg.Image != "" {
234267
prometheus.Spec.CommonPrometheusFields.Image = ptr.To(prometheusCfg.Image)
235268
}

pkg/controllers/monitoring/monitoring-stack/controller.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,18 @@ import (
2828
policyv1 "k8s.io/api/policy/v1"
2929
rbacv1 "k8s.io/api/rbac/v1"
3030
"k8s.io/apimachinery/pkg/api/errors"
31+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3132
"k8s.io/apimachinery/pkg/runtime"
33+
"k8s.io/apimachinery/pkg/types"
3234
ctrl "sigs.k8s.io/controller-runtime"
3335
"sigs.k8s.io/controller-runtime/pkg/builder"
3436
"sigs.k8s.io/controller-runtime/pkg/client"
3537
"sigs.k8s.io/controller-runtime/pkg/controller"
3638
"sigs.k8s.io/controller-runtime/pkg/predicate"
3739

3840
stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
41+
"github.com/rhobs/observability-operator/pkg/assets"
42+
"github.com/rhobs/observability-operator/pkg/controllers/monitoring/utils"
3943
)
4044

4145
type resourceManager struct {
@@ -133,6 +137,42 @@ func RegisterWithManager(mgr ctrl.Manager, opts Options) error {
133137
func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
134138
logger := rm.logger.WithValues("stack", req.NamespacedName)
135139
logger.Info("Reconciling monitoring stack")
140+
141+
gRPCSecret := v1.Secret{
142+
TypeMeta: metav1.TypeMeta{
143+
APIVersion: v1.SchemeGroupVersion.String(),
144+
Kind: "Secret",
145+
},
146+
ObjectMeta: metav1.ObjectMeta{
147+
Name: assets.GRPCSecretName,
148+
Namespace: req.Namespace,
149+
},
150+
Data: map[string][]byte{},
151+
}
152+
err := rm.k8sClient.Get(ctx,
153+
types.NamespacedName{
154+
Name: assets.GRPCSecretName,
155+
Namespace: req.Namespace,
156+
},
157+
&gRPCSecret)
158+
if client.IgnoreNotFound(err) != nil {
159+
return ctrl.Result{}, err
160+
}
161+
162+
rotate, err := assets.RotateGRPCSecret(&gRPCSecret, logger)
163+
if err != nil {
164+
return ctrl.Result{}, err
165+
}
166+
if rotate {
167+
err = rm.k8sClient.Update(ctx, &gRPCSecret)
168+
if errors.IsNotFound(err) {
169+
err = rm.k8sClient.Create(ctx, &gRPCSecret)
170+
}
171+
if err != nil {
172+
return ctrl.Result{}, err
173+
}
174+
}
175+
136176
ms, err := rm.getStack(ctx, req)
137177
if err != nil {
138178
// retry since some error has occurred
@@ -149,12 +189,24 @@ func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
149189
return ctrl.Result{}, nil
150190
}
151191

192+
// querier <---> sidecar mTLS hashes
193+
mTLSSecretKeys := []string{"prometheus-server.key", "prometheus-server.crt", "ca.crt"}
194+
tlsHashes := map[string]string{}
195+
for _, key := range mTLSSecretKeys {
196+
hash, err := utils.HashOfTLSSecret(assets.GRPCSecretName, key, ms.Namespace, rm.k8sClient)
197+
if err != nil {
198+
return ctrl.Result{}, err
199+
}
200+
tlsHashes[fmt.Sprintf("%s-%s", assets.GRPCSecretName, key)] = hash
201+
}
202+
152203
reconcilers := stackComponentReconcilers(ms,
153204
rm.instanceSelectorKey,
154205
rm.instanceSelectorValue,
155206
rm.thanos,
156207
rm.prometheus,
157208
rm.alertmanager,
209+
tlsHashes,
158210
)
159211
for _, reconciler := range reconcilers {
160212
err := reconciler.Reconcile(ctx, rm.k8sClient, rm.scheme)

0 commit comments

Comments
 (0)