From dd4c8ea22a460c4082a45669c32ffc9978cf976a Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 2 Dec 2024 13:53:30 +0100 Subject: [PATCH] feat: Support setting TLS certificate lifetimes --- CHANGELOG.md | 6 ++++ Cargo.lock | 19 +++------- Cargo.toml | 4 +-- deploy/helm/kafka-operator/crds/crds.yaml | 8 +++++ rust/crd/src/lib.rs | 9 +++++ rust/crd/src/security.rs | 37 +++++++++++++------- rust/operator-binary/src/kafka_controller.rs | 14 +++++++- 7 files changed, 66 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6562abe2..e4091296 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- The lifetime of auto generated TLS certificates is now configurable with the role and roleGroup + config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#796]). + ### Fixed - BREAKING: Use distinct ServiceAccounts for the Stacklets, so that multiple Stacklets can be @@ -11,6 +16,7 @@ All notable changes to this project will be documented in this file. restart ([#793]). [#793]: https://github.com/stackabletech/kafka-operator/pull/793 +[#796]: https://github.com/stackabletech/kafka-operator/pull/796 ## [24.11.0] - 2024-11-18 diff --git a/Cargo.lock b/Cargo.lock index 7504a98f..03df5b71 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -478,17 +478,6 @@ dependencies = [ "powerfmt", ] -[[package]] -name = "derivative" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "digest" version = "0.10.7" @@ -2202,14 +2191,14 @@ dependencies = [ [[package]] name = "stackable-operator" version = "0.82.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +source = "git+https://github.com/stackabletech//operator-rs.git?branch=main#7939b3516f01483c0d9f601d57ee70003420f7e5" dependencies = [ "chrono", "clap", "const_format", "delegate", - "derivative", "dockerfile-parser", + "educe", "either", "futures", "indexmap", @@ -2240,7 +2229,7 @@ dependencies = [ [[package]] name = "stackable-operator-derive" version = "0.3.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +source = "git+https://github.com/stackabletech//operator-rs.git?branch=main#7939b3516f01483c0d9f601d57ee70003420f7e5" dependencies = [ "darling", "proc-macro2", @@ -2251,7 +2240,7 @@ dependencies = [ [[package]] name = "stackable-shared" version = "0.0.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +source = "git+https://github.com/stackabletech//operator-rs.git?branch=main#7939b3516f01483c0d9f601d57ee70003420f7e5" dependencies = [ "kube", "semver", diff --git a/Cargo.toml b/Cargo.toml index 01eed416..2eb77a9b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,6 @@ strum = { version = "0.26", features = ["derive"] } tokio = { version = "1.40", features = ["full"] } tracing = "0.1" -# [patch."https://github.com/stackabletech/operator-rs.git"] +[patch."https://github.com/stackabletech/operator-rs.git"] # stackable-operator = { path = "../operator-rs/crates/stackable-operator" } -# stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" } +stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" } diff --git a/deploy/helm/kafka-operator/crds/crds.yaml b/deploy/helm/kafka-operator/crds/crds.yaml index 98b6769d..ea903f8e 100644 --- a/deploy/helm/kafka-operator/crds/crds.yaml +++ b/deploy/helm/kafka-operator/crds/crds.yaml @@ -163,6 +163,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -434,6 +438,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 45eb0c46..c889b4fc 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -430,9 +430,17 @@ pub struct KafkaConfig { /// The ListenerClass used for connecting to brokers. Should use a direct connection ListenerClass to minimize cost and minimize performance overhead (such as `cluster-internal` or `external-unstable`). pub broker_listener_class: String, + + /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. + /// Please note that this can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + #[fragment_attrs(serde(default))] + pub requested_secret_lifetime: Option, } impl KafkaConfig { + // Auto TLS certificate lifetime + const DEFAULT_BROKER_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); + pub fn default_config(cluster_name: &str, role: &KafkaRole) -> KafkaConfigFragment { KafkaConfigFragment { logging: product_logging::spec::default_logging(), @@ -457,6 +465,7 @@ impl KafkaConfig { graceful_shutdown_timeout: Some(DEFAULT_BROKER_GRACEFUL_SHUTDOWN_TIMEOUT), bootstrap_listener_class: Some("cluster-internal".to_string()), broker_listener_class: Some("cluster-internal".to_string()), + requested_secret_lifetime: Some(Self::DEFAULT_BROKER_SECRET_LIFETIME), } } } diff --git a/rust/crd/src/security.rs b/rust/crd/src/security.rs index 5b4fe9b5..cdae5533 100644 --- a/rust/crd/src/security.rs +++ b/rust/crd/src/security.rs @@ -6,8 +6,20 @@ //! This is required due to overlaps between TLS encryption and e.g. mTLS authentication or Kerberos use std::collections::BTreeMap; +use crate::{ + authentication::{self, ResolvedAuthenticationClasses}, + listener::{self, KafkaListenerConfig}, + tls, KafkaCluster, LISTENER_BOOTSTRAP_VOLUME_NAME, SERVER_PROPERTIES_FILE, + STACKABLE_CONFIG_DIR, +}; +use crate::{ + listener::node_address_cmd, STACKABLE_KERBEROS_KRB5_PATH, STACKABLE_LISTENER_BOOTSTRAP_DIR, + STACKABLE_LISTENER_BROKER_DIR, +}; +use crate::{KafkaRole, LISTENER_BROKER_VOLUME_NAME, STACKABLE_LOG_DIR}; use indoc::formatdoc; use snafu::{ensure, ResultExt, Snafu}; +use stackable_operator::time::Duration; use stackable_operator::{ builder::{ self, @@ -26,18 +38,6 @@ use stackable_operator::{ utils::COMMON_BASH_TRAP_FUNCTIONS, }; -use crate::{ - authentication::{self, ResolvedAuthenticationClasses}, - listener::{self, KafkaListenerConfig}, - tls, KafkaCluster, LISTENER_BOOTSTRAP_VOLUME_NAME, SERVER_PROPERTIES_FILE, - STACKABLE_CONFIG_DIR, -}; -use crate::{ - listener::node_address_cmd, STACKABLE_KERBEROS_KRB5_PATH, STACKABLE_LISTENER_BOOTSTRAP_DIR, - STACKABLE_LISTENER_BROKER_DIR, -}; -use crate::{KafkaRole, LISTENER_BROKER_VOLUME_NAME, STACKABLE_LOG_DIR}; - #[derive(Snafu, Debug)] pub enum Error { #[snafu(display("failed to process authentication class"))] @@ -385,6 +385,7 @@ impl KafkaTlsSecurity { pod_builder: &mut PodBuilder, cb_kcat_prober: &mut ContainerBuilder, cb_kafka: &mut ContainerBuilder, + requested_secret_lifetime: &Duration, ) -> Result<(), Error> { // add tls (server or client authentication volumes) if required if let Some(tls_server_secret_class) = self.get_tls_secret_class() { @@ -393,6 +394,7 @@ impl KafkaTlsSecurity { .add_volume(Self::create_kcat_tls_volume( Self::STACKABLE_TLS_KCAT_VOLUME_NAME, tls_server_secret_class, + requested_secret_lifetime, )?) .context(AddVolumeSnafu)?; cb_kcat_prober @@ -406,6 +408,7 @@ impl KafkaTlsSecurity { .add_volume(Self::create_tls_keystore_volume( Self::STACKABLE_TLS_KAFKA_SERVER_VOLUME_NAME, tls_server_secret_class, + requested_secret_lifetime, )?) .context(AddVolumeSnafu)?; cb_kafka @@ -421,6 +424,7 @@ impl KafkaTlsSecurity { .add_volume(Self::create_tls_keystore_volume( Self::STACKABLE_TLS_KAFKA_INTERNAL_VOLUME_NAME, tls_internal_secret_class, + requested_secret_lifetime, )?) .context(AddVolumeSnafu)?; cb_kafka @@ -594,12 +598,17 @@ impl KafkaTlsSecurity { } /// Creates ephemeral volumes to mount the `SecretClass` into the Pods for kcat client - fn create_kcat_tls_volume(volume_name: &str, secret_class_name: &str) -> Result { + fn create_kcat_tls_volume( + volume_name: &str, + secret_class_name: &str, + requested_secret_lifetime: &Duration, + ) -> Result { Ok(VolumeBuilder::new(volume_name) .ephemeral( SecretOperatorVolumeSourceBuilder::new(secret_class_name) .with_pod_scope() .with_format(SecretFormat::TlsPem) + .with_auto_tls_cert_lifetime(*requested_secret_lifetime) .build() .context(SecretVolumeBuildSnafu)?, ) @@ -610,6 +619,7 @@ impl KafkaTlsSecurity { fn create_tls_keystore_volume( volume_name: &str, secret_class_name: &str, + requested_secret_lifetime: &Duration, ) -> Result { Ok(VolumeBuilder::new(volume_name) .ephemeral( @@ -618,6 +628,7 @@ impl KafkaTlsSecurity { .with_listener_volume_scope(LISTENER_BROKER_VOLUME_NAME) .with_listener_volume_scope(LISTENER_BOOTSTRAP_VOLUME_NAME) .with_format(SecretFormat::TlsPkcs12) + .with_auto_tls_cert_lifetime(*requested_secret_lifetime) .build() .context(SecretVolumeBuildSnafu)?, ) diff --git a/rust/operator-binary/src/kafka_controller.rs b/rust/operator-binary/src/kafka_controller.rs index 3020d6af..4a50673a 100644 --- a/rust/operator-binary/src/kafka_controller.rs +++ b/rust/operator-binary/src/kafka_controller.rs @@ -110,6 +110,9 @@ pub struct Ctx { #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] pub enum Error { + #[snafu(display("missing secret lifetime"))] + MissingSecretLifetime, + #[snafu(display("object has no name"))] ObjectHasNoName, @@ -363,6 +366,7 @@ impl ReconcilerError for Error { fn secondary_object(&self) -> Option> { match self { + Error::MissingSecretLifetime => None, Error::ObjectHasNoName => None, Error::ObjectHasNoNamespace => None, Error::NoBrokerRole => None, @@ -866,8 +870,16 @@ fn build_broker_rolegroup_statefulset( let mut pod_builder = PodBuilder::new(); // Add TLS related volumes and volume mounts + let requested_secret_lifetime = merged_config + .requested_secret_lifetime + .context(MissingSecretLifetimeSnafu)?; kafka_security - .add_volume_and_volume_mounts(&mut pod_builder, &mut cb_kcat_prober, &mut cb_kafka) + .add_volume_and_volume_mounts( + &mut pod_builder, + &mut cb_kcat_prober, + &mut cb_kafka, + &requested_secret_lifetime, + ) .context(AddVolumesAndVolumeMountsSnafu)?; let mut pvcs = merged_config.resources.storage.build_pvcs();