Skip to content

Commit 1e4f5a2

Browse files
fix: Include hdfs principal names in discovery ConfigMap (#451)
* fix: Include hdfs principal names in discovery ConfigMap * changelog * Apply suggestions from code review Co-authored-by: Siegfried Weber <[email protected]> --------- Co-authored-by: Siegfried Weber <[email protected]>
1 parent f619a87 commit 1e4f5a2

File tree

6 files changed

+92
-43
lines changed

6 files changed

+92
-43
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,13 @@ All notable changes to this project will be documented in this file.
1212

1313
- `operator-rs` `0.56.1` -> `0.57.0` ([#433]).
1414

15+
### Fixed
16+
17+
- Include hdfs principals `dfs.journalnode.kerberos.principal`, `dfs.namenode.kerberos.principal`
18+
and `dfs.datanode.kerberos.principal` in the discovery ConfigMap in case Kerberos is enabled ([#451]).
19+
1520
[#433]: https://github.com/stackabletech/hdfs-operator/pull/433
21+
[#451]: https://github.com/stackabletech/hdfs-operator/pull/451
1622

1723
## [23.11.0] - 2023-11-24
1824

docs/modules/hdfs/pages/reference/discovery.adoc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,13 @@ The ConfigMap data values are formatted as Hadoop XML files which allows simple
3737
Contains the `fs.defaultFS` which defaults to `hdfs://{clusterName}/`.
3838

3939
`hdfs-site.xml`::
40-
Contains the `dfs.namenode.*` properties for `rpc` and `http` addresses for the `namenodes` as well as the `dfs.nameservices` property which defaults to `hdfs://{clusterName}/`.
40+
Contains the `dfs.namenode.*` properties for `rpc` and `http` addresses for the `namenodes` as well as the `dfs.nameservices` property which defaults to `hdfs://{clusterName}/`.
41+
42+
=== Kerberos
43+
In case Kerberos is enabled according to the xref:usage-guide/security.adoc[security documentation], the discovery ConfigMap also includes the information that clients must authenticate themselves using Kerberos.
44+
45+
Some Kerberos-related configuration settings require the environment variable `KERBEROS_REALM` to be set (e.g. using `export KERBEROS_REALM=$(grep -oP 'default_realm = \K.*' /stackable/kerberos/krb5.conf)`).
46+
If you want to use the discovery ConfigMap outside Stackable services, you need to provide this environment variable.
47+
As an alternative you can substitute `${env.KERBEROS_REALM}` with your actual realm (e.g. by using `sed -i -e 's/${env.KERBEROS_REALM}/'"$KERBEROS_REALM"'/g' core-site.xml`).
48+
49+
One example would be the property `dfs.namenode.kerberos.principal` being set to `nn/hdfs.default.svc.cluster.local@${env.KERBEROS_REALM}`.

rust/crd/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,11 @@ impl HdfsRole {
441441
}
442442

443443
impl HdfsCluster {
444+
/// Return the namespace of the cluster or an error in case it is not set.
445+
pub fn namespace_or_error(&self) -> Result<String, Error> {
446+
self.namespace().context(NoNamespaceSnafu)
447+
}
448+
444449
/// Kubernetes labels to attach to Pods within a role group.
445450
///
446451
/// The same labels are also used as selectors for Services and StatefulSets.

rust/operator-binary/src/discovery.rs

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::{
22
build_recommended_labels,
33
config::{CoreSiteConfigBuilder, HdfsSiteConfigBuilder},
4+
hdfs_controller::Error,
45
};
56
use stackable_hdfs_crd::{
67
constants::{CORE_SITE_XML, HDFS_SITE_XML},
@@ -9,9 +10,8 @@ use stackable_hdfs_crd::{
910
use stackable_operator::{
1011
builder::{ConfigMapBuilder, ObjectMetaBuilder},
1112
commons::product_image_selection::ResolvedProductImage,
12-
error::OperatorResult,
1313
k8s_openapi::api::core::v1::ConfigMap,
14-
kube::ResourceExt,
14+
kube::{runtime::reflector::ObjectRef, ResourceExt},
1515
};
1616

1717
/// Creates a discovery config map containing the `hdfs-site.xml` and `core-site.xml`
@@ -21,12 +21,16 @@ pub fn build_discovery_configmap(
2121
controller: &str,
2222
namenode_podrefs: &[HdfsPodRef],
2323
resolved_product_image: &ResolvedProductImage,
24-
) -> OperatorResult<ConfigMap> {
24+
) -> Result<ConfigMap, crate::hdfs_controller::Error> {
2525
ConfigMapBuilder::new()
2626
.metadata(
2727
ObjectMetaBuilder::new()
2828
.name_and_namespace(hdfs)
29-
.ownerreference_from_resource(hdfs, None, Some(true))?
29+
.ownerreference_from_resource(hdfs, None, Some(true))
30+
.map_err(|err| Error::ObjectMissingMetadataForOwnerRef {
31+
source: err,
32+
obj_ref: ObjectRef::from_obj(hdfs),
33+
})?
3034
.with_recommended_labels(build_recommended_labels(
3135
hdfs,
3236
controller,
@@ -42,9 +46,10 @@ pub fn build_discovery_configmap(
4246
)
4347
.add_data(
4448
CORE_SITE_XML,
45-
build_discovery_core_site_xml(hdfs, hdfs.name_any()),
49+
build_discovery_core_site_xml(hdfs, hdfs.name_any())?,
4650
)
4751
.build()
52+
.map_err(|err| Error::BuildDiscoveryConfigMap { source: err })
4853
}
4954

5055
fn build_discovery_hdfs_site_xml(
@@ -62,9 +67,12 @@ fn build_discovery_hdfs_site_xml(
6267
.build_as_xml()
6368
}
6469

65-
fn build_discovery_core_site_xml(hdfs: &HdfsCluster, logical_name: String) -> String {
66-
CoreSiteConfigBuilder::new(logical_name)
70+
fn build_discovery_core_site_xml(
71+
hdfs: &HdfsCluster,
72+
logical_name: String,
73+
) -> Result<String, Error> {
74+
Ok(CoreSiteConfigBuilder::new(logical_name)
6775
.fs_default_fs()
68-
.security_discovery_config(hdfs)
69-
.build_as_xml()
76+
.security_discovery_config(hdfs)?
77+
.build_as_xml())
7078
}

rust/operator-binary/src/hdfs_controller.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -277,8 +277,7 @@ pub async fn reconcile_hdfs(hdfs: Arc<HdfsCluster>, ctx: Arc<Ctx>) -> HdfsOperat
277277
HDFS_CONTROLLER,
278278
&namenode_podrefs,
279279
&resolved_product_image,
280-
)
281-
.context(BuildDiscoveryConfigMapSnafu)?;
280+
)?;
282281

283282
// The discovery CM is linked to the cluster lifecycle via ownerreference.
284283
// Therefore, must not be added to the "orphaned" cluster resources
@@ -482,11 +481,6 @@ fn rolegroup_config_map(
482481
.with_context(|| ObjectHasNoNameSnafu {
483482
obj_ref: ObjectRef::from_obj(hdfs),
484483
})?;
485-
let hdfs_namespace = hdfs
486-
.namespace()
487-
.with_context(|| ObjectHasNoNamespaceSnafu {
488-
obj_ref: ObjectRef::from_obj(hdfs),
489-
})?;
490484

491485
let mut hdfs_site_xml = String::new();
492486
let mut core_site_xml = String::new();
@@ -525,7 +519,7 @@ fn rolegroup_config_map(
525519
core_site_xml = CoreSiteConfigBuilder::new(hdfs_name.to_string())
526520
.fs_default_fs()
527521
.ha_zookeeper_quorum()
528-
.security_config(hdfs, hdfs_name, &hdfs_namespace)
522+
.security_config(hdfs)?
529523
// the extend with config must come last in order to have overrides working!!!
530524
.extend(config)
531525
.build_as_xml();

rust/operator-binary/src/kerberos.rs

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ use stackable_hdfs_crd::{
22
constants::{SSL_CLIENT_XML, SSL_SERVER_XML},
33
HdfsCluster,
44
};
5-
use stackable_operator::commons::product_image_selection::ResolvedProductImage;
5+
use stackable_operator::{
6+
commons::product_image_selection::ResolvedProductImage,
7+
kube::{runtime::reflector::ObjectRef, ResourceExt},
8+
};
69

710
use crate::{
811
config::{CoreSiteConfigBuilder, HdfsSiteConfigBuilder},
@@ -52,29 +55,14 @@ impl HdfsSiteConfigBuilder {
5255
}
5356

5457
impl CoreSiteConfigBuilder {
55-
pub fn security_config(
56-
&mut self,
57-
hdfs: &HdfsCluster,
58-
hdfs_name: &str,
59-
hdfs_namespace: &str,
60-
) -> &mut Self {
58+
pub fn security_config(&mut self, hdfs: &HdfsCluster) -> Result<&mut Self, Error> {
6159
if hdfs.authentication_config().is_some() {
62-
// For a long time we tried using `_HOST` in principals, e.g. `jn/[email protected]`.
63-
// Turns out there are a lot of code paths that check the principal of the requester using a reverse lookup of the incoming IP address
64-
// and getting a different hostname than the principal has.
65-
// What ultimately killed this approach was
66-
//
67-
// 2023-05-30 09:23:01,745 ERROR namenode.EditLogInputStream (EditLogFileInputStream.java:nextOpImpl(220)) - caught exception initializing https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true
68-
// org.apache.hadoop.hdfs.server.common.HttpGetFailedException: Fetch of https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true failed with status code 403
69-
// Response message:
70-
// Only Namenode and another JournalNode may access this servlet
71-
//
72-
// After we have switched to using the following principals everything worked without problems
60+
let principal_host_part = principal_host_part(hdfs)?;
7361

74-
let principal_host_part =
75-
format!("{hdfs_name}.{hdfs_namespace}.svc.cluster.local@${{env.KERBEROS_REALM}}");
7662
self.add("hadoop.security.authentication", "kerberos")
77-
.add("hadoop.registry.kerberos.realm", "${env.KERBEROS_REALM}")
63+
// Not adding hadoop.registry.kerberos.realm, as it seems to not be used by our customers
64+
// and would need text-replacement of the env var anyway.
65+
// .add("hadoop.registry.kerberos.realm", "${env.KERBEROS_REALM}")
7866
.add(
7967
"dfs.journalnode.kerberos.principal",
8068
format!("jn/{principal_host_part}"),
@@ -115,19 +103,58 @@ impl CoreSiteConfigBuilder {
115103

116104
self.add_wire_encryption_settings();
117105
}
118-
self
106+
Ok(self)
119107
}
120108

121-
pub fn security_discovery_config(&mut self, hdfs: &HdfsCluster) -> &mut Self {
109+
pub fn security_discovery_config(&mut self, hdfs: &HdfsCluster) -> Result<&mut Self, Error> {
122110
if hdfs.has_kerberos_enabled() {
123-
self.add("hadoop.security.authentication", "kerberos");
111+
let principal_host_part = principal_host_part(hdfs)?;
112+
113+
self.add("hadoop.security.authentication", "kerberos")
114+
.add(
115+
"dfs.journalnode.kerberos.principal",
116+
format!("jn/{principal_host_part}"),
117+
)
118+
.add(
119+
"dfs.namenode.kerberos.principal",
120+
format!("nn/{principal_host_part}"),
121+
)
122+
.add(
123+
"dfs.datanode.kerberos.principal",
124+
format!("dn/{principal_host_part}"),
125+
);
124126
self.add_wire_encryption_settings();
125127
}
126-
self
128+
Ok(self)
127129
}
128130

129131
fn add_wire_encryption_settings(&mut self) -> &mut Self {
130132
self.add("hadoop.rpc.protection", "privacy");
131133
self
132134
}
133135
}
136+
137+
/// For a long time we tried using `_HOST` in principals, e.g. `jn/[email protected]`.
138+
/// Turns out there are a lot of code paths that check the principal of the requester using a reverse lookup of the incoming IP address
139+
/// and getting a different hostname than the principal has.
140+
/// What ultimately killed this approach was
141+
///
142+
/// ```text
143+
/// 2023-05-30 09:23:01,745 ERROR namenode.EditLogInputStream (EditLogFileInputStream.java:nextOpImpl(220)) - caught exception initializing https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true
144+
/// org.apache.hadoop.hdfs.server.common.HttpGetFailedException: Fetch of https://hdfs-journalnode-default-1.hdfs-journalnode-default.kuttl-test-fine-rat.svc.cluster.local:8481/getJournal?jid=hdfs&segmentTxId=1&storageInfo=-65%3A595659877%3A1685437352616%3ACID-90c52400-5b07-49bf-bdbe-3469bbdc5ebb&inProgressOk=true failed with status code 403
145+
/// Response message:
146+
/// Only Namenode and another JournalNode may access this servlet
147+
/// ```
148+
///
149+
/// After we have switched to using the following principals everything worked without problems
150+
fn principal_host_part(hdfs: &HdfsCluster) -> Result<String, Error> {
151+
let hdfs_name = hdfs.name_any();
152+
let hdfs_namespace = hdfs
153+
.namespace_or_error()
154+
.map_err(|_| Error::ObjectHasNoNamespace {
155+
obj_ref: ObjectRef::from_obj(hdfs),
156+
})?;
157+
Ok(format!(
158+
"{hdfs_name}.{hdfs_namespace}.svc.cluster.local@${{env.KERBEROS_REALM}}"
159+
))
160+
}

0 commit comments

Comments
 (0)