From 042641643f7a0e24fe4effb1052edabfbf34ad0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Natalie=20Klestrup=20R=C3=B6ijezon?= Date: Fri, 10 Nov 2023 23:23:02 +0100 Subject: [PATCH 1/5] Apply list-op patch to HDFS --- conf.py | 2 +- hadoop/Dockerfile | 75 +++--- .../patches/0001-disable-pipes.patch | 30 +++ .../0002-datanode-registration-override.patch | 213 ++++++++++++++++++ 4 files changed, 285 insertions(+), 35 deletions(-) create mode 100644 hadoop/stackable/patches/0001-disable-pipes.patch create mode 100644 hadoop/stackable/patches/0002-datanode-registration-override.patch diff --git a/conf.py b/conf.py index 6f8739177..f95d6bfda 100644 --- a/conf.py +++ b/conf.py @@ -61,7 +61,7 @@ {"product": "3.2.2", "java-base": "11", "jmx_exporter": "0.20.0"}, {"product": "3.2.4", "java-base": "11", "jmx_exporter": "0.20.0"}, {"product": "3.3.4", "java-base": "11", "jmx_exporter": "0.20.0"}, - {"product": "3.3.6", "java-base": "11", "jmx_exporter": "0.20.0"}, + {"product": "3.3.6", "java-base": "11", "jmx_exporter": "0.20.0", "node": "18.16.0"}, ], }, { diff --git a/hadoop/Dockerfile b/hadoop/Dockerfile index 6c2f29181..46443349b 100644 --- a/hadoop/Dockerfile +++ b/hadoop/Dockerfile @@ -2,6 +2,7 @@ FROM stackable/image/java-base AS builder ARG PRODUCT +ARG NODE ARG JMX_EXPORTER # https://github.com/hadolint/hadolint/wiki/DL4006 @@ -19,9 +20,9 @@ RUN microdnf update && \ java-11-openjdk-devel \ maven \ openssl-devel \ - tar \ + tar xz \ unzip \ - zip && \ + zip git && \ microdnf clean all WORKDIR /stackable @@ -31,11 +32,11 @@ COPY hadoop/stackable /stackable # The source is needed to build FUSE. The rest of the src package will not make it into the final image. # Both the src and binary variants extract into different root folders -RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . && \ - curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}.tar.gz" | tar -xzC . && \ - ln -s "/stackable/hadoop-${PRODUCT}" /stackable/hadoop && \ - rm -rf /stackable/hadoop/lib/native/examples && \ - rm -rf /stackable/hadoop/share/doc +RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . + # curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}.tar.gz" | tar -xzC . && \ + # ln -s "/stackable/hadoop-${PRODUCT}" /stackable/hadoop && \ + # rm -rf /stackable/hadoop/lib/native/examples && \ + # rm -rf /stackable/hadoop/share/doc # The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode # the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar" @@ -48,6 +49,39 @@ RUN curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jm ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar && \ ln -s /stackable/jmx/jmx_prometheus_javaagent.jar /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar +# This Protobuf version is the exact version as used in the Hadoop Dockerfile +# See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh +# (this was hardcoded in the Dockerfile in earlier versions of Hadoop, make sure to look at the exact version in Github) +# For now all versions of Hadoop we support use Protobuf 3.7.1 so we can hardcode it here. 
+# Should it ever differ between versions we'll need to make this a variable as well. +RUN mkdir -p /opt/protobuf-src && \ + curl --fail -L -s -S https://repo.stackable.tech/repository/packages/protobuf/protobuf-java-3.7.1.tar.gz -o /opt/protobuf.tar.gz && \ + tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src --no-same-owner && \ + cd /opt/protobuf-src && \ + ./configure --prefix=/opt/protobuf && \ + make "-j$(nproc)" && \ + make install && \ + cd /root && \ + rm -rf /opt/protobuf-src + +ENV PROTOBUF_HOME /opt/protobuf +ENV PATH "${PATH}:/opt/protobuf/bin" + +RUN curl --fail -L https://repo.stackable.tech/repository/packages/node/node-v${NODE}-linux-x64.tar.xz | tar -xJC . && \ + ln -s /stackable/node-v${NODE}-linux-x64 /stackable/node +ENV PATH "${PATH}:/stackable/node/bin" +RUN corepack enable yarn && \ + yarn config set ignore-engines true + +RUN cd /stackable/hadoop-${PRODUCT}-src && \ + # Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI + git apply < /stackable/patches/0001-disable-pipes.patch && \ + # Datanode registration override is required for Listener Operator integration + git apply < /stackable/patches/0002-datanode-registration-override.patch && \ + mvn clean package -Pdist,native -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \ + cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} && \ + # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves + cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin # === # Mitigation for CVE-2021-44228 (Log4Shell) @@ -72,33 +106,6 @@ COPY shared/log4shell_scanner /bin/log4shell_scanner RUN /bin/log4shell_scanner s "/stackable/hadoop-${PRODUCT}" # === - -# This Protobuf version is the exact version as used in the Hadoop Dockerfile -# See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh -# (this was hardcoded in the Dockerfile in earlier versions of Hadoop, make sure to look at the exact version in Github) -# For now all versions of Hadoop we support use Protobuf 3.7.1 so we can hardcode it here. -# Should it ever differ between versions we'll need to make this a variable as well. 
-RUN mkdir -p /opt/protobuf-src && \ - curl --fail -L -s -S https://repo.stackable.tech/repository/packages/protobuf/protobuf-java-3.7.1.tar.gz -o /opt/protobuf.tar.gz && \ - tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src --no-same-owner && \ - cd /opt/protobuf-src && \ - ./configure --prefix=/opt/protobuf && \ - make "-j$(nproc)" && \ - make install && \ - cd /root && \ - rm -rf /opt/protobuf-src - -ENV PROTOBUF_HOME /opt/protobuf -ENV PATH "${PATH}:/opt/protobuf/bin" - -WORKDIR /stackable/hadoop-${PRODUCT}-src/hadoop-hdfs-project/hadoop-hdfs-native-client - -# This command comes from hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/doc/README -RUN mvn clean package -Pnative -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \ - cp target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop/bin && \ - rm -rf /stackable/hadoop-${PRODUCT}-src - - # Final Image FROM stackable/image/java-base diff --git a/hadoop/stackable/patches/0001-disable-pipes.patch b/hadoop/stackable/patches/0001-disable-pipes.patch new file mode 100644 index 000000000..53134ca90 --- /dev/null +++ b/hadoop/stackable/patches/0001-disable-pipes.patch @@ -0,0 +1,30 @@ +diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml +index 8a3e93c1037..8604a3325d9 100644 +--- a/hadoop-tools/hadoop-tools-dist/pom.xml ++++ b/hadoop-tools/hadoop-tools-dist/pom.xml +@@ -85,13 +85,6 @@ + hadoop-gridmix + compile + +- +- org.apache.hadoop +- hadoop-pipes +- compile +- pom +- ${project.version} +- + + org.apache.hadoop + hadoop-aws +diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml +index 4e934cd101f..2654dea5dd6 100644 +--- a/hadoop-tools/pom.xml ++++ b/hadoop-tools/pom.xml +@@ -41,7 +41,6 @@ + hadoop-datajoin + hadoop-tools-dist + hadoop-extras +- hadoop-pipes + hadoop-openstack + hadoop-sls + hadoop-resourceestimator diff --git a/hadoop/stackable/patches/0002-datanode-registration-override.patch b/hadoop/stackable/patches/0002-datanode-registration-override.patch new file mode 100644 index 000000000..91d813132 --- /dev/null +++ b/hadoop/stackable/patches/0002-datanode-registration-override.patch @@ -0,0 +1,213 @@ +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +index 88a18d9cf07..b07fcb0b17a 100755 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +@@ -152,6 +152,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; + public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; + public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; ++ ++ public static final String DFS_DATANODE_ADVERTISED_HOSTNAME = "dfs.datanode.advertised.hostname"; ++ public static final String DFS_DATANODE_ADVERTISED_DATA_PORT = "dfs.datanode.advertised.port"; ++ public static final String DFS_DATANODE_ADVERTISED_HTTP_PORT = "dfs.datanode.advertised.http.port"; ++ public static final String DFS_DATANODE_ADVERTISED_HTTPS_PORT = "dfs.datanode.advertised.https.port"; ++ public static final String DFS_DATANODE_ADVERTISED_IPC_PORT = "dfs.datanode.advertised.ipc.port"; ++ + public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = 
"dfs.datanode.max.locked.memory"; + public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; + public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; +@@ -484,6 +491,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = + TimeUnit.SECONDS.toMillis(2); + ++ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; ++ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; + public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; + public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +index bdd20d7e276..c10db0611c9 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +@@ -181,6 +181,8 @@ public class DatanodeManager { + private boolean hasClusterEverBeenMultiRack = false; + + private final boolean checkIpHostnameInRegistration; ++ private final boolean allowRegistrationAddressOverride; ++ + /** + * Whether we should tell datanodes what to cache in replies to + * heartbeat messages. +@@ -314,6 +316,11 @@ public class DatanodeManager { + // Block invalidate limit also has some dependency on heartbeat interval. + // Check setBlockInvalidateLimit(). + setBlockInvalidateLimit(configuredBlockInvalidateLimit); ++ this.allowRegistrationAddressOverride = conf.getBoolean( ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); ++ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY ++ + "=" + allowRegistrationAddressOverride); + this.checkIpHostnameInRegistration = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); +@@ -1146,27 +1153,29 @@ void startAdminOperationIfNecessary(DatanodeDescriptor nodeReg) { + */ + public void registerDatanode(DatanodeRegistration nodeReg) + throws DisallowedDatanodeException, UnresolvedTopologyException { +- InetAddress dnAddress = Server.getRemoteIp(); +- if (dnAddress != null) { +- // Mostly called inside an RPC, update ip and peer hostname +- String hostname = dnAddress.getHostName(); +- String ip = dnAddress.getHostAddress(); +- if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { +- // Reject registration of unresolved datanode to prevent performance +- // impact of repetitive DNS lookups later. 
+- final String message = "hostname cannot be resolved (ip=" +- + ip + ", hostname=" + hostname + ")"; +- LOG.warn("Unresolved datanode registration: " + message); +- throw new DisallowedDatanodeException(nodeReg, message); ++ if (!allowRegistrationAddressOverride) { ++ InetAddress dnAddress = Server.getRemoteIp(); ++ if (dnAddress != null) { ++ // Mostly called inside an RPC, update ip and peer hostname ++ String hostname = dnAddress.getHostName(); ++ String ip = dnAddress.getHostAddress(); ++ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { ++ // Reject registration of unresolved datanode to prevent performance ++ // impact of repetitive DNS lookups later. ++ final String message = "hostname cannot be resolved (ip=" ++ + ip + ", hostname=" + hostname + ")"; ++ LOG.warn("Unresolved datanode registration: " + message); ++ throw new DisallowedDatanodeException(nodeReg, message); ++ } ++ // update node registration with the ip and hostname from rpc request ++ nodeReg.setIpAddr(ip); ++ nodeReg.setPeerHostName(hostname); + } +- // update node registration with the ip and hostname from rpc request +- nodeReg.setIpAddr(ip); +- nodeReg.setPeerHostName(hostname); + } +- ++ + try { + nodeReg.setExportedKeys(blockManager.getBlockKeys()); +- ++ + // Checks if the node is not on the hosts list. If it is not, then + // it will be disallowed from registering. + if (!hostConfigManager.isIncluded(nodeReg)) { +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +index 9b5343321d3..8ce6a61204b 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +@@ -100,6 +100,11 @@ public class DNConf { + final boolean syncOnClose; + final boolean encryptDataTransfer; + final boolean connectToDnViaHostname; ++ private final String advertisedHostname; ++ private final int advertisedDataPort; ++ private final int advertisedHttpPort; ++ private final int advertisedHttpsPort; ++ private final int advertisedIpcPort; + final boolean overwriteDownstreamDerivedQOP; + private final boolean pmemCacheRecoveryEnabled; + +@@ -188,6 +193,11 @@ public DNConf(final Configurable dn) { + connectToDnViaHostname = getConf().getBoolean( + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); ++ advertisedHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_ADVERTISED_HOSTNAME); ++ advertisedDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_ADVERTISED_DATA_PORT, -1); ++ advertisedHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_ADVERTISED_HTTP_PORT, -1); ++ advertisedHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_ADVERTISED_HTTPS_PORT, -1); ++ advertisedIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_ADVERTISED_IPC_PORT, -1); + this.blockReportInterval = getConf().getLong( + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); +@@ -362,6 +372,32 @@ public boolean getConnectToDnViaHostname() { + return connectToDnViaHostname; + } + ++ /** ++ * Returns a hostname to advertise instead of the system hostname. ++ * This is an expert setting and can be used in multihoming scenarios to override the detected hostname. 
++ * ++ * @return null if the system hostname should be used, otherwise a hostname ++ */ ++ public String getAdvertisedHostname() { ++ return advertisedHostname; ++ } ++ ++ public int getAdvertisedDataPort() { ++ return advertisedDataPort; ++ } ++ ++ public int getAdvertisedHttpPort() { ++ return advertisedHttpPort; ++ } ++ ++ public int getAdvertisedHttpsPort() { ++ return advertisedHttpsPort; ++ } ++ ++ public int getAdvertisedIpcPort() { ++ return advertisedIpcPort; ++ } ++ + /** + * Returns socket timeout + * +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +index 8fb009dab85..228bcce62b3 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +@@ -133,6 +133,7 @@ + import java.util.Iterator; + import java.util.List; + import java.util.Map; ++import java.util.Optional; + import java.util.Map.Entry; + import java.util.Set; + import java.util.UUID; +@@ -2053,11 +2054,35 @@ DatanodeRegistration createBPRegistration(NamespaceInfo nsInfo) { + NodeType.DATA_NODE); + } + +- DatanodeID dnId = new DatanodeID( +- streamingAddr.getAddress().getHostAddress(), hostName, +- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), +- infoSecurePort, getIpcPort()); +- return new DatanodeRegistration(dnId, storageInfo, ++ String advertisedHostname = Optional ++ .ofNullable(dnConf.getAdvertisedHostname()) ++ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); ++ int advertisedDataPort = dnConf.getAdvertisedDataPort(); ++ if (advertisedDataPort == -1) { ++ advertisedDataPort = getXferPort(); ++ } ++ int advertisedHttpPort = dnConf.getAdvertisedHttpPort(); ++ if (advertisedHttpPort == -1) { ++ advertisedHttpPort = getInfoPort(); ++ } ++ int advertisedHttpsPort = dnConf.getAdvertisedHttpPort(); ++ if (advertisedHttpsPort == -1) { ++ advertisedHttpPort = getInfoSecurePort(); ++ } ++ int advertisedIpcPort = dnConf.getAdvertisedIpcPort(); ++ if (advertisedIpcPort == -1) { ++ advertisedIpcPort = getIpcPort(); ++ } ++ ++ DatanodeID dnId = new DatanodeID(advertisedHostname, ++ hostName, ++ storage.getDatanodeUuid(), ++ advertisedDataPort, ++ advertisedHttpPort, ++ advertisedHttpsPort, ++ advertisedIpcPort); ++ ++ return new DatanodeRegistration(dnId, storageInfo, + new ExportedBlockKeys(), VersionInfo.getVersion()); + } + From d8105f759b40bdbb49d868c41987867335a5b8cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Natalie=20Klestrup=20R=C3=B6ijezon?= Date: Thu, 16 Nov 2023 16:27:39 +0100 Subject: [PATCH 2/5] Tidy up hadoop Dockerfile --- hadoop/Dockerfile | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/hadoop/Dockerfile b/hadoop/Dockerfile index 46443349b..e96e0319f 100644 --- a/hadoop/Dockerfile +++ b/hadoop/Dockerfile @@ -8,21 +8,13 @@ ARG JMX_EXPORTER # https://github.com/hadolint/hadolint/wiki/DL4006 SHELL ["/bin/bash", "-o", "pipefail", "-c"] -# unzip & zip are required for log4shell.sh # All others are required for the FUSE build RUN microdnf update && \ microdnf install \ - cmake \ - cyrus-sasl-devel \ - fuse-devel \ - gcc \ - gcc-c++ \ - java-11-openjdk-devel \ - maven \ - openssl-devel \ - tar xz \ - unzip \ - zip git && \ + # Required for Hadoop build + cmake cyrus-sasl-devel fuse-devel gcc gcc-c++ java-11-openjdk-devel maven 
openssl-devel tar xz git \ + # Required for log4shell.sh + unzip zip && \ microdnf clean all WORKDIR /stackable @@ -30,13 +22,8 @@ WORKDIR /stackable # This is needed here because it creates the JMX directory, we could create it any other way but this works COPY hadoop/stackable /stackable -# The source is needed to build FUSE. The rest of the src package will not make it into the final image. -# Both the src and binary variants extract into different root folders +# Build from source to enable FUSE module, and to apply custom patches. RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . - # curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}.tar.gz" | tar -xzC . && \ - # ln -s "/stackable/hadoop-${PRODUCT}" /stackable/hadoop && \ - # rm -rf /stackable/hadoop/lib/native/examples && \ - # rm -rf /stackable/hadoop/share/doc # The symlink from JMX Exporter 0.16.1 to the versionless link exists because old HDFS Operators (up until and including 23.7) used to hardcode # the version of JMX Exporter like this: "-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar" @@ -77,6 +64,7 @@ RUN cd /stackable/hadoop-${PRODUCT}-src && \ # Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI git apply < /stackable/patches/0001-disable-pipes.patch && \ # Datanode registration override is required for Listener Operator integration + # Developed at https://github.com/stackabletech/hadoop/tree/spike/override-datanode-id git apply < /stackable/patches/0002-datanode-registration-override.patch && \ mvn clean package -Pdist,native -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \ cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} && \ From 47454e9e75ff46ea82c2219e89dd16254c82f72a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Natalie=20Klestrup=20R=C3=B6ijezon?= Date: Thu, 21 Dec 2023 18:42:47 +0100 Subject: [PATCH 3/5] Backport patchset to Hadoop 3.3.4 --- conf.py | 2 +- .../patches/0001-disable-pipes.patch | 10 +- .../0002-datanode-registration-override.patch | 160 +++++++++++------- 3 files changed, 103 insertions(+), 69 deletions(-) diff --git a/conf.py b/conf.py index f95d6bfda..7001abbd0 100644 --- a/conf.py +++ b/conf.py @@ -60,7 +60,7 @@ "versions": [ {"product": "3.2.2", "java-base": "11", "jmx_exporter": "0.20.0"}, {"product": "3.2.4", "java-base": "11", "jmx_exporter": "0.20.0"}, - {"product": "3.3.4", "java-base": "11", "jmx_exporter": "0.20.0"}, + {"product": "3.3.4", "java-base": "11", "jmx_exporter": "0.20.0", "node": "18.16.0"}, {"product": "3.3.6", "java-base": "11", "jmx_exporter": "0.20.0", "node": "18.16.0"}, ], }, diff --git a/hadoop/stackable/patches/0001-disable-pipes.patch b/hadoop/stackable/patches/0001-disable-pipes.patch index 53134ca90..561ba318e 100644 --- a/hadoop/stackable/patches/0001-disable-pipes.patch +++ b/hadoop/stackable/patches/0001-disable-pipes.patch @@ -1,8 +1,8 @@ diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml -index 8a3e93c1037..8604a3325d9 100644 +index 652f55682f3..e2b8daee582 100644 --- a/hadoop-tools/hadoop-tools-dist/pom.xml +++ b/hadoop-tools/hadoop-tools-dist/pom.xml -@@ -85,13 +85,6 @@ +@@ -74,13 +74,6 @@ hadoop-gridmix compile @@ -15,12 +15,12 @@ index 8a3e93c1037..8604a3325d9 100644 - org.apache.hadoop - hadoop-aws + hadoop-openstack diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml -index 4e934cd101f..2654dea5dd6 100644 +index 
d69e4abef43..83d63f733f2 100644 --- a/hadoop-tools/pom.xml +++ b/hadoop-tools/pom.xml -@@ -41,7 +41,6 @@ +@@ -40,7 +40,6 @@ hadoop-datajoin hadoop-tools-dist hadoop-extras diff --git a/hadoop/stackable/patches/0002-datanode-registration-override.patch b/hadoop/stackable/patches/0002-datanode-registration-override.patch index 91d813132..db97dba26 100644 --- a/hadoop/stackable/patches/0002-datanode-registration-override.patch +++ b/hadoop/stackable/patches/0002-datanode-registration-override.patch @@ -1,22 +1,22 @@ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java -index 88a18d9cf07..b07fcb0b17a 100755 +index 7196def4221..2c00fb4fb1a 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java -@@ -152,6 +152,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { +@@ -139,6 +139,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; + -+ public static final String DFS_DATANODE_ADVERTISED_HOSTNAME = "dfs.datanode.advertised.hostname"; -+ public static final String DFS_DATANODE_ADVERTISED_DATA_PORT = "dfs.datanode.advertised.port"; -+ public static final String DFS_DATANODE_ADVERTISED_HTTP_PORT = "dfs.datanode.advertised.http.port"; -+ public static final String DFS_DATANODE_ADVERTISED_HTTPS_PORT = "dfs.datanode.advertised.https.port"; -+ public static final String DFS_DATANODE_ADVERTISED_IPC_PORT = "dfs.datanode.advertised.ipc.port"; ++ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; ++ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = "dfs.datanode.registered.https.port"; ++ public static final String DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; + public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; -@@ -484,6 +491,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { +@@ -446,6 +453,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = TimeUnit.SECONDS.toMillis(2); @@ -26,10 +26,10 @@ index 88a18d9cf07..b07fcb0b17a 100755 public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java -index bdd20d7e276..c10db0611c9 100644 +index 44dffcbed11..54f6d63fa78 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java -@@ -181,6 +181,8 @@ public class DatanodeManager { +@@ -179,6 +179,8 @@ public class DatanodeManager { private boolean hasClusterEverBeenMultiRack = false; private final boolean checkIpHostnameInRegistration; @@ -38,10 +38,10 @@ index bdd20d7e276..c10db0611c9 100644 /** * Whether we should tell datanodes what to cache in replies to * heartbeat messages. -@@ -314,6 +316,11 @@ public class DatanodeManager { - // Block invalidate limit also has some dependency on heartbeat interval. - // Check setBlockInvalidateLimit(). - setBlockInvalidateLimit(configuredBlockInvalidateLimit); +@@ -326,6 +328,11 @@ public class DatanodeManager { + + ": configured=" + configuredBlockInvalidateLimit + + ", counted=" + countedBlockInvalidateLimit + + ", effected=" + blockInvalidateLimit); + this.allowRegistrationAddressOverride = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); @@ -50,7 +50,7 @@ index bdd20d7e276..c10db0611c9 100644 this.checkIpHostnameInRegistration = conf.getBoolean( DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); -@@ -1146,27 +1153,29 @@ void startAdminOperationIfNecessary(DatanodeDescriptor nodeReg) { +@@ -1133,27 +1140,29 @@ void startAdminOperationIfNecessary(DatanodeDescriptor nodeReg) { */ public void registerDatanode(DatanodeRegistration nodeReg) throws DisallowedDatanodeException, UnresolvedTopologyException { @@ -98,71 +98,105 @@ index bdd20d7e276..c10db0611c9 100644 // it will be disallowed from registering. 
if (!hostConfigManager.isIncluded(nodeReg)) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java -index 9b5343321d3..8ce6a61204b 100644 +index d61a17e83fe..eaf4a6d7c1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java -@@ -100,6 +100,11 @@ public class DNConf { +@@ -99,6 +99,11 @@ public class DNConf { final boolean syncOnClose; final boolean encryptDataTransfer; final boolean connectToDnViaHostname; -+ private final String advertisedHostname; -+ private final int advertisedDataPort; -+ private final int advertisedHttpPort; -+ private final int advertisedHttpsPort; -+ private final int advertisedIpcPort; ++ private final String registeredHostname; ++ private final int registeredDataPort; ++ private final int registeredHttpPort; ++ private final int registeredHttpsPort; ++ private final int registeredIpcPort; final boolean overwriteDownstreamDerivedQOP; private final boolean pmemCacheRecoveryEnabled; -@@ -188,6 +193,11 @@ public DNConf(final Configurable dn) { +@@ -187,6 +192,11 @@ public DNConf(final Configurable dn) { connectToDnViaHostname = getConf().getBoolean( DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); -+ advertisedHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_ADVERTISED_HOSTNAME); -+ advertisedDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_ADVERTISED_DATA_PORT, -1); -+ advertisedHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_ADVERTISED_HTTP_PORT, -1); -+ advertisedHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_ADVERTISED_HTTPS_PORT, -1); -+ advertisedIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_ADVERTISED_IPC_PORT, -1); ++ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); ++ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); ++ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); ++ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); ++ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); this.blockReportInterval = getConf().getLong( DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); -@@ -362,6 +372,32 @@ public boolean getConnectToDnViaHostname() { +@@ -360,6 +370,66 @@ public boolean getConnectToDnViaHostname() { return connectToDnViaHostname; } + /** -+ * Returns a hostname to advertise instead of the system hostname. -+ * This is an expert setting and can be used in multihoming scenarios to override the detected hostname. ++ * Returns a hostname to register with the cluster instead of the system ++ * hostname. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected hostname. + * + * @return null if the system hostname should be used, otherwise a hostname + */ -+ public String getAdvertisedHostname() { -+ return advertisedHostname; ++ public String getRegisteredHostname() { ++ return registeredHostname; + } + -+ public int getAdvertisedDataPort() { -+ return advertisedDataPort; ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * data port that the node is listening on. 
++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredDataPort() { ++ return registeredDataPort; + } + -+ public int getAdvertisedHttpPort() { -+ return advertisedHttpPort; ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTP port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpPort() { ++ return registeredHttpPort; + } + -+ public int getAdvertisedHttpsPort() { -+ return advertisedHttpsPort; ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTPS port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpsPort() { ++ return registeredHttpsPort; + } + -+ public int getAdvertisedIpcPort() { -+ return advertisedIpcPort; ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * IPC port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredIpcPort() { ++ return registeredIpcPort; + } + /** * Returns socket timeout * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java -index 8fb009dab85..228bcce62b3 100644 +index c1507a45120..d253779e70d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java -@@ -133,6 +133,7 @@ +@@ -82,6 +82,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; @@ -170,7 +204,7 @@ index 8fb009dab85..228bcce62b3 100644 import java.util.Map.Entry; import java.util.Set; import java.util.UUID; -@@ -2053,11 +2054,35 @@ DatanodeRegistration createBPRegistration(NamespaceInfo nsInfo) { +@@ -1556,11 +1557,35 @@ DatanodeRegistration createBPRegistration(NamespaceInfo nsInfo) { NodeType.DATA_NODE); } @@ -179,33 +213,33 @@ index 8fb009dab85..228bcce62b3 100644 - storage.getDatanodeUuid(), getXferPort(), getInfoPort(), - infoSecurePort, getIpcPort()); - return new DatanodeRegistration(dnId, storageInfo, -+ String advertisedHostname = Optional -+ .ofNullable(dnConf.getAdvertisedHostname()) ++ String registeredHostname = Optional ++ .ofNullable(dnConf.getRegisteredHostname()) + .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); -+ int advertisedDataPort = dnConf.getAdvertisedDataPort(); -+ if (advertisedDataPort == -1) { -+ advertisedDataPort = getXferPort(); ++ int registeredDataPort = dnConf.getRegisteredDataPort(); ++ if (registeredDataPort == -1) { ++ registeredDataPort = getXferPort(); + } -+ int advertisedHttpPort = dnConf.getAdvertisedHttpPort(); -+ if (advertisedHttpPort == -1) { -+ advertisedHttpPort = getInfoPort(); ++ int registeredHttpPort = dnConf.getRegisteredHttpPort(); ++ 
if (registeredHttpPort == -1) { ++ registeredHttpPort = getInfoPort(); + } -+ int advertisedHttpsPort = dnConf.getAdvertisedHttpPort(); -+ if (advertisedHttpsPort == -1) { -+ advertisedHttpPort = getInfoSecurePort(); ++ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); ++ if (registeredHttpsPort == -1) { ++ registeredHttpsPort = getInfoSecurePort(); + } -+ int advertisedIpcPort = dnConf.getAdvertisedIpcPort(); -+ if (advertisedIpcPort == -1) { -+ advertisedIpcPort = getIpcPort(); ++ int registeredIpcPort = dnConf.getRegisteredIpcPort(); ++ if (registeredIpcPort == -1) { ++ registeredIpcPort = getIpcPort(); + } + -+ DatanodeID dnId = new DatanodeID(advertisedHostname, ++ DatanodeID dnId = new DatanodeID(registeredHostname, + hostName, + storage.getDatanodeUuid(), -+ advertisedDataPort, -+ advertisedHttpPort, -+ advertisedHttpsPort, -+ advertisedIpcPort); ++ registeredDataPort, ++ registeredHttpPort, ++ registeredHttpsPort, ++ registeredIpcPort); + + return new DatanodeRegistration(dnId, storageInfo, new ExportedBlockKeys(), VersionInfo.getVersion()); From 83a708f9047699dc0f16b9cd548fa678371a9c27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Natalie=20Klestrup=20R=C3=B6ijezon?= Date: Thu, 11 Jan 2024 12:10:18 +0100 Subject: [PATCH 4/5] Forwardport registration patch to 3.3.6 --- ...atanode-registration-override-3.3.4.patch} | 0 ...datanode-registration-override-3.3.6.patch | 247 ++++++++++++++++++ 2 files changed, 247 insertions(+) rename hadoop/stackable/patches/3.3.4/{002-datanode-registration-override.patch => 002-datanode-registration-override-3.3.4.patch} (100%) create mode 100644 hadoop/stackable/patches/3.3.6/002-datanode-registration-override-3.3.6.patch diff --git a/hadoop/stackable/patches/3.3.4/002-datanode-registration-override.patch b/hadoop/stackable/patches/3.3.4/002-datanode-registration-override-3.3.4.patch similarity index 100% rename from hadoop/stackable/patches/3.3.4/002-datanode-registration-override.patch rename to hadoop/stackable/patches/3.3.4/002-datanode-registration-override-3.3.4.patch diff --git a/hadoop/stackable/patches/3.3.6/002-datanode-registration-override-3.3.6.patch b/hadoop/stackable/patches/3.3.6/002-datanode-registration-override-3.3.6.patch new file mode 100644 index 000000000..475a5b756 --- /dev/null +++ b/hadoop/stackable/patches/3.3.6/002-datanode-registration-override-3.3.6.patch @@ -0,0 +1,247 @@ +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +index e3f4bfcde84..3d65bcad229 100755 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +@@ -147,6 +147,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; + public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; + public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; ++ ++ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; ++ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = 
"dfs.datanode.registered.https.port"; ++ public static final String DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; ++ + public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; + public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; + public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; +@@ -454,6 +461,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = + TimeUnit.SECONDS.toMillis(2); + ++ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; ++ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; + public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; + public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +index 07381fc696f..8aeb92cff11 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +@@ -180,6 +180,8 @@ public class DatanodeManager { + private boolean hasClusterEverBeenMultiRack = false; + + private final boolean checkIpHostnameInRegistration; ++ private final boolean allowRegistrationAddressOverride; ++ + /** + * Whether we should tell datanodes what to cache in replies to + * heartbeat messages. +@@ -316,6 +318,11 @@ public class DatanodeManager { + // Block invalidate limit also has some dependency on heartbeat interval. + // Check setBlockInvalidateLimit(). + setBlockInvalidateLimit(configuredBlockInvalidateLimit); ++ this.allowRegistrationAddressOverride = conf.getBoolean( ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); ++ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY ++ + "=" + allowRegistrationAddressOverride); + this.checkIpHostnameInRegistration = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); +@@ -1138,27 +1145,29 @@ void startAdminOperationIfNecessary(DatanodeDescriptor nodeReg) { + */ + public void registerDatanode(DatanodeRegistration nodeReg) + throws DisallowedDatanodeException, UnresolvedTopologyException { +- InetAddress dnAddress = Server.getRemoteIp(); +- if (dnAddress != null) { +- // Mostly called inside an RPC, update ip and peer hostname +- String hostname = dnAddress.getHostName(); +- String ip = dnAddress.getHostAddress(); +- if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { +- // Reject registration of unresolved datanode to prevent performance +- // impact of repetitive DNS lookups later. 
+- final String message = "hostname cannot be resolved (ip=" +- + ip + ", hostname=" + hostname + ")"; +- LOG.warn("Unresolved datanode registration: " + message); +- throw new DisallowedDatanodeException(nodeReg, message); ++ if (!allowRegistrationAddressOverride) { ++ InetAddress dnAddress = Server.getRemoteIp(); ++ if (dnAddress != null) { ++ // Mostly called inside an RPC, update ip and peer hostname ++ String hostname = dnAddress.getHostName(); ++ String ip = dnAddress.getHostAddress(); ++ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { ++ // Reject registration of unresolved datanode to prevent performance ++ // impact of repetitive DNS lookups later. ++ final String message = "hostname cannot be resolved (ip=" ++ + ip + ", hostname=" + hostname + ")"; ++ LOG.warn("Unresolved datanode registration: " + message); ++ throw new DisallowedDatanodeException(nodeReg, message); ++ } ++ // update node registration with the ip and hostname from rpc request ++ nodeReg.setIpAddr(ip); ++ nodeReg.setPeerHostName(hostname); + } +- // update node registration with the ip and hostname from rpc request +- nodeReg.setIpAddr(ip); +- nodeReg.setPeerHostName(hostname); + } +- ++ + try { + nodeReg.setExportedKeys(blockManager.getBlockKeys()); +- ++ + // Checks if the node is not on the hosts list. If it is not, then + // it will be disallowed from registering. + if (!hostConfigManager.isIncluded(nodeReg)) { +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +index 9b5343321d3..790d508e5ea 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +@@ -100,6 +100,11 @@ public class DNConf { + final boolean syncOnClose; + final boolean encryptDataTransfer; + final boolean connectToDnViaHostname; ++ private final String registeredHostname; ++ private final int registeredDataPort; ++ private final int registeredHttpPort; ++ private final int registeredHttpsPort; ++ private final int registeredIpcPort; + final boolean overwriteDownstreamDerivedQOP; + private final boolean pmemCacheRecoveryEnabled; + +@@ -188,6 +193,11 @@ public DNConf(final Configurable dn) { + connectToDnViaHostname = getConf().getBoolean( + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); ++ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); ++ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); ++ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); ++ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); ++ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); + this.blockReportInterval = getConf().getLong( + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); +@@ -362,6 +372,66 @@ public boolean getConnectToDnViaHostname() { + return connectToDnViaHostname; + } + ++ /** ++ * Returns a hostname to register with the cluster instead of the system ++ * hostname. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected hostname. 
++ * ++ * @return null if the system hostname should be used, otherwise a hostname ++ */ ++ public String getRegisteredHostname() { ++ return registeredHostname; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * data port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredDataPort() { ++ return registeredDataPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTP port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpPort() { ++ return registeredHttpPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTPS port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpsPort() { ++ return registeredHttpsPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * IPC port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredIpcPort() { ++ return registeredIpcPort; ++ } ++ + /** + * Returns socket timeout + * +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +index 96c4ad9ae28..fdb8e631dc8 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +@@ -117,6 +117,7 @@ + import java.util.Iterator; + import java.util.List; + import java.util.Map; ++import java.util.Optional; + import java.util.Map.Entry; + import java.util.Set; + import java.util.UUID; +@@ -1876,11 +1877,35 @@ DatanodeRegistration createBPRegistration(NamespaceInfo nsInfo) { + NodeType.DATA_NODE); + } + +- DatanodeID dnId = new DatanodeID( +- streamingAddr.getAddress().getHostAddress(), hostName, +- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), +- infoSecurePort, getIpcPort()); +- return new DatanodeRegistration(dnId, storageInfo, ++ String registeredHostname = Optional ++ .ofNullable(dnConf.getRegisteredHostname()) ++ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); ++ int registeredDataPort = dnConf.getRegisteredDataPort(); ++ if (registeredDataPort == -1) { ++ registeredDataPort = getXferPort(); ++ } ++ int registeredHttpPort = dnConf.getRegisteredHttpPort(); ++ if (registeredHttpPort == -1) { ++ registeredHttpPort = getInfoPort(); ++ } ++ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); ++ if (registeredHttpsPort == -1) { ++ registeredHttpsPort = getInfoSecurePort(); ++ } ++ int registeredIpcPort = dnConf.getRegisteredIpcPort(); ++ if (registeredIpcPort == -1) { ++ registeredIpcPort = getIpcPort(); ++ } ++ ++ 
DatanodeID dnId = new DatanodeID(registeredHostname, ++ hostName, ++ storage.getDatanodeUuid(), ++ registeredDataPort, ++ registeredHttpPort, ++ registeredHttpsPort, ++ registeredIpcPort); ++ ++ return new DatanodeRegistration(dnId, storageInfo, + new ExportedBlockKeys(), VersionInfo.getVersion()); + } + From 242cbd8fc95db1b2037d6af4b44f4d0f86892439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Natalie=20Klestrup=20R=C3=B6ijezon?= Date: Thu, 11 Jan 2024 13:59:59 +0100 Subject: [PATCH 5/5] Changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f21441e3d..6637f31ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file. ### Added - omid: init at 1.1.0 ([#493]). +- hadoop: Allow datanodes to override their registration addresses ([#506]). ### Changed @@ -17,6 +18,7 @@ All notable changes to this project will be documented in this file. - hadoop: Build from source ([#526]). [#493]: https://github.com/stackabletech/docker-images/pull/493 +[#506]: https://github.com/stackabletech/docker-images/pull/506 [#514]: https://github.com/stackabletech/docker-images/pull/514 [#517]: https://github.com/stackabletech/docker-images/pull/517 [#519]: https://github.com/stackabletech/docker-images/pull/519
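
Usage note: below is a minimal sketch of how the override properties introduced by the registration patch might be wired together in hdfs-site.xml. The property names are the final ones used in the 3.3.4/3.3.6 patch variants above; the hostname and port values are purely illustrative and not taken from the patches.

    <!-- NameNode side: trust the address the DataNode reports instead of the RPC peer address
         (unsafe outside controlled environments, hence the "unsafe" prefix in the key) -->
    <property>
      <name>dfs.namenode.datanode.registration.unsafe.allow-address-override</name>
      <value>true</value>
    </property>

    <!-- DataNode side: hostname and ports to register with the NameNode; example values only -->
    <property>
      <name>dfs.datanode.registered.hostname</name>
      <value>datanode-0.example.com</value>
    </property>
    <property>
      <name>dfs.datanode.registered.port</name>
      <value>31001</value>
    </property>
    <property>
      <name>dfs.datanode.registered.http.port</name>
      <value>31002</value>
    </property>
    <property>
      <name>dfs.datanode.registered.https.port</name>
      <value>31003</value>
    </property>
    <property>
      <name>dfs.datanode.registered.ipc.port</name>
      <value>31004</value>
    </property>

Any property left unset falls back to the detected value: a null hostname means the streaming address is used, and a port of -1 means the actual listening port is registered, matching the defaults read in DNConf.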