diff --git a/.github/workflows/precompile_hadoop.yaml b/.github/workflows/precompile_hadoop.yaml new file mode 100644 index 000000000..faec23714 --- /dev/null +++ b/.github/workflows/precompile_hadoop.yaml @@ -0,0 +1,51 @@ +--- +name: Compile Hadoop +run-name: | + Compile Hadoop (attempt #${{ github.run_attempt }}) + +on: + workflow_dispatch: + push: + branches: [main] + paths: + - precompiled/hadoop/** + # I don't think there's any reason to rebuild just because the workflow changed. + # If the version is compiled (for that patch set), then it needs no rebuild. + # - .github/actions/** + # - .github/workflows/precompile_hadoop.yaml + # - .github/workflows/reusable_build_image.yaml + +permissions: {} + +jobs: + # This is a separate job so that it remains consistent if a rerun of failed jobs is needed. + # It is used in place of the "sdp-version" passed to the build action. + generate_build_timestamp: + name: Generate unix timestamp + runs-on: ubuntu-latest + steps: + - shell: bash + id: unix_timestamp + run: | + set -euo pipefail + UNIX_TIMESTAMP=$(date +%s) + echo "unix_timestamp=$UNIX_TIMESTAMP" | tee -a "$GITHUB_OUTPUT" + outputs: + unix_timestamp: ${{ steps.unix_timestamp.outputs.unix_timestamp }} + + build_image: + name: Reusable Workflow + uses: ./.github/workflows/reusable_build_image.yaml + needs: [generate_build_timestamp] + secrets: + harbor-robot-secret: ${{ secrets.HARBOR_ROBOT_PRECOMPILED_GITHUB_ACTION_BUILD_SECRET }} + slack-token: ${{ secrets.SLACK_CONTAINER_IMAGE_TOKEN }} + permissions: + # Needed for cosign (sign and attest) + id-token: write + # Needed for checkout + contents: read + with: + product-name: hadoop + sdp-version: ${{ needs.generate_build_timestamp.outputs.unix_timestamp }} + registry-namespace: precompiled diff --git a/.github/workflows/reusable_build_image.yaml b/.github/workflows/reusable_build_image.yaml index ebc4b2165..432578537 100644 --- a/.github/workflows/reusable_build_image.yaml +++ b/.github/workflows/reusable_build_image.yaml @@ -146,6 +146,8 @@ jobs: name: Failure Notification needs: [generate_version_dimension, build, publish_manifests] runs-on: ubuntu-latest + # TODO (@NickLarsenNZ): Allow a condition from input so that we can always + # be notified of new builds for precompiled product images. if: failure() || (github.run_attempt > 1 && !cancelled()) steps: - name: Send Notification diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fdf38ff1..2a863895e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- hadoop: Add precompiled hadoop for later reuse in dependent images ([#1466]). + ### Changed - hbase: Update `hbase-opa-authorizer` from `0.1.0` to `0.2.0` and then `0.3.0` ([#1446], [#1454]). @@ -17,6 +21,7 @@ All notable changes to this project will be documented in this file. 
[#1452]: https://github.com/stackabletech/docker-images/pull/1452 [#1453]: https://github.com/stackabletech/docker-images/pull/1453 [#1454]: https://github.com/stackabletech/docker-images/pull/1454 +[#1466]: https://github.com/stackabletech/docker-images/pull/1466 ## [26.3.0] - 2026-03-16 diff --git a/precompiled/hadoop/Dockerfile b/precompiled/hadoop/Dockerfile new file mode 100644 index 000000000..396d20297 --- /dev/null +++ b/precompiled/hadoop/Dockerfile @@ -0,0 +1,128 @@ +# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 +# check=error=true + +FROM local-image/java-devel AS hadoop-builder + +ARG PRODUCT_VERSION +ARG RELEASE_VERSION +ARG PROTOBUF_VERSION +ARG STACKABLE_USER_UID + +WORKDIR /stackable + +COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/patchable.toml /stackable/src/shared/protobuf/stackable/patches/patchable.toml +COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/${PROTOBUF_VERSION} /stackable/src/shared/protobuf/stackable/patches/${PROTOBUF_VERSION} + +RUN <hadoop-pipes<\/artifactId>/,/<\/dependency>/ { s/.*<\/version>/'"$ORIGINAL_VERSION"'<\/version>/ }' -i hadoop-tools/hadoop-tools-dist/pom.xml + +# Create snapshot of the source code including custom patches +tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz . + +# We do not pass require.snappy because that is only built in to the MapReduce client and we don't need that +# +# Passing require.openssl SHOULD make the build fail if OpenSSL is not present. +# This does not work properly however because this builder image contains the openssl-devel package which creates a symlink from /usr/lib64/libcrypto.so to the real version. +# Therefore, this build does work but the final image does NOT contain the openssl-devel package which is why it fails there which is why we have to create the symlink over there manually. +# We still leave this flag in to automatically fail should anything with the packages or symlinks ever fail. +mvn \ + clean package install \ + -Pdist,native \ + -pl '!hadoop-tools/hadoop-pipes' \ + -Dhadoop.version=${NEW_VERSION} \ + -Drequire.fuse=true \ + -Drequire.openssl=true \ + -DskipTests \ + -Dmaven.javadoc.skip=true + +mkdir -p /stackable/patched-libs/maven/org/apache +cp -r /stackable/.m2/repository/org/apache/hadoop /stackable/patched-libs/maven/org/apache + +rm -rf hadoop-dist/target/hadoop-${NEW_VERSION}/share/hadoop/yarn +rm -rf hadoop-dist/target/hadoop-${NEW_VERSION}/share/hadoop/mapreduce +rm hadoop-dist/target/hadoop-${NEW_VERSION}/share/hadoop/client/hadoop-client-minicluster-*.jar +rm hadoop-dist/target/hadoop-${NEW_VERSION}/share/hadoop/tools/lib/hadoop-minicluster-*.jar + +cp -r hadoop-dist/target/hadoop-${NEW_VERSION} /stackable/hadoop-${NEW_VERSION} +sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" hadoop-dist/target/bom.json +mv hadoop-dist/target/bom.json /stackable/hadoop-${NEW_VERSION}/hadoop-${NEW_VERSION}.cdx.json + +# HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves +cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${NEW_VERSION}/bin + +# Remove source code +(cd .. 
&& rm -r ${PRODUCT_VERSION}) + +ln -s /stackable/hadoop-${NEW_VERSION} /stackable/hadoop + +mv /build/fuse_dfs_wrapper /stackable/hadoop/bin + +# Remove unneeded binaries: +# - code sources +# - mapreduce/yarn binaries that were built as cross-project dependencies +# - minicluster (only used for testing) and test .jars +# - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610. +rm -rf /stackable/hadoop/share/hadoop/common/sources/ +rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/ +rm -rf /stackable/hadoop/share/hadoop/tools/sources/ +rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar +rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar +rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar +find /stackable/hadoop -name 'hadoop-minicluster-*.jar' -type f -delete +find /stackable/hadoop -name 'hadoop-client-minicluster-*.jar' -type f -delete +find /stackable/hadoop -name 'hadoop-*tests.jar' -type f -delete +rm -rf /stackable/.m2 + +# Set correct groups; make sure only required artifacts for the final image are located in /stackable +chmod -R g=u /stackable +EOF diff --git a/precompiled/hadoop/boil-config.toml b/precompiled/hadoop/boil-config.toml new file mode 100644 index 000000000..7aa9fbeb6 --- /dev/null +++ b/precompiled/hadoop/boil-config.toml @@ -0,0 +1,11 @@ +[versions."3.3.6".local-images] +java-devel = "11" + +[versions."3.3.6".build-arguments] +protobuf-version = "3.7.1" + +[versions."3.4.2".local-images] +java-devel = "11" + +[versions."3.4.2".build-arguments] +protobuf-version = "3.7.1" diff --git a/precompiled/hadoop/stackable/fuse_dfs_wrapper b/precompiled/hadoop/stackable/fuse_dfs_wrapper new file mode 100755 index 000000000..ae6f87c2e --- /dev/null +++ b/precompiled/hadoop/stackable/fuse_dfs_wrapper @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +set -e + +# JNI does _NOT_ support wildcards in the Classpath so we can't use the usual /stackable/hadoop/share/hadoop/client/* here +# Instead we need to use find and then concatenate them all with colons. 
+# There is a trailing colon at the end but that does seem to work just fine +# Not all three directories might be needed, common is definitely needed +CLASSPATH=$(find -L /stackable/hadoop/share/hadoop/client/ /stackable/hadoop/share/hadoop/hdfs/ /stackable/hadoop/share/hadoop/common -type f -name "*.jar" -print0 | xargs -0 printf "%s:") + +export CLASSPATH=$HADOOP_CONF_DIR:$CLASSPATH +export LD_LIBRARY_PATH=/stackable/hadoop/lib/native:/usr/lib/jvm/jre/lib/server +export PATH="${PATH}":/stackable/hadoop/bin +export HADOOP_HOME=/stackable/hadoop + +fuse_dfs "$@" diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0001-YARN-11527-Update-node.js.patch b/precompiled/hadoop/stackable/patches/3.3.6/0001-YARN-11527-Update-node.js.patch new file mode 100644 index 000000000..6b511dcfe --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0001-YARN-11527-Update-node.js.patch @@ -0,0 +1,22 @@ +From bd2fa3a3a5ef57c5f6ca4f0e5535a1cd875e50d1 Mon Sep 17 00:00:00 2001 +From: Siegfried Weber +Date: Thu, 21 Dec 2023 13:51:13 +0100 +Subject: YARN-11527: Update node.js + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index f1ac43ed5b..9b01858e0e 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -213,7 +213,7 @@ + 1.1.3.Final + 5.4.0 + 9.8.1 +- v12.22.1 ++ v14.17.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0002-Allow-overriding-datanode-registration-addresses.patch b/precompiled/hadoop/stackable/patches/3.3.6/0002-Allow-overriding-datanode-registration-addresses.patch new file mode 100644 index 000000000..56def7424 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0002-Allow-overriding-datanode-registration-addresses.patch @@ -0,0 +1,259 @@ +From 77312867feaf8931ce8650208ebcbdea5fcfdb0e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Natalie=20Klestrup=20R=C3=B6ijezon?= +Date: Thu, 11 Jan 2024 14:01:02 +0100 +Subject: Allow overriding datanode registration addresses + +--- + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 9 +++ + .../blockmanagement/DatanodeManager.java | 43 +++++++----- + .../hadoop/hdfs/server/datanode/DNConf.java | 70 +++++++++++++++++++ + .../hadoop/hdfs/server/datanode/DataNode.java | 35 ++++++++-- + 4 files changed, 135 insertions(+), 22 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +index e3f4bfcde8..3d65bcad22 100755 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +@@ -147,6 +147,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; + public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; + public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; ++ ++ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; ++ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = "dfs.datanode.registered.https.port"; ++ 
public static final String DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; ++ + public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; + public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; + public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; +@@ -454,6 +461,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = + TimeUnit.SECONDS.toMillis(2); + ++ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; ++ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; + public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; + public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +index 07381fc696..8aeb92cff1 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +@@ -180,6 +180,8 @@ public class DatanodeManager { + private boolean hasClusterEverBeenMultiRack = false; + + private final boolean checkIpHostnameInRegistration; ++ private final boolean allowRegistrationAddressOverride; ++ + /** + * Whether we should tell datanodes what to cache in replies to + * heartbeat messages. +@@ -316,6 +318,11 @@ public class DatanodeManager { + // Block invalidate limit also has some dependency on heartbeat interval. + // Check setBlockInvalidateLimit(). + setBlockInvalidateLimit(configuredBlockInvalidateLimit); ++ this.allowRegistrationAddressOverride = conf.getBoolean( ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); ++ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY ++ + "=" + allowRegistrationAddressOverride); + this.checkIpHostnameInRegistration = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); +@@ -1138,27 +1145,29 @@ public class DatanodeManager { + */ + public void registerDatanode(DatanodeRegistration nodeReg) + throws DisallowedDatanodeException, UnresolvedTopologyException { +- InetAddress dnAddress = Server.getRemoteIp(); +- if (dnAddress != null) { +- // Mostly called inside an RPC, update ip and peer hostname +- String hostname = dnAddress.getHostName(); +- String ip = dnAddress.getHostAddress(); +- if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { +- // Reject registration of unresolved datanode to prevent performance +- // impact of repetitive DNS lookups later. 
+- final String message = "hostname cannot be resolved (ip=" +- + ip + ", hostname=" + hostname + ")"; +- LOG.warn("Unresolved datanode registration: " + message); +- throw new DisallowedDatanodeException(nodeReg, message); ++ if (!allowRegistrationAddressOverride) { ++ InetAddress dnAddress = Server.getRemoteIp(); ++ if (dnAddress != null) { ++ // Mostly called inside an RPC, update ip and peer hostname ++ String hostname = dnAddress.getHostName(); ++ String ip = dnAddress.getHostAddress(); ++ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { ++ // Reject registration of unresolved datanode to prevent performance ++ // impact of repetitive DNS lookups later. ++ final String message = "hostname cannot be resolved (ip=" ++ + ip + ", hostname=" + hostname + ")"; ++ LOG.warn("Unresolved datanode registration: " + message); ++ throw new DisallowedDatanodeException(nodeReg, message); ++ } ++ // update node registration with the ip and hostname from rpc request ++ nodeReg.setIpAddr(ip); ++ nodeReg.setPeerHostName(hostname); + } +- // update node registration with the ip and hostname from rpc request +- nodeReg.setIpAddr(ip); +- nodeReg.setPeerHostName(hostname); + } +- ++ + try { + nodeReg.setExportedKeys(blockManager.getBlockKeys()); +- ++ + // Checks if the node is not on the hosts list. If it is not, then + // it will be disallowed from registering. + if (!hostConfigManager.isIncluded(nodeReg)) { +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +index 9b5343321d..790d508e5e 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +@@ -100,6 +100,11 @@ public class DNConf { + final boolean syncOnClose; + final boolean encryptDataTransfer; + final boolean connectToDnViaHostname; ++ private final String registeredHostname; ++ private final int registeredDataPort; ++ private final int registeredHttpPort; ++ private final int registeredHttpsPort; ++ private final int registeredIpcPort; + final boolean overwriteDownstreamDerivedQOP; + private final boolean pmemCacheRecoveryEnabled; + +@@ -188,6 +193,11 @@ public class DNConf { + connectToDnViaHostname = getConf().getBoolean( + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); ++ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); ++ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); ++ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); ++ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); ++ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); + this.blockReportInterval = getConf().getLong( + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); +@@ -362,6 +372,66 @@ public class DNConf { + return connectToDnViaHostname; + } + ++ /** ++ * Returns a hostname to register with the cluster instead of the system ++ * hostname. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected hostname. 
++ * ++ * @return null if the system hostname should be used, otherwise a hostname ++ */ ++ public String getRegisteredHostname() { ++ return registeredHostname; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * data port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredDataPort() { ++ return registeredDataPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTP port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpPort() { ++ return registeredHttpPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTPS port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpsPort() { ++ return registeredHttpsPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * IPC port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredIpcPort() { ++ return registeredIpcPort; ++ } ++ + /** + * Returns socket timeout + * +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +index 96c4ad9ae2..66b75cff3f 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +@@ -117,6 +117,7 @@ import java.util.HashSet; + import java.util.Iterator; + import java.util.List; + import java.util.Map; ++import java.util.Optional; + import java.util.Map.Entry; + import java.util.Set; + import java.util.UUID; +@@ -1876,11 +1877,35 @@ public class DataNode extends ReconfigurableBase + NodeType.DATA_NODE); + } + +- DatanodeID dnId = new DatanodeID( +- streamingAddr.getAddress().getHostAddress(), hostName, +- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), +- infoSecurePort, getIpcPort()); +- return new DatanodeRegistration(dnId, storageInfo, ++ String registeredHostname = Optional ++ .ofNullable(dnConf.getRegisteredHostname()) ++ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); ++ int registeredDataPort = dnConf.getRegisteredDataPort(); ++ if (registeredDataPort == -1) { ++ registeredDataPort = getXferPort(); ++ } ++ int registeredHttpPort = dnConf.getRegisteredHttpPort(); ++ if (registeredHttpPort == -1) { ++ registeredHttpPort = getInfoPort(); ++ } ++ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); ++ if (registeredHttpsPort == -1) { ++ registeredHttpsPort = getInfoSecurePort(); ++ } ++ int registeredIpcPort = dnConf.getRegisteredIpcPort(); ++ if (registeredIpcPort == -1) { ++ registeredIpcPort = getIpcPort(); ++ } ++ ++ 
DatanodeID dnId = new DatanodeID(registeredHostname, ++ registeredHostname, ++ storage.getDatanodeUuid(), ++ registeredDataPort, ++ registeredHttpPort, ++ registeredHttpsPort, ++ registeredIpcPort); ++ ++ return new DatanodeRegistration(dnId, storageInfo, + new ExportedBlockKeys(), VersionInfo.getVersion()); + } + diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0003-HADOOP-18055-Add-async-profiler.patch b/precompiled/hadoop/stackable/patches/3.3.6/0003-HADOOP-18055-Add-async-profiler.patch new file mode 100644 index 000000000..9516f5c81 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0003-HADOOP-18055-Add-async-profiler.patch @@ -0,0 +1,1014 @@ +From 773ce66db817720e4f44ff307195014240b141a7 Mon Sep 17 00:00:00 2001 +From: Siegfried Weber +Date: Tue, 6 Feb 2024 16:10:54 +0100 +Subject: HADOOP-18055: Add async-profiler + +--- + .../org/apache/hadoop/http/HttpServer2.java | 21 + + .../hadoop/http/ProfileOutputServlet.java | 87 ++++ + .../apache/hadoop/http/ProfileServlet.java | 394 ++++++++++++++++++ + .../hadoop/http/ProfilerDisabledServlet.java | 44 ++ + .../org/apache/hadoop/util/ProcessUtils.java | 74 ++++ + .../src/main/resources/core-default.xml | 2 +- + .../src/site/markdown/AsyncProfilerServlet.md | 145 +++++++ + .../http/TestDisabledProfileServlet.java | 95 +++++ + .../hadoop-kms/src/site/markdown/index.md.vm | 5 +- + .../src/site/markdown/ServerSetup.md.vm | 5 +- + hadoop-project/src/site/site.xml | 1 + + 11 files changed, 868 insertions(+), 5 deletions(-) + create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java + create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java + create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java + create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java + create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md + create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java + +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java +index 8dadbe390a..1f66a7e809 100644 +--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java +@@ -27,6 +27,7 @@ import java.net.InetSocketAddress; + import java.net.MalformedURLException; + import java.net.URI; + import java.net.URL; ++import java.nio.file.Files; + import java.nio.file.Path; + import java.nio.file.Paths; + import java.util.List; +@@ -744,6 +745,26 @@ public final class HttpServer2 implements FilterContainer { + + addDefaultServlets(); + addPrometheusServlet(conf); ++ addAsyncProfilerServlet(contexts); ++ } ++ ++ private void addAsyncProfilerServlet(ContextHandlerCollection contexts) throws IOException { ++ final String asyncProfilerHome = ProfileServlet.getAsyncProfilerHome(); ++ if (asyncProfilerHome != null && !asyncProfilerHome.trim().isEmpty()) { ++ addServlet("prof", "/prof", ProfileServlet.class); ++ Path tmpDir = Paths.get(ProfileServlet.OUTPUT_DIR); ++ if (Files.notExists(tmpDir)) { ++ Files.createDirectories(tmpDir); ++ } ++ ServletContextHandler genCtx = new 
ServletContextHandler(contexts, "/prof-output-hadoop"); ++ genCtx.addServlet(ProfileOutputServlet.class, "/*"); ++ genCtx.setResourceBase(tmpDir.toAbsolutePath().toString()); ++ genCtx.setDisplayName("prof-output-hadoop"); ++ } else { ++ addServlet("prof", "/prof", ProfilerDisabledServlet.class); ++ LOG.info("ASYNC_PROFILER_HOME environment variable and async.profiler.home system property " ++ + "not specified. Disabling /prof endpoint."); ++ } + } + + private void addPrometheusServlet(Configuration conf) { +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java +new file mode 100644 +index 0000000000..1ecc21f375 +--- /dev/null ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java +@@ -0,0 +1,87 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.hadoop.http; ++ ++import java.io.File; ++import java.io.IOException; ++import java.util.regex.Pattern; ++import javax.servlet.ServletException; ++import javax.servlet.http.HttpServletRequest; ++import javax.servlet.http.HttpServletResponse; ++ ++import org.eclipse.jetty.servlet.DefaultServlet; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++ ++import org.apache.hadoop.classification.InterfaceAudience; ++ ++/** ++ * Servlet to serve files generated by {@link ProfileServlet}. ++ */ ++@InterfaceAudience.Private ++public class ProfileOutputServlet extends DefaultServlet { ++ ++ private static final long serialVersionUID = 1L; ++ ++ private static final Logger LOG = LoggerFactory.getLogger(ProfileOutputServlet.class); ++ // default refresh period 2 sec ++ private static final int REFRESH_PERIOD = 2; ++ // Alphanumeric characters, plus percent (url-encoding), equals, ampersand, dot and hyphen ++ private static final Pattern ALPHA_NUMERIC = Pattern.compile("[a-zA-Z0-9%=&.\\-]*"); ++ ++ @Override ++ protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) ++ throws ServletException, IOException { ++ if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) { ++ resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED); ++ ProfileServlet.setResponseHeader(resp); ++ resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!"); ++ return; ++ } ++ ++ String absoluteDiskPath = getServletContext().getRealPath(req.getPathInfo()); ++ File requestedFile = new File(absoluteDiskPath); ++ // async-profiler version 1.4 writes 'Started [cpu] profiling' to output file when profiler is ++ // running which gets replaced by final output. 
If final output is not ready yet, the file size ++ // will be <100 bytes (in all modes). ++ if (requestedFile.length() < 100) { ++ LOG.info("{} is incomplete. Sending auto-refresh header.", requestedFile); ++ String refreshUrl = req.getRequestURI(); ++ // Rebuild the query string (if we have one) ++ if (req.getQueryString() != null) { ++ refreshUrl += "?" + sanitize(req.getQueryString()); ++ } ++ ProfileServlet.setResponseHeader(resp); ++ resp.setHeader("Refresh", REFRESH_PERIOD + ";" + refreshUrl); ++ resp.getWriter().write("This page will be auto-refreshed every " + REFRESH_PERIOD ++ + " seconds until the output file is ready. Redirecting to " + refreshUrl); ++ } else { ++ super.doGet(req, resp); ++ } ++ } ++ ++ static String sanitize(String input) { ++ // Basic test to try to avoid any XSS attacks or HTML content showing up. ++ // Duplicates HtmlQuoting a little, but avoid destroying ampersand. ++ if (ALPHA_NUMERIC.matcher(input).matches()) { ++ return input; ++ } ++ throw new RuntimeException("Non-alphanumeric data found in input, aborting."); ++ } ++} +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +new file mode 100644 +index 0000000000..fc0ec7736e +--- /dev/null ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +@@ -0,0 +1,394 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.hadoop.http; ++ ++import java.io.File; ++import java.io.IOException; ++import java.util.ArrayList; ++import java.util.List; ++import java.util.concurrent.TimeUnit; ++import java.util.concurrent.atomic.AtomicInteger; ++import java.util.concurrent.locks.Lock; ++import java.util.concurrent.locks.ReentrantLock; ++import javax.servlet.http.HttpServlet; ++import javax.servlet.http.HttpServletRequest; ++import javax.servlet.http.HttpServletResponse; ++ ++import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++ ++import org.apache.hadoop.classification.InterfaceAudience; ++import org.apache.hadoop.util.ProcessUtils; ++ ++/** ++ * Servlet that runs async-profiler as web-endpoint. ++ *

++ * Following options from async-profiler can be specified as query paramater. ++ * // -e event profiling event: cpu|alloc|lock|cache-misses etc. ++ * // -d duration run profiling for 'duration' seconds (integer) ++ * // -i interval sampling interval in nanoseconds (long) ++ * // -j jstackdepth maximum Java stack depth (integer) ++ * // -b bufsize frame buffer size (long) ++ * // -t profile different threads separately ++ * // -s simple class names instead of FQN ++ * // -o fmt[,fmt...] output format: summary|traces|flat|collapsed|svg|tree|jfr|html ++ * // --width px SVG width pixels (integer) ++ * // --height px SVG frame height pixels (integer) ++ * // --minwidth px skip frames smaller than px (double) ++ * // --reverse generate stack-reversed FlameGraph / Call tree ++ *

++ * Example: ++ * If Namenode http address is localhost:9870, and ResourceManager http address is localhost:8088, ++ * ProfileServlet running with async-profiler setup can be accessed with ++ * http://localhost:9870/prof and http://localhost:8088/prof for Namenode and ResourceManager ++ * processes respectively. ++ * Deep dive into some params: ++ * - To collect 10 second CPU profile of current process i.e. Namenode (returns FlameGraph svg) ++ * curl "http://localhost:9870/prof" ++ * - To collect 10 second CPU profile of pid 12345 (returns FlameGraph svg) ++ * curl "http://localhost:9870/prof?pid=12345" (For instance, provide pid of Datanode) ++ * - To collect 30 second CPU profile of pid 12345 (returns FlameGraph svg) ++ * curl "http://localhost:9870/prof?pid=12345&duration=30" ++ * - To collect 1 minute CPU profile of current process and output in tree format (html) ++ * curl "http://localhost:9870/prof?output=tree&duration=60" ++ * - To collect 10 second heap allocation profile of current process (returns FlameGraph svg) ++ * curl "http://localhost:9870/prof?event=alloc" ++ * - To collect lock contention profile of current process (returns FlameGraph svg) ++ * curl "http://localhost:9870/prof?event=lock" ++ *

++ * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) ++ * // Perf events: ++ * // cpu ++ * // page-faults ++ * // context-switches ++ * // cycles ++ * // instructions ++ * // cache-references ++ * // cache-misses ++ * // branches ++ * // branch-misses ++ * // bus-cycles ++ * // L1-dcache-load-misses ++ * // LLC-load-misses ++ * // dTLB-load-misses ++ * // mem:breakpoint ++ * // trace:tracepoint ++ * // Java events: ++ * // alloc ++ * // lock ++ */ ++@InterfaceAudience.Private ++public class ProfileServlet extends HttpServlet { ++ ++ private static final long serialVersionUID = 1L; ++ private static final Logger LOG = LoggerFactory.getLogger(ProfileServlet.class); ++ ++ static final String ACCESS_CONTROL_ALLOW_METHODS = "Access-Control-Allow-Methods"; ++ static final String ACCESS_CONTROL_ALLOW_ORIGIN = "Access-Control-Allow-Origin"; ++ private static final String ALLOWED_METHODS = "GET"; ++ private static final String CONTENT_TYPE_TEXT = "text/plain; charset=utf-8"; ++ private static final String ASYNC_PROFILER_HOME_ENV = "ASYNC_PROFILER_HOME"; ++ private static final String ASYNC_PROFILER_HOME_SYSTEM_PROPERTY = "async.profiler.home"; ++ private static final String PROFILER_SCRIPT = "/profiler.sh"; ++ private static final int DEFAULT_DURATION_SECONDS = 10; ++ private static final AtomicInteger ID_GEN = new AtomicInteger(0); ++ ++ static final String OUTPUT_DIR = System.getProperty("java.io.tmpdir") + "/prof-output-hadoop"; ++ ++ private enum Event { ++ ++ CPU("cpu"), ++ ALLOC("alloc"), ++ LOCK("lock"), ++ PAGE_FAULTS("page-faults"), ++ CONTEXT_SWITCHES("context-switches"), ++ CYCLES("cycles"), ++ INSTRUCTIONS("instructions"), ++ CACHE_REFERENCES("cache-references"), ++ CACHE_MISSES("cache-misses"), ++ BRANCHES("branches"), ++ BRANCH_MISSES("branch-misses"), ++ BUS_CYCLES("bus-cycles"), ++ L1_DCACHE_LOAD_MISSES("L1-dcache-load-misses"), ++ LLC_LOAD_MISSES("LLC-load-misses"), ++ DTLB_LOAD_MISSES("dTLB-load-misses"), ++ MEM_BREAKPOINT("mem:breakpoint"), ++ TRACE_TRACEPOINT("trace:tracepoint"); ++ ++ private final String internalName; ++ ++ Event(final String internalName) { ++ this.internalName = internalName; ++ } ++ ++ public String getInternalName() { ++ return internalName; ++ } ++ ++ public static Event fromInternalName(final String name) { ++ for (Event event : values()) { ++ if (event.getInternalName().equalsIgnoreCase(name)) { ++ return event; ++ } ++ } ++ ++ return null; ++ } ++ } ++ ++ private enum Output { ++ SUMMARY, ++ TRACES, ++ FLAT, ++ COLLAPSED, ++ // No SVG in 2.x asyncprofiler. ++ SVG, ++ TREE, ++ JFR, ++ // In 2.x asyncprofiler, this is how you get flamegraphs. 
++ HTML ++ } ++ ++ private final Lock profilerLock = new ReentrantLock(); ++ private transient volatile Process process; ++ private final String asyncProfilerHome; ++ private Integer pid; ++ ++ public ProfileServlet() { ++ this.asyncProfilerHome = getAsyncProfilerHome(); ++ this.pid = ProcessUtils.getPid(); ++ LOG.info("Servlet process PID: {} asyncProfilerHome: {}", pid, asyncProfilerHome); ++ } ++ ++ @Override ++ protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) ++ throws IOException { ++ if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) { ++ resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED); ++ setResponseHeader(resp); ++ resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!"); ++ return; ++ } ++ ++ // make sure async profiler home is set ++ if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { ++ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); ++ setResponseHeader(resp); ++ resp.getWriter().write("ASYNC_PROFILER_HOME env is not set.\n\n" ++ + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n" ++ + "environment is properly configured."); ++ return; ++ } ++ ++ // if pid is explicitly specified, use it else default to current process ++ pid = getInteger(req, "pid", pid); ++ ++ // if pid is not specified in query param and if current process pid cannot be determined ++ if (pid == null) { ++ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); ++ setResponseHeader(resp); ++ resp.getWriter().write( ++ "'pid' query parameter unspecified or unable to determine PID of current process."); ++ return; ++ } ++ ++ final int duration = getInteger(req, "duration", DEFAULT_DURATION_SECONDS); ++ final Output output = getOutput(req); ++ final Event event = getEvent(req); ++ final Long interval = getLong(req, "interval"); ++ final Integer jstackDepth = getInteger(req, "jstackdepth", null); ++ final Long bufsize = getLong(req, "bufsize"); ++ final boolean thread = req.getParameterMap().containsKey("thread"); ++ final boolean simple = req.getParameterMap().containsKey("simple"); ++ final Integer width = getInteger(req, "width", null); ++ final Integer height = getInteger(req, "height", null); ++ final Double minwidth = getMinWidth(req); ++ final boolean reverse = req.getParameterMap().containsKey("reverse"); ++ ++ if (process == null || !process.isAlive()) { ++ try { ++ int lockTimeoutSecs = 3; ++ if (profilerLock.tryLock(lockTimeoutSecs, TimeUnit.SECONDS)) { ++ try { ++ File outputFile = new File(OUTPUT_DIR, ++ "async-prof-pid-" + pid + "-" + event.name().toLowerCase() + "-" + ID_GEN ++ .incrementAndGet() + "." 
+ output.name().toLowerCase()); ++ List cmd = new ArrayList<>(); ++ cmd.add(asyncProfilerHome + PROFILER_SCRIPT); ++ cmd.add("-e"); ++ cmd.add(event.getInternalName()); ++ cmd.add("-d"); ++ cmd.add("" + duration); ++ cmd.add("-o"); ++ cmd.add(output.name().toLowerCase()); ++ cmd.add("-f"); ++ cmd.add(outputFile.getAbsolutePath()); ++ if (interval != null) { ++ cmd.add("-i"); ++ cmd.add(interval.toString()); ++ } ++ if (jstackDepth != null) { ++ cmd.add("-j"); ++ cmd.add(jstackDepth.toString()); ++ } ++ if (bufsize != null) { ++ cmd.add("-b"); ++ cmd.add(bufsize.toString()); ++ } ++ if (thread) { ++ cmd.add("-t"); ++ } ++ if (simple) { ++ cmd.add("-s"); ++ } ++ if (width != null) { ++ cmd.add("--width"); ++ cmd.add(width.toString()); ++ } ++ if (height != null) { ++ cmd.add("--height"); ++ cmd.add(height.toString()); ++ } ++ if (minwidth != null) { ++ cmd.add("--minwidth"); ++ cmd.add(minwidth.toString()); ++ } ++ if (reverse) { ++ cmd.add("--reverse"); ++ } ++ cmd.add(pid.toString()); ++ process = ProcessUtils.runCmdAsync(cmd); ++ ++ // set response and set refresh header to output location ++ setResponseHeader(resp); ++ resp.setStatus(HttpServletResponse.SC_ACCEPTED); ++ String relativeUrl = "/prof-output-hadoop/" + outputFile.getName(); ++ resp.getWriter().write("Started [" + event.getInternalName() ++ + "] profiling. This page will automatically redirect to " + relativeUrl + " after " ++ + duration + " seconds. " ++ + "If empty diagram and Linux 4.6+, see 'Basic Usage' section on the Async " ++ + "Profiler Home Page, https://github.com/jvm-profiling-tools/async-profiler." ++ + "\n\nCommand:\n" + Joiner.on(" ").join(cmd)); ++ ++ // to avoid auto-refresh by ProfileOutputServlet, refreshDelay can be specified ++ // via url param ++ int refreshDelay = getInteger(req, "refreshDelay", 0); ++ ++ // instead of sending redirect, set auto-refresh so that browsers will refresh ++ // with redirected url ++ resp.setHeader("Refresh", (duration + refreshDelay) + ";" + relativeUrl); ++ resp.getWriter().flush(); ++ } finally { ++ profilerLock.unlock(); ++ } ++ } else { ++ setResponseHeader(resp); ++ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); ++ resp.getWriter() ++ .write("Unable to acquire lock. Another instance of profiler might be running."); ++ LOG.warn("Unable to acquire lock in {} seconds. 
Another instance of profiler might be" ++ + " running.", lockTimeoutSecs); ++ } ++ } catch (InterruptedException e) { ++ LOG.warn("Interrupted while acquiring profile lock.", e); ++ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); ++ } ++ } else { ++ setResponseHeader(resp); ++ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); ++ resp.getWriter().write("Another instance of profiler is already running."); ++ } ++ } ++ ++ private Integer getInteger(final HttpServletRequest req, final String param, ++ final Integer defaultValue) { ++ final String value = req.getParameter(param); ++ if (value != null) { ++ try { ++ return Integer.valueOf(value); ++ } catch (NumberFormatException e) { ++ return defaultValue; ++ } ++ } ++ return defaultValue; ++ } ++ ++ private Long getLong(final HttpServletRequest req, final String param) { ++ final String value = req.getParameter(param); ++ if (value != null) { ++ try { ++ return Long.valueOf(value); ++ } catch (NumberFormatException e) { ++ return null; ++ } ++ } ++ return null; ++ } ++ ++ private Double getMinWidth(final HttpServletRequest req) { ++ final String value = req.getParameter("minwidth"); ++ if (value != null) { ++ try { ++ return Double.valueOf(value); ++ } catch (NumberFormatException e) { ++ return null; ++ } ++ } ++ return null; ++ } ++ ++ private Event getEvent(final HttpServletRequest req) { ++ final String eventArg = req.getParameter("event"); ++ if (eventArg != null) { ++ Event event = Event.fromInternalName(eventArg); ++ return event == null ? Event.CPU : event; ++ } ++ return Event.CPU; ++ } ++ ++ private Output getOutput(final HttpServletRequest req) { ++ final String outputArg = req.getParameter("output"); ++ if (req.getParameter("output") != null) { ++ try { ++ return Output.valueOf(outputArg.trim().toUpperCase()); ++ } catch (IllegalArgumentException e) { ++ return Output.HTML; ++ } ++ } ++ return Output.HTML; ++ } ++ ++ static void setResponseHeader(final HttpServletResponse response) { ++ response.setHeader(ACCESS_CONTROL_ALLOW_METHODS, ALLOWED_METHODS); ++ response.setHeader(ACCESS_CONTROL_ALLOW_ORIGIN, "*"); ++ response.setContentType(CONTENT_TYPE_TEXT); ++ } ++ ++ static String getAsyncProfilerHome() { ++ String asyncProfilerHome = System.getenv(ASYNC_PROFILER_HOME_ENV); ++ // if ENV is not set, see if -Dasync.profiler.home=/path/to/async/profiler/home is set ++ if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { ++ asyncProfilerHome = System.getProperty(ASYNC_PROFILER_HOME_SYSTEM_PROPERTY); ++ } ++ ++ return asyncProfilerHome; ++ } ++ ++} +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java +new file mode 100644 +index 0000000000..459485ffa5 +--- /dev/null ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java +@@ -0,0 +1,44 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. 
You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.hadoop.http; ++ ++import java.io.IOException; ++import javax.servlet.http.HttpServlet; ++import javax.servlet.http.HttpServletRequest; ++import javax.servlet.http.HttpServletResponse; ++ ++import org.apache.hadoop.classification.InterfaceAudience; ++ ++/** ++ * Servlet for disabled async-profiler. ++ */ ++@InterfaceAudience.Private ++public class ProfilerDisabledServlet extends HttpServlet { ++ ++ @Override ++ protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) ++ throws IOException { ++ resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); ++ ProfileServlet.setResponseHeader(resp); ++ resp.getWriter().write("The profiler servlet was disabled at startup.\n\n" ++ + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n" ++ + "environment is properly configured."); ++ } ++ ++} +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java +new file mode 100644 +index 0000000000..cf653b9c91 +--- /dev/null ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java +@@ -0,0 +1,74 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.hadoop.util; ++ ++import java.io.IOException; ++import java.lang.management.ManagementFactory; ++import java.util.List; ++ ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++ ++import org.apache.hadoop.classification.InterfaceAudience; ++ ++/** ++ * Process related utilities. ++ */ ++@InterfaceAudience.Private ++public final class ProcessUtils { ++ ++ private static final Logger LOG = LoggerFactory.getLogger(ProcessUtils.class); ++ ++ private ProcessUtils() { ++ // no-op ++ } ++ ++ public static Integer getPid() { ++ // JVM_PID can be exported in service start script ++ String pidStr = System.getenv("JVM_PID"); ++ ++ // In case if it is not set correctly, fallback to mxbean which is implementation specific. 
++ if (pidStr == null || pidStr.trim().isEmpty()) { ++ String name = ManagementFactory.getRuntimeMXBean().getName(); ++ if (name != null) { ++ int idx = name.indexOf("@"); ++ if (idx != -1) { ++ pidStr = name.substring(0, name.indexOf("@")); ++ } ++ } ++ } ++ try { ++ if (pidStr != null) { ++ return Integer.valueOf(pidStr); ++ } ++ } catch (NumberFormatException ignored) { ++ // ignore ++ } ++ return null; ++ } ++ ++ public static Process runCmdAsync(List cmd) { ++ try { ++ LOG.info("Running command async: {}", cmd); ++ return new ProcessBuilder(cmd).inheritIO().start(); ++ } catch (IOException e) { ++ throw new IllegalStateException(e); ++ } ++ } ++} +diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +index b1a25ce1f0..8068bae969 100644 +--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml ++++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +@@ -69,7 +69,7 @@ + false + + Indicates if administrator ACLs are required to access +- instrumentation servlets (JMX, METRICS, CONF, STACKS). ++ instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF). + + + +diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md b/hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md +new file mode 100644 +index 0000000000..4b93cc219a +--- /dev/null ++++ b/hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md +@@ -0,0 +1,145 @@ ++ ++ ++Async Profiler Servlet for Hadoop ++======================================== ++ ++ ++ ++Purpose ++------- ++ ++This document describes how to configure and use async profiler ++with Hadoop applications. ++Async profiler is a low overhead sampling profiler for Java that ++does not suffer from Safepoint bias problem. It features ++HotSpot-specific APIs to collect stack traces and to track memory ++allocations. The profiler works with OpenJDK, Oracle JDK and other ++Java runtimes based on the HotSpot JVM. ++ ++Hadoop profiler servlet supports Async Profiler major versions ++1.x and 2.x. ++ ++Prerequisites ++------------- ++ ++Make sure Hadoop is installed, configured and setup correctly. ++For more information see: ++ ++* [Single Node Setup](./SingleCluster.html) for first-time users. ++* [Cluster Setup](./ClusterSetup.html) for large, distributed clusters. ++ ++Go to https://github.com/jvm-profiling-tools/async-profiler, ++download a release appropriate for your platform, and install ++on every cluster host. ++ ++Set `ASYNC_PROFILER_HOME` in the environment (put it in hadoop-env.sh) ++to the root directory of the async-profiler install location, or pass ++it on the Hadoop daemon's command line as a system property as ++`-Dasync.profiler.home=/path/to/async-profiler`. ++ ++ ++Usage ++-------- ++ ++Once the prerequisites have been satisfied, access to the async-profiler ++is available by using Namenode or ResourceManager UI. ++ ++Following options from async-profiler can be specified as query paramater. ++* `-e event` profiling event: cpu|alloc|lock|cache-misses etc. 
++* `-d duration` run profiling for 'duration' seconds (integer) ++* `-i interval` sampling interval in nanoseconds (long) ++* `-j jstackdepth` maximum Java stack depth (integer) ++* `-b bufsize` frame buffer size (long) ++* `-t` profile different threads separately ++* `-s` simple class names instead of FQN ++* `-o fmt[,fmt...]` output format: summary|traces|flat|collapsed|svg|tree|jfr|html ++* `--width px` SVG width pixels (integer) ++* `--height px` SVG frame height pixels (integer) ++* `--minwidth px` skip frames smaller than px (double) ++* `--reverse` generate stack-reversed FlameGraph / Call tree ++ ++ ++Example: ++If Namenode http address is localhost:9870, and ResourceManager http ++address is localhost:8088, ProfileServlet running with async-profiler ++setup can be accessed with http://localhost:9870/prof and ++http://localhost:8088/prof for Namenode and ResourceManager processes ++respectively. ++ ++Diving deep into some params: ++ ++* To collect 10 second CPU profile of current process ++ (returns FlameGraph svg) ++ * `curl http://localhost:9870/prof` (FlameGraph svg for Namenode) ++ * `curl http://localhost:8088/prof` (FlameGraph svg for ResourceManager) ++* To collect 10 second CPU profile of pid 12345 (returns FlameGraph svg) ++ * `curl http://localhost:9870/prof?pid=12345` (For instance, provide ++ pid of Datanode here) ++* To collect 30 second CPU profile of pid 12345 (returns FlameGraph svg) ++ * `curl http://localhost:9870/prof?pid=12345&duration=30` ++* To collect 1 minute CPU profile of current process and output in tree ++ format (html) ++ * `curl http://localhost:9870/prof?output=tree&duration=60` ++* To collect 10 second heap allocation profile of current process ++ (returns FlameGraph svg) ++ * `curl http://localhost:9870/prof?event=alloc` ++* To collect lock contention profile of current process ++ (returns FlameGraph svg) ++ * `curl http://localhost:9870/prof?event=lock` ++ ++ ++The following event types are supported by async-profiler. ++Use the 'event' parameter to specify. Default is 'cpu'. ++Not all operating systems will support all types. ++ ++Perf events: ++ ++* cpu ++* page-faults ++* context-switches ++* cycles ++* instructions ++* cache-references ++* cache-misses ++* branches ++* branch-misses ++* bus-cycles ++* L1-dcache-load-misses ++* LLC-load-misses ++* dTLB-load-misses ++ ++Java events: ++ ++* alloc ++* lock ++ ++The following output formats are supported. ++Use the 'output' parameter to specify. Default is 'flamegraph'. ++ ++Output formats: ++ ++* summary: A dump of basic profiling statistics. ++* traces: Call traces. ++* flat: Flat profile (top N hot methods). ++* collapsed: Collapsed call traces in the format used by FlameGraph ++ script. This is a collection of call stacks, where each line is a ++ semicolon separated list of frames followed by a counter. ++* svg: FlameGraph in SVG format. ++* tree: Call tree in HTML format. ++* jfr: Call traces in Java Flight Recorder format. ++ ++The 'duration' parameter specifies how long to collect trace data ++before generating output, specified in seconds. The default is 10 seconds. 
++ +diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java +new file mode 100644 +index 0000000000..ce068bb6f1 +--- /dev/null ++++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java +@@ -0,0 +1,95 @@ ++/* ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.hadoop.http; ++ ++import java.io.IOException; ++import java.net.HttpURLConnection; ++import java.net.URL; ++import javax.servlet.http.HttpServletResponse; ++ ++import org.junit.AfterClass; ++import org.junit.BeforeClass; ++import org.junit.Test; ++ ++/** ++ * Small test to cover default disabled prof endpoint. ++ */ ++public class TestDisabledProfileServlet extends HttpServerFunctionalTest { ++ ++ private static HttpServer2 server; ++ private static URL baseUrl; ++ ++ @BeforeClass ++ public static void setup() throws Exception { ++ server = createTestServer(); ++ server.start(); ++ baseUrl = getServerURL(server); ++ } ++ ++ @AfterClass ++ public static void cleanup() throws Exception { ++ server.stop(); ++ } ++ ++ @Test ++ public void testQuery() throws Exception { ++ try { ++ readOutput(new URL(baseUrl, "/prof")); ++ throw new IllegalStateException("Should not reach here"); ++ } catch (IOException e) { ++ assertTrue(e.getMessage() ++ .contains(HttpServletResponse.SC_INTERNAL_SERVER_ERROR + " for URL: " + baseUrl)); ++ } ++ ++ // CORS headers ++ HttpURLConnection conn = ++ (HttpURLConnection) new URL(baseUrl, "/prof").openConnection(); ++ assertEquals("GET", conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_METHODS)); ++ assertNotNull(conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_ORIGIN)); ++ conn.disconnect(); ++ } ++ ++ @Test ++ public void testRequestMethods() throws IOException { ++ HttpURLConnection connection = getConnection("PUT"); ++ assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED, ++ connection.getResponseCode()); ++ connection.disconnect(); ++ connection = getConnection("POST"); ++ assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED, ++ connection.getResponseCode()); ++ connection.disconnect(); ++ connection = getConnection("DELETE"); ++ assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED, ++ connection.getResponseCode()); ++ connection.disconnect(); ++ connection = getConnection("GET"); ++ assertEquals("Unexpected response code", HttpServletResponse.SC_INTERNAL_SERVER_ERROR, ++ connection.getResponseCode()); ++ connection.disconnect(); ++ } ++ ++ private HttpURLConnection getConnection(final String method) 
throws IOException { ++ URL url = new URL(baseUrl, "/prof"); ++ HttpURLConnection conn = (HttpURLConnection) url.openConnection(); ++ conn.setRequestMethod(method); ++ return conn; ++ } ++ ++} +diff --git a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm +index 6ea21d5cf4..09375d5aab 100644 +--- a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm ++++ b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm +@@ -1208,9 +1208,10 @@ Name | Description + /logs | Display log files + /stacks | Display JVM stacks + /static/index.html | The static home page ++/prof | Async Profiler endpoint + + To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`, +-and `/stacks`, configure the following properties in `kms-site.xml`: ++`/stacks` and `/prof`, configure the following properties in `kms-site.xml`: + + ```xml + +@@ -1224,7 +1225,7 @@ and `/stacks`, configure the following properties in `kms-site.xml`: + true + + Indicates if administrator ACLs are required to access +- instrumentation servlets (JMX, METRICS, CONF, STACKS). ++ instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF). + + + +diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm +index 2d0a5b8cd2..e97de0275c 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm ++++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm +@@ -162,9 +162,10 @@ Name | Description + /logs | Display log files + /stacks | Display JVM stacks + /static/index.html | The static home page ++/prof | Async Profiler endpoint + + To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`, +-and `/stacks`, configure the following properties in `httpfs-site.xml`: ++`/stacks` and `/prof`, configure the following properties in `httpfs-site.xml`: + + ```xml + +@@ -178,7 +179,7 @@ and `/stacks`, configure the following properties in `httpfs-site.xml`: + true + + Indicates if administrator ACLs are required to access +- instrumentation servlets (JMX, METRICS, CONF, STACKS). ++ instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF). + + + +diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml +index b53cbd2a05..0793e97771 100644 +--- a/hadoop-project/src/site/site.xml ++++ b/hadoop-project/src/site/site.xml +@@ -74,6 +74,7 @@ + + + ++ + + +

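The documentation and test changes above treat `/prof` as just another instrumentation servlet, gated by the same administrator-ACL switch as `/conf`, `/jmx`, `/logLevel` and `/stacks`. As a hedged sketch (not part of any patch in this set), the toggle shown in the `kms-site.xml`/`httpfs-site.xml` snippets can also be set programmatically on the `Configuration` backing the web server; the property key is the standard Hadoop one, the wrapper class is purely illustrative.

```java
import org.apache.hadoop.conf.Configuration;

/**
 * Illustrative only: mirrors the XML snippets above, which gate the
 * instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF) behind admin ACLs.
 */
public final class InstrumentationAclExample {
  private InstrumentationAclExample() {}

  public static Configuration withAdminOnlyInstrumentation(Configuration conf) {
    // Require administrator ACLs before serving /conf, /jmx, /logLevel, /logs, /stacks and /prof.
    conf.setBoolean("hadoop.security.instrumentation.requires.admin", true);
    return conf;
  }
}
```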
diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0004-Backport-HADOOP-18077.patch b/precompiled/hadoop/stackable/patches/3.3.6/0004-Backport-HADOOP-18077.patch new file mode 100644 index 000000000..f94d482cd --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0004-Backport-HADOOP-18077.patch @@ -0,0 +1,57 @@ +From 14ea43b731dcbc77f3bedc26529198787b399dea Mon Sep 17 00:00:00 2001 +From: Siegfried Weber +Date: Tue, 6 Feb 2024 16:10:54 +0100 +Subject: Backport HADOOP-18077 + +--- + .../src/main/java/org/apache/hadoop/http/HttpServer2.java | 6 ++++-- + .../org/apache/hadoop/http/ProfilerDisabledServlet.java | 8 +++++++- + 2 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java +index 1f66a7e809..96794086cb 100644 +--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java +@@ -745,10 +745,11 @@ public final class HttpServer2 implements FilterContainer { + + addDefaultServlets(); + addPrometheusServlet(conf); +- addAsyncProfilerServlet(contexts); ++ addAsyncProfilerServlet(contexts, conf); + } + +- private void addAsyncProfilerServlet(ContextHandlerCollection contexts) throws IOException { ++ private void addAsyncProfilerServlet(ContextHandlerCollection contexts, Configuration conf) ++ throws IOException { + final String asyncProfilerHome = ProfileServlet.getAsyncProfilerHome(); + if (asyncProfilerHome != null && !asyncProfilerHome.trim().isEmpty()) { + addServlet("prof", "/prof", ProfileServlet.class); +@@ -760,6 +761,7 @@ public final class HttpServer2 implements FilterContainer { + genCtx.addServlet(ProfileOutputServlet.class, "/*"); + genCtx.setResourceBase(tmpDir.toAbsolutePath().toString()); + genCtx.setDisplayName("prof-output-hadoop"); ++ setContextAttributes(genCtx, conf); + } else { + addServlet("prof", "/prof", ProfilerDisabledServlet.class); + LOG.info("ASYNC_PROFILER_HOME environment variable and async.profiler.home system property " +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java +index 459485ffa5..c488b57499 100644 +--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java +@@ -36,9 +36,15 @@ public class ProfilerDisabledServlet extends HttpServlet { + throws IOException { + resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + ProfileServlet.setResponseHeader(resp); ++ // TODO : Replace github.com link with ++ // https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/ ++ // AsyncProfilerServlet.html once Async profiler changes are released ++ // in 3.x (3.4.0 as of today). + resp.getWriter().write("The profiler servlet was disabled at startup.\n\n" + + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n" +- + "environment is properly configured."); ++ + "environment is properly configured. 
\n\n" ++ + "For more details, please refer to: https://github.com/apache/hadoop/blob/trunk/" ++ + "hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md"); + } + + } diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0005-Async-profiler-also-grab-itimer-events.patch b/precompiled/hadoop/stackable/patches/3.3.6/0005-Async-profiler-also-grab-itimer-events.patch new file mode 100644 index 000000000..a4945dd9f --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0005-Async-profiler-also-grab-itimer-events.patch @@ -0,0 +1,29 @@ +From 6b4eb2c72a71effdc2112567b750e24d5745c186 Mon Sep 17 00:00:00 2001 +From: Siegfried Weber +Date: Tue, 6 Feb 2024 16:10:54 +0100 +Subject: Async-profiler: also grab itimer events + +--- + .../src/main/java/org/apache/hadoop/http/ProfileServlet.java | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +index fc0ec7736e..e324ad6d49 100644 +--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +@@ -76,6 +76,7 @@ import org.apache.hadoop.util.ProcessUtils; + * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) + * // Perf events: + * // cpu ++ * // itimer + * // page-faults + * // context-switches + * // cycles +@@ -115,6 +116,7 @@ public class ProfileServlet extends HttpServlet { + private enum Event { + + CPU("cpu"), ++ ITIMER("itimer"), + ALLOC("alloc"), + LOCK("lock"), + PAGE_FAULTS("page-faults"), diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0006-HDFS-17378-Fix-missing-operationType-for-some-operat.patch b/precompiled/hadoop/stackable/patches/3.3.6/0006-HDFS-17378-Fix-missing-operationType-for-some-operat.patch new file mode 100644 index 000000000..2e3e33690 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0006-HDFS-17378-Fix-missing-operationType-for-some-operat.patch @@ -0,0 +1,201 @@ +From 36ed6731ce3afa4ccacb40c1c82dfc81e0e80483 Mon Sep 17 00:00:00 2001 +From: Sebastian Bernauer +Date: Thu, 15 Feb 2024 15:33:43 +0100 +Subject: HDFS-17378: Fix missing operationType for some operations in + authorizer + +--- + .../hdfs/server/namenode/FSNamesystem.java | 41 +++++++++++-------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +index 9855b434e9..b3781ee1dd 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +@@ -2530,15 +2530,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + * @throws IOException + */ + BlockStoragePolicy getStoragePolicy(String src) throws IOException { ++ final String operationName = "getStoragePolicy"; + checkOperation(OperationCategory.READ); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + checkOperation(OperationCategory.READ); + return 
FSDirAttrOp.getStoragePolicy(dir, pc, blockManager, src); + } finally { +- readUnlock("getStoragePolicy"); ++ readUnlock(operationName); + } + } + +@@ -2558,15 +2559,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + } + + long getPreferredBlockSize(String src) throws IOException { ++ final String operationName = "getPreferredBlockSize"; + checkOperation(OperationCategory.READ); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + checkOperation(OperationCategory.READ); + return FSDirAttrOp.getPreferredBlockSize(dir, pc, src); + } finally { +- readUnlock("getPreferredBlockSize"); ++ readUnlock(operationName); + } + } + +@@ -2619,7 +2621,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + boolean createParent, short replication, long blockSize, + CryptoProtocolVersion[] supportedVersions, String ecPolicyName, + String storagePolicy, boolean logRetryCache) throws IOException { +- + HdfsFileStatus status; + try { + status = startFileInt(src, permissions, holder, clientMachine, flag, +@@ -2639,6 +2640,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + long blockSize, CryptoProtocolVersion[] supportedVersions, + String ecPolicyName, String storagePolicy, boolean logRetryCache) + throws IOException { ++ final String operationName = "create"; + if (NameNode.stateChangeLog.isDebugEnabled()) { + StringBuilder builder = new StringBuilder(); + builder.append("DIR* NameSystem.startFile: src=").append(src) +@@ -2676,7 +2678,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -2740,7 +2742,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + dir.writeUnlock(); + } + } finally { +- writeUnlock("create"); ++ writeUnlock(operationName); + // There might be transactions logged while trying to recover the lease. + // They need to be sync'ed even when an exception was thrown. + if (!skipSync) { +@@ -2769,10 +2771,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + boolean recoverLease(String src, String holder, String clientMachine) + throws IOException { ++ final String operationName = "recoverLease"; + boolean skipSync = false; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -2793,7 +2796,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + skipSync = true; + throw se; + } finally { +- writeUnlock("recoverLease"); ++ writeUnlock(operationName); + // There might be transactions logged while trying to recover the lease. + // They need to be sync'ed even when an exception was thrown. 
+ if (!skipSync) { +@@ -3010,6 +3013,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + final Set excludes, + final int numAdditionalNodes, final String clientName + ) throws IOException { ++ final String operationName = "getAdditionalDatanode"; + //check if the feature is enabled + dtpReplaceDatanodeOnFailure.checkEnabled(); + +@@ -3021,7 +3025,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + final BlockType blockType; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + // Changing this operation category to WRITE instead of making getAdditionalDatanode as a +@@ -3047,7 +3051,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", + src, fileId, blk, clientName, clientMachine)); + } finally { +- readUnlock("getAdditionalDatanode"); ++ readUnlock(operationName); + } + + if (clientnode == null) { +@@ -3069,11 +3073,12 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + void abandonBlock(ExtendedBlock b, long fileId, String src, String holder) + throws IOException { ++ final String operationName = "abandonBlock"; + NameNode.stateChangeLog.debug( + "BLOCK* NameSystem.abandonBlock: {} of file {}", b, src); + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3082,7 +3087,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} is " + + "removed from pendingCreates", b); + } finally { +- writeUnlock("abandonBlock"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + } +@@ -3136,10 +3141,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + boolean completeFile(final String src, String holder, + ExtendedBlock last, long fileId) + throws IOException { ++ final String operationName = "completeFile"; + boolean success = false; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3147,7 +3153,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last, + fileId); + } finally { +- writeUnlock("completeFile"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + if (success) { +@@ -3572,10 +3578,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + void fsync(String src, long fileId, String clientName, long lastBlockLength) + throws IOException { ++ final String operationName = "fsync"; + NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3589,7 +3596,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean, + } + FSDirWriteFileOp.persistBlocks(dir, src, pendingFile, false); + } finally { +- writeUnlock("fsync"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + } diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0007-Bump-Snappy-version-to-fix-CVEs.patch b/precompiled/hadoop/stackable/patches/3.3.6/0007-Bump-Snappy-version-to-fix-CVEs.patch new file mode 100644 index 000000000..a6711920a --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0007-Bump-Snappy-version-to-fix-CVEs.patch @@ -0,0 +1,22 @@ +From 8cd8cdc424ff7cf410fb84941fd6d7777ec91913 Mon Sep 17 00:00:00 2001 +From: Andrew Kenworthy +Date: Thu, 16 May 2024 16:44:14 +0200 +Subject: Bump Snappy version to fix CVEs + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 9b01858e0e..da39c1e0ad 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -144,7 +144,7 @@ + 3.2.4 + 3.10.6.Final + 4.1.89.Final +- 1.1.8.2 ++ 1.1.10.4 + 1.7.1 + + diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0008-Update-CycloneDX-plugin.patch b/precompiled/hadoop/stackable/patches/3.3.6/0008-Update-CycloneDX-plugin.patch new file mode 100644 index 000000000..ef27fb2a4 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0008-Update-CycloneDX-plugin.patch @@ -0,0 +1,44 @@ +From bb767718387bcc1c49e5780e5d1a7a79fde99f15 Mon Sep 17 00:00:00 2001 +From: Lukas Voetmand +Date: Fri, 6 Sep 2024 17:53:52 +0200 +Subject: Update CycloneDX plugin + +--- + pom.xml | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/pom.xml b/pom.xml +index aaa4203012..de001775ab 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -116,7 +116,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + 1.4.3 + 4.2.2 + 4.2.0 +- 2.7.6 ++ 2.8.0 + + bash + +@@ -649,6 +649,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + org.cyclonedx + cyclonedx-maven-plugin + ${cyclonedx.version} ++ ++ application ++ 1.5 ++ false ++ + + + package +@@ -657,9 +662,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + + + +- +- xml +- + + + diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0009-HADOOP-18516-ABFS-Authentication-Support-Fixed-SAS-T.patch b/precompiled/hadoop/stackable/patches/3.3.6/0009-HADOOP-18516-ABFS-Authentication-Support-Fixed-SAS-T.patch new file mode 100644 index 000000000..41d6c9447 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0009-HADOOP-18516-ABFS-Authentication-Support-Fixed-SAS-T.patch @@ -0,0 +1,979 @@ +From 3864664a22a8c75d79774c77a7c88f5d54085f5d Mon Sep 17 00:00:00 2001 +From: Anuj Modi <128447756+anujmodi2021@users.noreply.github.com> +Date: Fri, 7 Jun 2024 19:03:23 +0530 +Subject: HADOOP-18516: [ABFS][Authentication] Support Fixed SAS Token for ABFS + Authentication (#6552) + +Contributed by Anuj Modi +--- + .../hadoop/fs/azurebfs/AbfsConfiguration.java | 69 +++++-- + .../fs/azurebfs/AzureBlobFileSystem.java | 3 +- + .../fs/azurebfs/AzureBlobFileSystemStore.java | 2 +- + .../azurebfs/constants/ConfigurationKeys.java | 5 +- + .../fs/azurebfs/services/AbfsClient.java | 9 +- + .../services/FixedSASTokenProvider.java | 65 +++++++ + .../hadoop-azure/src/site/markdown/abfs.md | 149 +++++++++++--- + .../azurebfs/AbstractAbfsIntegrationTest.java | 23 ++- + .../ITestAzureBlobFileSystemChooseSAS.java | 182 ++++++++++++++++++ + .../MockDelegationSASTokenProvider.java | 2 
+- + .../extensions/MockSASTokenProvider.java | 16 +- + .../azurebfs/utils/AccountSASGenerator.java | 103 ++++++++++ + .../fs/azurebfs/utils/SASGenerator.java | 34 +++- + .../azurebfs/utils/ServiceSASGenerator.java | 15 +- + 14 files changed, 608 insertions(+), 69 deletions(-) + create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java + create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java + create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java + +diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +index 1bf7c569da..98534f75e0 100644 +--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java ++++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +@@ -59,6 +59,7 @@ import org.apache.hadoop.fs.azurebfs.oauth2.UserPasswordTokenProvider; + import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; + import org.apache.hadoop.fs.azurebfs.services.AuthType; + import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy; ++import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider; + import org.apache.hadoop.fs.azurebfs.services.KeyProvider; + import org.apache.hadoop.fs.azurebfs.services.SimpleKeyProvider; + import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; +@@ -927,33 +928,63 @@ public class AbfsConfiguration{ + } + } + ++ /** ++ * Returns the SASTokenProvider implementation to be used to generate SAS token.
++ * Users can choose between a custom implementation of {@link SASTokenProvider} ++ * or an in house implementation {@link FixedSASTokenProvider}.
++ * For Custom implementation "fs.azure.sas.token.provider.type" needs to be provided.
++ * For Fixed SAS Token use "fs.azure.sas.fixed.token" needs to be provided.
++ * In case both are provided, Preference will be given to Custom implementation.
++ * Avoid using a custom tokenProvider implementation just to read the configured ++ * fixed token, as this could create confusion. Also,implementing the SASTokenProvider ++ * requires relying on the raw configurations. It is more stable to depend on ++ * the AbfsConfiguration with which a filesystem is initialized, and eliminate ++ * chances of dynamic modifications and spurious situations.
++ * @return sasTokenProvider object based on configurations provided ++ * @throws AzureBlobFileSystemException ++ */ + public SASTokenProvider getSASTokenProvider() throws AzureBlobFileSystemException { + AuthType authType = getEnum(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SharedKey); + if (authType != AuthType.SAS) { + throw new SASTokenProviderException(String.format( +- "Invalid auth type: %s is being used, expecting SAS", authType)); ++ "Invalid auth type: %s is being used, expecting SAS.", authType)); + } + + try { +- String configKey = FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; +- Class sasTokenProviderClass = +- getTokenProviderClass(authType, configKey, null, +- SASTokenProvider.class); +- +- Preconditions.checkArgument(sasTokenProviderClass != null, +- String.format("The configuration value for \"%s\" is invalid.", configKey)); +- +- SASTokenProvider sasTokenProvider = ReflectionUtils +- .newInstance(sasTokenProviderClass, rawConfig); +- Preconditions.checkArgument(sasTokenProvider != null, +- String.format("Failed to initialize %s", sasTokenProviderClass)); +- +- LOG.trace("Initializing {}", sasTokenProviderClass.getName()); +- sasTokenProvider.initialize(rawConfig, accountName); +- LOG.trace("{} init complete", sasTokenProviderClass.getName()); +- return sasTokenProvider; ++ Class customSasTokenProviderImplementation = ++ getTokenProviderClass(authType, FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, ++ null, SASTokenProvider.class); ++ String configuredFixedToken = this.getTrimmedPasswordString(FS_AZURE_SAS_FIXED_TOKEN, EMPTY_STRING); ++ ++ if (customSasTokenProviderImplementation == null && configuredFixedToken.isEmpty()) { ++ throw new SASTokenProviderException(String.format( ++ "At least one of the \"%s\" and \"%s\" must be set.", ++ FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, FS_AZURE_SAS_FIXED_TOKEN)); ++ } ++ ++ // Prefer Custom SASTokenProvider Implementation if configured. 
++ if (customSasTokenProviderImplementation != null) { ++ LOG.trace("Using Custom SASTokenProvider implementation because it is given precedence when it is set."); ++ SASTokenProvider sasTokenProvider = ReflectionUtils.newInstance( ++ customSasTokenProviderImplementation, rawConfig); ++ if (sasTokenProvider == null) { ++ throw new SASTokenProviderException(String.format( ++ "Failed to initialize %s", customSasTokenProviderImplementation)); ++ } ++ LOG.trace("Initializing {}", customSasTokenProviderImplementation.getName()); ++ sasTokenProvider.initialize(rawConfig, accountName); ++ LOG.trace("{} init complete", customSasTokenProviderImplementation.getName()); ++ return sasTokenProvider; ++ } else { ++ LOG.trace("Using FixedSASTokenProvider implementation"); ++ FixedSASTokenProvider fixedSASTokenProvider = new FixedSASTokenProvider(configuredFixedToken); ++ return fixedSASTokenProvider; ++ } ++ } catch (SASTokenProviderException e) { ++ throw e; + } catch (Exception e) { +- throw new TokenAccessProviderException("Unable to load SAS token provider class: " + e, e); ++ throw new SASTokenProviderException( ++ "Unable to load SAS token provider class: " + e, e); + } + } + +diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +index 5fb2c6e170..8bfaf2fa5e 100644 +--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java ++++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +@@ -1273,10 +1273,9 @@ public class AzureBlobFileSystem extends FileSystem + + /** + * Incrementing exists() calls from superclass for statistic collection. +- * + * @param f source path. + * @return true if the path exists. +- * @throws IOException ++ * @throws IOException if some issue in checking path. 
+ */ + @Override + public boolean exists(Path f) throws IOException { +diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +index cd33da401c..dc6d100173 100644 +--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java ++++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +@@ -1611,7 +1611,7 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport { + creds = new SharedKeyCredentials(accountName.substring(0, dotIndex), + abfsConfiguration.getStorageAccountKey()); + } else if (authType == AuthType.SAS) { +- LOG.trace("Fetching SAS token provider"); ++ LOG.trace("Fetching SAS Token Provider"); + sasTokenProvider = abfsConfiguration.getSASTokenProvider(); + } else { + LOG.trace("Fetching token provider"); +diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +index 872364a8e6..5458bf4d8b 100644 +--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java ++++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +@@ -248,7 +248,10 @@ public final class ConfigurationKeys { + public static final String FS_AZURE_ENABLE_DELEGATION_TOKEN = "fs.azure.enable.delegation.token"; + public static final String FS_AZURE_DELEGATION_TOKEN_PROVIDER_TYPE = "fs.azure.delegation.token.provider.type"; + +- /** Key for SAS token provider **/ ++ /** Key for fixed SAS token: {@value}. **/ ++ public static final String FS_AZURE_SAS_FIXED_TOKEN = "fs.azure.sas.fixed.token"; ++ ++ /** Key for SAS token provider: {@value}. **/ + public static final String FS_AZURE_SAS_TOKEN_PROVIDER_TYPE = "fs.azure.sas.token.provider.type"; + + /** For performance, AbfsInputStream/AbfsOutputStream re-use SAS tokens until the expiry is within this number of seconds. 
**/ +diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +index 1767274f36..8255bbb76c 100644 +--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java ++++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +@@ -891,6 +891,7 @@ public class AbfsClient implements Closeable { + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(position)); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RETAIN_UNCOMMITTED_DATA, String.valueOf(retainUncommittedData)); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, String.valueOf(isClose)); ++ + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance + String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.WRITE_OPERATION, + abfsUriQueryBuilder, cachedSasToken); +@@ -972,6 +973,7 @@ public class AbfsClient implements Closeable { + requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); ++ + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance + String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.READ_OPERATION, + abfsUriQueryBuilder, cachedSasToken); +@@ -1266,16 +1268,17 @@ public class AbfsClient implements Closeable { + sasToken = cachedSasToken; + LOG.trace("Using cached SAS token."); + } ++ + // if SAS Token contains a prefix of ?, it should be removed + if (sasToken.charAt(0) == '?') { + sasToken = sasToken.substring(1); + } ++ + queryBuilder.setSASToken(sasToken); + LOG.trace("SAS token fetch complete for {} on {}", operation, path); + } catch (Exception ex) { +- throw new SASTokenProviderException(String.format("Failed to acquire a SAS token for %s on %s due to %s", +- operation, +- path, ++ throw new SASTokenProviderException(String.format( ++ "Failed to acquire a SAS token for %s on %s due to %s", operation, path, + ex.toString())); + } + } +diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java +new file mode 100644 +index 0000000000..1a2614dcc1 +--- /dev/null ++++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java +@@ -0,0 +1,65 @@ ++/** ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++package org.apache.hadoop.fs.azurebfs.services; ++ ++import java.io.IOException; ++ ++import org.apache.hadoop.conf.Configuration; ++import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; ++import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; ++ ++/** ++ * In house implementation of {@link SASTokenProvider} to use a fixed SAS token with ABFS. ++ * Use this to avoid implementing a Custom Token Provider just to return fixed SAS. ++ * Fixed SAS Token to be provided using the config "fs.azure.sas.fixed.token". ++ */ ++public class FixedSASTokenProvider implements SASTokenProvider { ++ private String fixedSASToken; ++ ++ public FixedSASTokenProvider(final String fixedSASToken) throws SASTokenProviderException { ++ this.fixedSASToken = fixedSASToken; ++ if (fixedSASToken == null || fixedSASToken.isEmpty()) { ++ throw new SASTokenProviderException( ++ String.format("Configured Fixed SAS Token is Invalid: %s", fixedSASToken)); ++ } ++ } ++ ++ @Override ++ public void initialize(final Configuration configuration, ++ final String accountName) ++ throws IOException { ++ } ++ ++ /** ++ * Returns the fixed SAS Token configured. ++ * @param account the name of the storage account. ++ * @param fileSystem the name of the fileSystem. ++ * @param path the file or directory path. ++ * @param operation the operation to be performed on the path. ++ * @return Fixed SAS Token ++ * @throws IOException never ++ */ ++ @Override ++ public String getSASToken(final String account, ++ final String fileSystem, ++ final String path, ++ final String operation) throws IOException { ++ return fixedSASToken; ++ } ++} +diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +index aff1e32b83..a994b3892c 100644 +--- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md ++++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +@@ -12,7 +12,7 @@ + limitations under the License. See accompanying LICENSE file. + --> + +-# Hadoop Azure Support: ABFS — Azure Data Lake Storage Gen2 ++# Hadoop Azure Support: ABFS - Azure Data Lake Storage Gen2 + + + +@@ -309,12 +309,13 @@ in different deployment situations. + The ABFS client can be deployed in different ways, with its authentication needs + driven by them. + +-1. With the storage account's authentication secret in the configuration: +-"Shared Key". +-1. Using OAuth 2.0 tokens of one form or another. +-1. Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, +- "Managed Instance". +-1. Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface. ++1. With the storage account's authentication secret in the configuration: "Shared Key". ++2. Using OAuth 2.0 tokens of one form or another. ++3. Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, "Managed Instance". ++4. Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface. ++5. By directly configuring a fixed Shared Access Signature (SAS) token in the account configuration settings files. ++ ++Note: SAS Based Authentication should be used only with HNS Enabled accounts. + + What can be changed is what secrets/credentials are used to authenticate the caller. + +@@ -355,14 +356,14 @@ the password, "key", retrieved from the XML/JCECKs configuration files. 
+ + ```xml + +- fs.azure.account.auth.type.abfswales1.dfs.core.windows.net ++ fs.azure.account.auth.type.ACCOUNT_NAME.dfs.core.windows.net + SharedKey + + + + +- fs.azure.account.key.abfswales1.dfs.core.windows.net +- ZGlkIHlvdSByZWFsbHkgdGhpbmsgSSB3YXMgZ29pbmcgdG8gcHV0IGEga2V5IGluIGhlcmU/IA== ++ fs.azure.account.key.ACCOUNT_NAME.dfs.core.windows.net ++ ACCOUNT_KEY + + The secret password. Never share these. + +@@ -609,21 +610,119 @@ In case delegation token is enabled, and the config `fs.azure.delegation.token + + ### Shared Access Signature (SAS) Token Provider + +-A Shared Access Signature (SAS) token provider supplies the ABFS connector with SAS +-tokens by implementing the SASTokenProvider interface. +- +-```xml +- +- fs.azure.account.auth.type +- SAS +- +- +- fs.azure.sas.token.provider.type +- {fully-qualified-class-name-for-implementation-of-SASTokenProvider-interface} +- +-``` +- +-The declared class must implement `org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider`. ++A shared access signature (SAS) provides secure delegated access to resources in ++your storage account. With a SAS, you have granular control over how a client can access your data. ++To know more about how SAS Authentication works refer to ++[Grant limited access to Azure Storage resources using shared access signatures (SAS)](https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview) ++ ++There are three types of SAS supported by Azure Storage: ++- [User Delegation SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas): Recommended for use with ABFS Driver with HNS Enabled ADLS Gen2 accounts. It is Identity based SAS that works at blob/directory level) ++- [Service SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas): Global and works at container level. ++- [Account SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas): Global and works at account level. ++ ++#### Known Issues With SAS ++- SAS Based Authentication works only with HNS Enabled ADLS Gen2 Accounts which ++is a recommended account type to be used with ABFS. ++- Certain root level operations are known to fail with SAS Based Authentication. ++ ++#### Using User Delegation SAS with ABFS ++ ++- **Description**: ABFS allows you to implement your custom SAS Token Provider ++that uses your identity to create a user delegation key which then can be used to ++create SAS instead of storage account key. The declared class must implement ++`org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider`. ++ ++- **Configuration**: To use this method with ABFS Driver, specify the following properties in your `core-site.xml` file: ++ 1. Authentication Type: ++ ```xml ++ ++ fs.azure.account.auth.type ++ SAS ++ ++ ``` ++ ++ 1. Custom SAS Token Provider Class: ++ ```xml ++ ++ fs.azure.sas.token.provider.type ++ CUSTOM_SAS_TOKEN_PROVIDER_CLASS ++ ++ ``` ++ ++ Replace `CUSTOM_SAS_TOKEN_PROVIDER_CLASS` with fully qualified class name of ++your custom token provider implementation. Depending upon the implementation you ++might need to specify additional configurations that are required by your custom ++implementation. 
++ ++- **Example**: ABFS Hadoop Driver provides a [MockDelegationSASTokenProvider](https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java) ++implementation that can be used as an example on how to implement your own custom ++SASTokenProvider. This requires the Application credentials to be specifed using ++the following configurations apart from above two: ++ ++ 1. App Service Principle Tenant Id: ++ ```xml ++ ++ fs.azure.test.app.service.principal.tenant.id ++ TENANT_ID ++ ++ ``` ++ 1. App Service Principle Object Id: ++ ```xml ++ ++ fs.azure.test.app.service.principal.object.id ++ OBJECT_ID ++ ++ ``` ++ 1. App Id: ++ ```xml ++ ++ fs.azure.test.app.id ++ APPLICATION_ID ++ ++ ``` ++ 1. App Secret: ++ ```xml ++ ++ fs.azure.test.app.secret ++ APPLICATION_SECRET ++ ++ ``` ++ ++- **Security**: More secure than Shared Key and allows granting limited access ++to data without exposing the access key. Recommended to be used only with HNS Enabled, ++ADLS Gen 2 storage accounts. ++ ++#### Using Account/Service SAS with ABFS ++ ++- **Description**: ABFS allows user to use Account/Service SAS for authenticating ++requests. User can specify them as fixed SAS Token to be used across all the requests. ++ ++- **Configuration**: To use this method with ABFS Driver, specify the following properties in your `core-site.xml` file: ++ ++ 1. Authentication Type: ++ ```xml ++ ++ fs.azure.account.auth.type ++ SAS ++ ++ ``` ++ ++ 1. Fixed SAS Token: ++ ```xml ++ ++ fs.azure.sas.fixed.token ++ FIXED_SAS_TOKEN ++ ++ ``` ++ ++ Replace `FIXED_SAS_TOKEN` with fixed Account/Service SAS. You can also ++generate SAS from Azure portal. Account -> Security + Networking -> Shared Access Signature ++ ++- **Security**: Account/Service SAS requires account keys to be used which makes ++them less secure. There is no scope of having delegated access to different users. ++ ++*Note:* When `fs.azure.sas.token.provider.type` and `fs.azure.fixed.sas.token` ++are both configured, precedence will be given to the custom token provider implementation. + + ## Technical notes + +diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +index 74655fd573..be4c4541eb 100644 +--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java ++++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +@@ -282,13 +282,30 @@ public abstract class AbstractAbfsIntegrationTest extends + useConfiguredFileSystem = true; + } + ++ /** ++ * Create a filesystem for SAS tests using the SharedKey authentication. ++ * We do not allow filesystem creation with SAS because certain type of SAS do not have ++ * required permissions, and it is not known what type of SAS is configured by user. ++ * @throws Exception ++ */ + protected void createFilesystemForSASTests() throws Exception { +- // The SAS tests do not have permission to create a filesystem +- // so first create temporary instance of the filesystem using SharedKey +- // then re-use the filesystem it creates with SAS auth instead of SharedKey. ++ createFilesystemWithTestFileForSASTests(null); ++ } ++ ++ /** ++ * Create a filesystem for SAS tests along with a test file using SharedKey authentication. 
++ * We do not allow filesystem creation with SAS because certain type of SAS do not have ++ * required permissions, and it is not known what type of SAS is configured by user. ++ * @param testPath path of the test file. ++ * @throws Exception ++ */ ++ protected void createFilesystemWithTestFileForSASTests(Path testPath) throws Exception { + try (AzureBlobFileSystem tempFs = (AzureBlobFileSystem) FileSystem.newInstance(rawConfig)){ + ContractTestUtils.assertPathExists(tempFs, "This path should exist", + new Path("/")); ++ if (testPath != null) { ++ tempFs.create(testPath).close(); ++ } + abfsConfig.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SAS.name()); + usingFilesystemForSASTests = true; + } +diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java +new file mode 100644 +index 0000000000..d8db901151 +--- /dev/null ++++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java +@@ -0,0 +1,182 @@ ++/** ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++package org.apache.hadoop.fs.azurebfs; ++ ++import java.io.IOException; ++import java.nio.file.AccessDeniedException; ++ ++import org.assertj.core.api.Assertions; ++import org.junit.Assume; ++import org.junit.Test; ++ ++import org.apache.hadoop.fs.FileSystem; ++import org.apache.hadoop.fs.Path; ++import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; ++import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; ++import org.apache.hadoop.fs.azurebfs.extensions.MockDelegationSASTokenProvider; ++import org.apache.hadoop.fs.azurebfs.services.AuthType; ++import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider; ++import org.apache.hadoop.fs.azurebfs.utils.AccountSASGenerator; ++import org.apache.hadoop.fs.azurebfs.utils.Base64; ++ ++import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_FIXED_TOKEN; ++import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; ++import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.accountProperty; ++import static org.apache.hadoop.test.LambdaTestUtils.intercept; ++ ++/** ++ * Tests to validate the choice between using a custom SASTokenProvider ++ * implementation and FixedSASTokenProvider. 
++ */ ++public class ITestAzureBlobFileSystemChooseSAS extends AbstractAbfsIntegrationTest{ ++ ++ private String accountSAS = null; ++ private static final String TEST_PATH = "testPath"; ++ ++ /** ++ * To differentiate which SASTokenProvider was used we will use different type of SAS Tokens. ++ * FixedSASTokenProvider will return an Account SAS with only read permissions. ++ * SASTokenProvider will return a User Delegation SAS Token with both read and write permissions. ++= */ ++ public ITestAzureBlobFileSystemChooseSAS() throws Exception { ++ // SAS Token configured might not have permissions for creating file system. ++ // Shared Key must be configured to create one. Once created, a new instance ++ // of same file system will be used with SAS Authentication. ++ Assume.assumeTrue(this.getAuthType() == AuthType.SharedKey); ++ } ++ ++ @Override ++ public void setup() throws Exception { ++ createFilesystemWithTestFileForSASTests(new Path(TEST_PATH)); ++ super.setup(); ++ generateAccountSAS(); ++ } ++ ++ /** ++ * Generates an Account SAS Token using the Account Shared Key to be used as a fixed SAS Token. ++ * Account SAS used here will have only read permissions to resources. ++ * This will be used by individual tests to set in the configurations. ++ * @throws AzureBlobFileSystemException ++ */ ++ private void generateAccountSAS() throws AzureBlobFileSystemException { ++ final String accountKey = getConfiguration().getStorageAccountKey(); ++ AccountSASGenerator configAccountSASGenerator = new AccountSASGenerator(Base64.decode(accountKey)); ++ // Setting only read permissions. ++ configAccountSASGenerator.setPermissions("r"); ++ accountSAS = configAccountSASGenerator.getAccountSAS(getAccountName()); ++ } ++ ++ /** ++ * Tests the scenario where both the custom SASTokenProvider and a fixed SAS token are configured. ++ * Custom implementation of SASTokenProvider class should be chosen and User Delegation SAS should be used. ++ * @throws Exception ++ */ ++ @Test ++ public void testBothProviderFixedTokenConfigured() throws Exception { ++ AbfsConfiguration testAbfsConfig = new AbfsConfiguration( ++ getRawConfiguration(), this.getAccountName()); ++ removeAnyPresetConfiguration(testAbfsConfig); ++ ++ // Configuring a SASTokenProvider class which provides a user delegation SAS. ++ testAbfsConfig.set(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, ++ MockDelegationSASTokenProvider.class.getName()); ++ ++ // configuring the Fixed SAS token which is an Account SAS. ++ testAbfsConfig.set(FS_AZURE_SAS_FIXED_TOKEN, accountSAS); ++ ++ // Creating a new file system with updated configs. ++ try (AzureBlobFileSystem newTestFs = (AzureBlobFileSystem) ++ FileSystem.newInstance(testAbfsConfig.getRawConfiguration())) { ++ ++ // Asserting that MockDelegationSASTokenProvider is used. ++ Assertions.assertThat(testAbfsConfig.getSASTokenProvider()) ++ .describedAs("Custom SASTokenProvider Class must be used") ++ .isInstanceOf(MockDelegationSASTokenProvider.class); ++ ++ // Assert that User Delegation SAS is used and both read and write operations are permitted. ++ Path testPath = path(getMethodName()); ++ newTestFs.create(testPath).close(); ++ newTestFs.open(testPath).close(); ++ } ++ } ++ ++ /** ++ * Tests the scenario where only the fixed token is configured, and no token provider class is set. ++ * Account SAS Token configured as fixed SAS should be used. ++ * Also verifies that Account Specific as well as Account Agnostic Fixed SAS Token Works. 
++ * @throws IOException ++ */ ++ @Test ++ public void testOnlyFixedTokenConfigured() throws Exception { ++ AbfsConfiguration testAbfsConfig = new AbfsConfiguration( ++ getRawConfiguration(), this.getAccountName()); ++ ++ // setting an Account Specific Fixed SAS token. ++ removeAnyPresetConfiguration(testAbfsConfig); ++ testAbfsConfig.set(accountProperty(FS_AZURE_SAS_FIXED_TOKEN, this.getAccountName()), accountSAS); ++ testOnlyFixedTokenConfiguredInternal(testAbfsConfig); ++ ++ // setting an Account Agnostic Fixed SAS token. ++ removeAnyPresetConfiguration(testAbfsConfig); ++ testAbfsConfig.set(FS_AZURE_SAS_FIXED_TOKEN, accountSAS); ++ testOnlyFixedTokenConfiguredInternal(testAbfsConfig); ++ } ++ ++ private void testOnlyFixedTokenConfiguredInternal(AbfsConfiguration testAbfsConfig) throws Exception { ++ // Creating a new filesystem with updated configs. ++ try (AzureBlobFileSystem newTestFs = (AzureBlobFileSystem) ++ FileSystem.newInstance(testAbfsConfig.getRawConfiguration())) { ++ ++ // Asserting that FixedSASTokenProvider is used. ++ Assertions.assertThat(testAbfsConfig.getSASTokenProvider()) ++ .describedAs("FixedSASTokenProvider Class must be used") ++ .isInstanceOf(FixedSASTokenProvider.class); ++ ++ // Assert that Account SAS is used and only read operations are permitted. ++ Path testPath = path(getMethodName()); ++ intercept(AccessDeniedException.class, () -> { ++ newTestFs.create(testPath); ++ }); ++ // Read Operation is permitted ++ newTestFs.getFileStatus(new Path(TEST_PATH)); ++ } ++ } ++ ++ /** ++ * Tests the scenario where both the token provider class and the fixed token are not configured. ++ * The code errors out at the initialization stage itself. ++ * @throws IOException ++ */ ++ @Test ++ public void testBothProviderFixedTokenUnset() throws Exception { ++ AbfsConfiguration testAbfsConfig = new AbfsConfiguration( ++ getRawConfiguration(), this.getAccountName()); ++ removeAnyPresetConfiguration(testAbfsConfig); ++ ++ intercept(SASTokenProviderException.class, () -> { ++ FileSystem.newInstance(testAbfsConfig.getRawConfiguration()); ++ }); ++ } ++ ++ private void removeAnyPresetConfiguration(AbfsConfiguration testAbfsConfig) { ++ testAbfsConfig.unset(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE); ++ testAbfsConfig.unset(FS_AZURE_SAS_FIXED_TOKEN); ++ testAbfsConfig.unset(accountProperty(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, this.getAccountName())); ++ testAbfsConfig.unset(accountProperty(FS_AZURE_SAS_FIXED_TOKEN, this.getAccountName())); ++ } ++} +diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java +index cf7d51da4c..d1e5dd4519 100644 +--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java ++++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java +@@ -40,7 +40,7 @@ import org.apache.hadoop.fs.azurebfs.utils.SASGenerator; + import org.apache.hadoop.security.AccessControlException; + + /** +- * A mock SAS token provider implementation ++ * A mock SAS token provider implementation. 
+ */ + public class MockDelegationSASTokenProvider implements SASTokenProvider { + +diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java +index 50ac20970f..3fda128a9c 100644 +--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java ++++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java +@@ -20,7 +20,11 @@ package org.apache.hadoop.fs.azurebfs.extensions; + + import java.io.IOException; + ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++ + import org.apache.hadoop.conf.Configuration; ++import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; + import org.apache.hadoop.security.AccessControlException; + + import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +@@ -28,17 +32,25 @@ import org.apache.hadoop.fs.azurebfs.utils.Base64; + import org.apache.hadoop.fs.azurebfs.utils.ServiceSASGenerator; + + /** +- * A mock SAS token provider implementation ++ * A mock SAS token provider implementation. + */ + public class MockSASTokenProvider implements SASTokenProvider { + + private byte[] accountKey; + private ServiceSASGenerator generator; + private boolean skipAuthorizationForTestSetup = false; ++ private static final Logger LOG = LoggerFactory.getLogger(MockSASTokenProvider.class); + + // For testing we use a container SAS for all operations. + private String generateSAS(byte[] accountKey, String accountName, String fileSystemName) { +- return generator.getContainerSASWithFullControl(accountName, fileSystemName); ++ String containerSAS = ""; ++ try { ++ containerSAS = generator.getContainerSASWithFullControl(accountName, fileSystemName); ++ } catch (InvalidConfigurationValueException e) { ++ LOG.debug(e.getMessage()); ++ containerSAS = ""; ++ } ++ return containerSAS; + } + + @Override +diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java +new file mode 100644 +index 0000000000..2af741b7a4 +--- /dev/null ++++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java +@@ -0,0 +1,103 @@ ++/** ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++package org.apache.hadoop.fs.azurebfs.utils; ++ ++import java.time.Instant; ++ ++import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; ++import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; ++ ++/** ++ * Test Account SAS Generator. ++ * SAS generated by this will have only read access to storage account blob and file services. ++ */ ++public class AccountSASGenerator extends SASGenerator { ++ /** ++ * Creates Account SAS from Storage Account Key. ++ * https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas. ++ * @param accountKey: the storage account key. ++ */ ++ public AccountSASGenerator(byte[] accountKey) { ++ super(accountKey); ++ } ++ ++ private String permissions = "racwdl"; ++ ++ public String getAccountSAS(String accountName) throws ++ AzureBlobFileSystemException { ++ // retaining only the account name ++ accountName = getCanonicalAccountName(accountName); ++ String sp = permissions; ++ String sv = "2021-06-08"; ++ String srt = "sco"; ++ ++ String st = ISO_8601_FORMATTER.format(Instant.now().minus(FIVE_MINUTES)); ++ String se = ISO_8601_FORMATTER.format(Instant.now().plus(ONE_DAY)); ++ ++ String ss = "bf"; ++ String spr = "https"; ++ String signature = computeSignatureForSAS(sp, ss, srt, st, se, sv, accountName); ++ ++ AbfsUriQueryBuilder qb = new AbfsUriQueryBuilder(); ++ qb.addQuery("sp", sp); ++ qb.addQuery("ss", ss); ++ qb.addQuery("srt", srt); ++ qb.addQuery("st", st); ++ qb.addQuery("se", se); ++ qb.addQuery("sv", sv); ++ qb.addQuery("sig", signature); ++ return qb.toString().substring(1); ++ } ++ ++ private String computeSignatureForSAS(String signedPerm, String signedService, String signedResType, ++ String signedStart, String signedExp, String signedVersion, String accountName) { ++ ++ StringBuilder sb = new StringBuilder(); ++ sb.append(accountName); ++ sb.append("\n"); ++ sb.append(signedPerm); ++ sb.append("\n"); ++ sb.append(signedService); ++ sb.append("\n"); ++ sb.append(signedResType); ++ sb.append("\n"); ++ sb.append(signedStart); ++ sb.append("\n"); ++ sb.append(signedExp); ++ sb.append("\n"); ++ sb.append("\n"); // signedIP ++ sb.append("\n"); // signedProtocol ++ sb.append(signedVersion); ++ sb.append("\n"); ++ sb.append("\n"); //signed encryption scope ++ ++ String stringToSign = sb.toString(); ++ LOG.debug("Account SAS stringToSign: " + stringToSign.replace("\n", ".")); ++ return computeHmac256(stringToSign); ++ } ++ ++ /** ++ * By default Account SAS has all the available permissions. Use this to ++ * override the default permissions and set as per the requirements. ++ * @param permissions ++ */ ++ public void setPermissions(final String permissions) { ++ this.permissions = permissions; ++ } ++} +diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java +index 2e9289d8d4..a80ddac5ed 100644 +--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java ++++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java +@@ -29,6 +29,10 @@ import javax.crypto.spec.SecretKeySpec; + + import org.slf4j.Logger; + import org.slf4j.LoggerFactory; ++ ++import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; ++import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; ++ + /** + * Test SAS generator. 
+ */ +@@ -54,10 +58,8 @@ public abstract class SASGenerator { + protected static final Logger LOG = LoggerFactory.getLogger(SASGenerator.class); + public static final Duration FIVE_MINUTES = Duration.ofMinutes(5); + public static final Duration ONE_DAY = Duration.ofDays(1); +- public static final DateTimeFormatter ISO_8601_FORMATTER = +- DateTimeFormatter +- .ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT) +- .withZone(ZoneId.of("UTC")); ++ public static final DateTimeFormatter ISO_8601_FORMATTER = DateTimeFormatter ++ .ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT).withZone(ZoneId.of("UTC")); + + private Mac hmacSha256; + private byte[] key; +@@ -68,7 +70,7 @@ public abstract class SASGenerator { + + /** + * Called by subclasses to initialize the cryptographic SHA-256 HMAC provider. +- * @param key - a 256-bit secret key ++ * @param key - a 256-bit secret key. + */ + protected SASGenerator(byte[] key) { + this.key = key; +@@ -85,6 +87,26 @@ public abstract class SASGenerator { + } + } + ++ protected String getCanonicalAccountName(String accountName) throws ++ InvalidConfigurationValueException { ++ // returns the account name without the endpoint ++ // given account names with endpoint have the format accountname.endpoint ++ // For example, input of xyz.dfs.core.windows.net should return "xyz" only ++ int dotIndex = accountName.indexOf(AbfsHttpConstants.DOT); ++ if (dotIndex == 0) { ++ // case when accountname starts with a ".": endpoint is present, accountName is null ++ // for example .dfs.azure.com, which is invalid ++ throw new InvalidConfigurationValueException("Account Name is not fully qualified"); ++ } ++ if (dotIndex > 0) { ++ // case when endpoint is present with accountName ++ return accountName.substring(0, dotIndex); ++ } else { ++ // case when accountName is already canonicalized ++ return accountName; ++ } ++ } ++ + protected String computeHmac256(final String stringToSign) { + byte[] utf8Bytes; + try { +@@ -98,4 +120,4 @@ public abstract class SASGenerator { + } + return Base64.encode(hmac); + } +-} +\ No newline at end of file ++} +diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java +index 24a1cea255..0ae5239e8f 100644 +--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java ++++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java +@@ -20,23 +20,26 @@ package org.apache.hadoop.fs.azurebfs.utils; + + import java.time.Instant; + ++import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; + import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; + + /** +- * Test Service SAS generator. ++ * Test Service SAS Generator. + */ + public class ServiceSASGenerator extends SASGenerator { + + /** +- * Creates a SAS Generator for Service SAS ++ * Creates a SAS Generator for Service SAS. + * (https://docs.microsoft.com/en-us/rest/api/storageservices/create-service-sas). +- * @param accountKey - the storage account key ++ * @param accountKey - the storage account key. 
+ */ + public ServiceSASGenerator(byte[] accountKey) { + super(accountKey); + } + +- public String getContainerSASWithFullControl(String accountName, String containerName) { ++ public String getContainerSASWithFullControl(String accountName, String containerName) throws ++ InvalidConfigurationValueException { ++ accountName = getCanonicalAccountName(accountName); + String sp = "rcwdl"; + String sv = AuthenticationVersion.Feb20.toString(); + String sr = "c"; +@@ -66,7 +69,7 @@ public class ServiceSASGenerator extends SASGenerator { + sb.append("\n"); + sb.append(se); + sb.append("\n"); +- // canonicalized resource ++ // canonicalize resource + sb.append("/blob/"); + sb.append(accountName); + sb.append("/"); +@@ -93,4 +96,4 @@ public class ServiceSASGenerator extends SASGenerator { + LOG.debug("Service SAS stringToSign: " + stringToSign.replace("\n", ".")); + return computeHmac256(stringToSign); + } +-} +\ No newline at end of file ++} diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0010-Build-hadoop-client-modules-before-hadoop-dist.patch b/precompiled/hadoop/stackable/patches/3.3.6/0010-Build-hadoop-client-modules-before-hadoop-dist.patch new file mode 100644 index 000000000..45e516906 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0010-Build-hadoop-client-modules-before-hadoop-dist.patch @@ -0,0 +1,25 @@ +From 786f8efde24951c73f3c022d0f96733b78d695ae Mon Sep 17 00:00:00 2001 +From: dervoeti +Date: Fri, 13 Jun 2025 15:38:45 +0200 +Subject: Build hadoop-client-modules before hadoop-dist + +This is needed, because dist depends on parts of client-modules. At least when specifying a custom version when building Hadoop, Maven for some reason does not build the client-modules before dist and the build fails. +--- + pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pom.xml b/pom.xml +index de001775ab..ccb15235c8 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -133,9 +133,9 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + hadoop-yarn-project + hadoop-mapreduce-project + hadoop-tools ++ hadoop-client-modules + hadoop-dist + hadoop-minicluster +- hadoop-client-modules + hadoop-build-tools + hadoop-cloud-storage-project + diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0011-Remove-Hadoop-benchmark.patch b/precompiled/hadoop/stackable/patches/3.3.6/0011-Remove-Hadoop-benchmark.patch new file mode 100644 index 000000000..014521c5f --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0011-Remove-Hadoop-benchmark.patch @@ -0,0 +1,21 @@ +From eb7a5e6b23118d016c2d3450c7ea3a3d82e39545 Mon Sep 17 00:00:00 2001 +From: dervoeti +Date: Fri, 13 Jun 2025 15:39:07 +0200 +Subject: Remove Hadoop benchmark + +--- + hadoop-tools/pom.xml | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml +index 88b3da867b..08811db902 100644 +--- a/hadoop-tools/pom.xml ++++ b/hadoop-tools/pom.xml +@@ -50,7 +50,6 @@ + hadoop-azure-datalake + hadoop-aliyun + hadoop-fs2img +- hadoop-benchmark + + + diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0012-HADOOP-18583.-Fix-loading-of-OpenSSL-3.x-symbols-525.patch b/precompiled/hadoop/stackable/patches/3.3.6/0012-HADOOP-18583.-Fix-loading-of-OpenSSL-3.x-symbols-525.patch new file mode 100644 index 000000000..97c1eac8d --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0012-HADOOP-18583.-Fix-loading-of-OpenSSL-3.x-symbols-525.patch @@ -0,0 +1,115 @@ +From baa7ec826f3f6d044f5307efe4b5d3bdd111bf4e Mon Sep 17 00:00:00 2001 +From: 
Sebastian Klemke <3669903+packet23@users.noreply.github.com> +Date: Thu, 7 Nov 2024 19:14:13 +0100 +Subject: HADOOP-18583. Fix loading of OpenSSL 3.x symbols (#5256) (#7149) + +Contributed by Sebastian Klemke +--- + .../org/apache/hadoop/crypto/OpensslCipher.c | 68 +++++++++++++++++-- + 1 file changed, 64 insertions(+), 4 deletions(-) + +diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c +index abff7ea5f1..f17169dec2 100644 +--- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c ++++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c +@@ -24,6 +24,57 @@ + + #include "org_apache_hadoop_crypto_OpensslCipher.h" + ++/* ++ # OpenSSL ABI Symbols ++ ++ Available on all OpenSSL versions: ++ ++ | Function | 1.0 | 1.1 | 3.0 | ++ |--------------------------------|-----|-----|-----| ++ | EVP_CIPHER_CTX_new | YES | YES | YES | ++ | EVP_CIPHER_CTX_free | YES | YES | YES | ++ | EVP_CIPHER_CTX_set_padding | YES | YES | YES | ++ | EVP_CIPHER_CTX_test_flags | YES | YES | YES | ++ | EVP_CipherInit_ex | YES | YES | YES | ++ | EVP_CipherUpdate | YES | YES | YES | ++ | EVP_CipherFinal_ex | YES | YES | YES | ++ | ENGINE_by_id | YES | YES | YES | ++ | ENGINE_free | YES | YES | YES | ++ | EVP_aes_256_ctr | YES | YES | YES | ++ | EVP_aes_128_ctr | YES | YES | YES | ++ ++ Available on old versions: ++ ++ | Function | 1.0 | 1.1 | 3.0 | ++ |--------------------------------|-----|-----|-----| ++ | EVP_CIPHER_CTX_cleanup | YES | --- | --- | ++ | EVP_CIPHER_CTX_init | YES | --- | --- | ++ | EVP_CIPHER_CTX_block_size | YES | YES | --- | ++ | EVP_CIPHER_CTX_encrypting | --- | YES | --- | ++ ++ Available on new versions: ++ ++ | Function | 1.0 | 1.1 | 3.0 | ++ |--------------------------------|-----|-----|-----| ++ | OPENSSL_init_crypto | --- | YES | YES | ++ | EVP_CIPHER_CTX_reset | --- | YES | YES | ++ | EVP_CIPHER_CTX_get_block_size | --- | --- | YES | ++ | EVP_CIPHER_CTX_is_encrypting | --- | --- | YES | ++ ++ Optionally available on new versions: ++ ++ | Function | 1.0 | 1.1 | 3.0 | ++ |--------------------------------|-----|-----|-----| ++ | EVP_sm4_ctr | --- | opt | opt | ++ ++ Name changes: ++ ++ | < 3.0 name | >= 3.0 name | ++ |----------------------------|--------------------------------| ++ | EVP_CIPHER_CTX_block_size | EVP_CIPHER_CTX_get_block_size | ++ | EVP_CIPHER_CTX_encrypting | EVP_CIPHER_CTX_is_encrypting | ++ */ ++ + #ifdef UNIX + static EVP_CIPHER_CTX * (*dlsym_EVP_CIPHER_CTX_new)(void); + static void (*dlsym_EVP_CIPHER_CTX_free)(EVP_CIPHER_CTX *); +@@ -87,6 +138,15 @@ static __dlsym_EVP_aes_128_ctr dlsym_EVP_aes_128_ctr; + static HMODULE openssl; + #endif + ++// names changed in OpenSSL 3 ABI - see History section in EVP_EncryptInit(3) ++#if OPENSSL_VERSION_NUMBER >= 0x30000000L ++#define CIPHER_CTX_BLOCK_SIZE "EVP_CIPHER_CTX_get_block_size" ++#define CIPHER_CTX_ENCRYPTING "EVP_CIPHER_CTX_is_encrypting" ++#else ++#define CIPHER_CTX_BLOCK_SIZE "EVP_CIPHER_CTX_block_size" ++#define CIPHER_CTX_ENCRYPTING "EVP_CIPHER_CTX_encrypting" ++#endif /* OPENSSL_VERSION_NUMBER >= 0x30000000L */ ++ + static void loadAesCtr(JNIEnv *env) + { + #ifdef UNIX +@@ -142,10 +202,10 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_initIDs + LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_test_flags, env, openssl, \ + "EVP_CIPHER_CTX_test_flags"); + 
LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_block_size, env, openssl, \ +- "EVP_CIPHER_CTX_block_size"); ++ CIPHER_CTX_BLOCK_SIZE); + #if OPENSSL_VERSION_NUMBER >= 0x10100000L + LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_encrypting, env, openssl, \ +- "EVP_CIPHER_CTX_encrypting"); ++ CIPHER_CTX_ENCRYPTING); + #endif + LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CipherInit_ex, env, openssl, \ + "EVP_CipherInit_ex"); +@@ -173,11 +233,11 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_initIDs + openssl, "EVP_CIPHER_CTX_test_flags"); + LOAD_DYNAMIC_SYMBOL(__dlsym_EVP_CIPHER_CTX_block_size, \ + dlsym_EVP_CIPHER_CTX_block_size, env, \ +- openssl, "EVP_CIPHER_CTX_block_size"); ++ openssl, CIPHER_CTX_BLOCK_SIZE); + #if OPENSSL_VERSION_NUMBER >= 0x10100000L + LOAD_DYNAMIC_SYMBOL(__dlsym_EVP_CIPHER_CTX_encrypting, \ + dlsym_EVP_CIPHER_CTX_encrypting, env, \ +- openssl, "EVP_CIPHER_CTX_encrypting"); ++ openssl, CIPHER_CTX_ENCRYPTING); + #endif + LOAD_DYNAMIC_SYMBOL(__dlsym_EVP_CipherInit_ex, dlsym_EVP_CipherInit_ex, \ + env, openssl, "EVP_CipherInit_ex"); diff --git a/precompiled/hadoop/stackable/patches/3.3.6/0013-YARN-11873-Update-nodejs-to-LTS-version.patch b/precompiled/hadoop/stackable/patches/3.3.6/0013-YARN-11873-Update-nodejs-to-LTS-version.patch new file mode 100644 index 000000000..5da6ff5a7 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/0013-YARN-11873-Update-nodejs-to-LTS-version.patch @@ -0,0 +1,22 @@ +From 140bab81c680be4f8a57c5c98ccf3daebd2f5528 Mon Sep 17 00:00:00 2001 +From: Andrew Kenworthy +Date: Wed, 1 Oct 2025 18:12:09 +0200 +Subject: YARN-11873-Update-nodejs-to-LTS-version + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index da39c1e0ad..615c2d395b 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -213,7 +213,7 @@ + 1.1.3.Final + 5.4.0 + 9.8.1 +- v14.17.0 ++ v22.20.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/precompiled/hadoop/stackable/patches/3.3.6/patchable.toml b/precompiled/hadoop/stackable/patches/3.3.6/patchable.toml new file mode 100644 index 000000000..54002b2ca --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.3.6/patchable.toml @@ -0,0 +1,2 @@ +base = "1be78238728da9266a4f88195058f08fd012bf9c" +mirror = "https://github.com/stackabletech/hadoop.git" diff --git a/precompiled/hadoop/stackable/patches/3.4.2/0001-YARN-11527-Update-node.js.patch b/precompiled/hadoop/stackable/patches/3.4.2/0001-YARN-11527-Update-node.js.patch new file mode 100644 index 000000000..497600fa1 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.4.2/0001-YARN-11527-Update-node.js.patch @@ -0,0 +1,22 @@ +From c4dbb05b4f92f93c7e8f11d6a622b73f40f4664c Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:18:38 +0200 +Subject: YARN-11527-Update-node.js + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index b9eacd5ba3..70f64bf55c 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -234,7 +234,7 @@ + 1.0.2 + 5.4.0 + 9.37.2 +- v12.22.1 ++ v14.17.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/precompiled/hadoop/stackable/patches/3.4.2/0002-Allow-overriding-datanode-registration-addresses.patch b/precompiled/hadoop/stackable/patches/3.4.2/0002-Allow-overriding-datanode-registration-addresses.patch new file mode 100644 index 000000000..0f2b9d916 --- /dev/null +++ 
b/precompiled/hadoop/stackable/patches/3.4.2/0002-Allow-overriding-datanode-registration-addresses.patch @@ -0,0 +1,259 @@ +From adc337817824ba29e7eb669c13730acdbb0b9630 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:36:20 +0200 +Subject: Allow-overriding-datanode-registration-addresses + +--- + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 9 +++ + .../blockmanagement/DatanodeManager.java | 43 +++++++----- + .../hadoop/hdfs/server/datanode/DNConf.java | 70 +++++++++++++++++++ + .../hadoop/hdfs/server/datanode/DataNode.java | 35 ++++++++-- + 4 files changed, 135 insertions(+), 22 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +index f92a2ad565..25bcd438c7 100755 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +@@ -152,6 +152,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; + public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; + public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; ++ ++ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; ++ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = "dfs.datanode.registered.https.port"; ++ public static final String DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; ++ + public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; + public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; + public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; +@@ -491,6 +498,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = + TimeUnit.SECONDS.toMillis(2); + ++ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; ++ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; + public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; + public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +index ebd2fa992e..c56f254478 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +@@ -181,6 +181,8 @@ public class DatanodeManager { + private boolean hasClusterEverBeenMultiRack = false; 
+ + private final boolean checkIpHostnameInRegistration; ++ private final boolean allowRegistrationAddressOverride; ++ + /** + * Whether we should tell datanodes what to cache in replies to + * heartbeat messages. +@@ -314,6 +316,11 @@ public class DatanodeManager { + // Block invalidate limit also has some dependency on heartbeat interval. + // Check setBlockInvalidateLimit(). + setBlockInvalidateLimit(configuredBlockInvalidateLimit); ++ this.allowRegistrationAddressOverride = conf.getBoolean( ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); ++ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY ++ + "=" + allowRegistrationAddressOverride); + this.checkIpHostnameInRegistration = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); +@@ -1158,27 +1165,29 @@ public class DatanodeManager { + */ + public void registerDatanode(DatanodeRegistration nodeReg) + throws DisallowedDatanodeException, UnresolvedTopologyException { +- InetAddress dnAddress = Server.getRemoteIp(); +- if (dnAddress != null) { +- // Mostly called inside an RPC, update ip and peer hostname +- String hostname = dnAddress.getHostName(); +- String ip = dnAddress.getHostAddress(); +- if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { +- // Reject registration of unresolved datanode to prevent performance +- // impact of repetitive DNS lookups later. +- final String message = "hostname cannot be resolved (ip=" +- + ip + ", hostname=" + hostname + ")"; +- LOG.warn("Unresolved datanode registration: " + message); +- throw new DisallowedDatanodeException(nodeReg, message); ++ if (!allowRegistrationAddressOverride) { ++ InetAddress dnAddress = Server.getRemoteIp(); ++ if (dnAddress != null) { ++ // Mostly called inside an RPC, update ip and peer hostname ++ String hostname = dnAddress.getHostName(); ++ String ip = dnAddress.getHostAddress(); ++ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { ++ // Reject registration of unresolved datanode to prevent performance ++ // impact of repetitive DNS lookups later. ++ final String message = "hostname cannot be resolved (ip=" ++ + ip + ", hostname=" + hostname + ")"; ++ LOG.warn("Unresolved datanode registration: " + message); ++ throw new DisallowedDatanodeException(nodeReg, message); ++ } ++ // update node registration with the ip and hostname from rpc request ++ nodeReg.setIpAddr(ip); ++ nodeReg.setPeerHostName(hostname); + } +- // update node registration with the ip and hostname from rpc request +- nodeReg.setIpAddr(ip); +- nodeReg.setPeerHostName(hostname); + } +- ++ + try { + nodeReg.setExportedKeys(blockManager.getBlockKeys()); +- ++ + // Checks if the node is not on the hosts list. If it is not, then + // it will be disallowed from registering. 
+ if (!hostConfigManager.isIncluded(nodeReg)) { +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +index 21b92db307..5d3437239c 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +@@ -101,6 +101,11 @@ public class DNConf { + final boolean syncOnClose; + final boolean encryptDataTransfer; + final boolean connectToDnViaHostname; ++ private final String registeredHostname; ++ private final int registeredDataPort; ++ private final int registeredHttpPort; ++ private final int registeredHttpsPort; ++ private final int registeredIpcPort; + final boolean overwriteDownstreamDerivedQOP; + private final boolean pmemCacheRecoveryEnabled; + +@@ -189,6 +194,11 @@ public class DNConf { + connectToDnViaHostname = getConf().getBoolean( + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); ++ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); ++ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); ++ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); ++ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); ++ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); + this.blockReportInterval = getConf().getLong( + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); +@@ -363,6 +373,66 @@ public class DNConf { + return connectToDnViaHostname; + } + ++ /** ++ * Returns a hostname to register with the cluster instead of the system ++ * hostname. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected hostname. ++ * ++ * @return null if the system hostname should be used, otherwise a hostname ++ */ ++ public String getRegisteredHostname() { ++ return registeredHostname; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * data port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredDataPort() { ++ return registeredDataPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTP port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpPort() { ++ return registeredHttpPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTPS port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpsPort() { ++ return registeredHttpsPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * IPC port that the node is listening on. 
++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredIpcPort() { ++ return registeredIpcPort; ++ } ++ + /** + * Returns socket timeout + * +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +index 956f5bbe51..22ae127d98 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +@@ -135,6 +135,7 @@ import java.util.HashSet; + import java.util.Iterator; + import java.util.List; + import java.util.Map; ++import java.util.Optional; + import java.util.Map.Entry; + import java.util.Set; + import java.util.UUID; +@@ -2076,11 +2077,35 @@ public class DataNode extends ReconfigurableBase + NodeType.DATA_NODE); + } + +- DatanodeID dnId = new DatanodeID( +- streamingAddr.getAddress().getHostAddress(), hostName, +- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), +- infoSecurePort, getIpcPort()); +- return new DatanodeRegistration(dnId, storageInfo, ++ String registeredHostname = Optional ++ .ofNullable(dnConf.getRegisteredHostname()) ++ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); ++ int registeredDataPort = dnConf.getRegisteredDataPort(); ++ if (registeredDataPort == -1) { ++ registeredDataPort = getXferPort(); ++ } ++ int registeredHttpPort = dnConf.getRegisteredHttpPort(); ++ if (registeredHttpPort == -1) { ++ registeredHttpPort = getInfoPort(); ++ } ++ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); ++ if (registeredHttpsPort == -1) { ++ registeredHttpsPort = getInfoSecurePort(); ++ } ++ int registeredIpcPort = dnConf.getRegisteredIpcPort(); ++ if (registeredIpcPort == -1) { ++ registeredIpcPort = getIpcPort(); ++ } ++ ++ DatanodeID dnId = new DatanodeID(registeredHostname, ++ registeredHostname, ++ storage.getDatanodeUuid(), ++ registeredDataPort, ++ registeredHttpPort, ++ registeredHttpsPort, ++ registeredIpcPort); ++ ++ return new DatanodeRegistration(dnId, storageInfo, + new ExportedBlockKeys(), VersionInfo.getVersion()); + } + diff --git a/precompiled/hadoop/stackable/patches/3.4.2/0003-Async-profiler-also-grab-itimer-events.patch b/precompiled/hadoop/stackable/patches/3.4.2/0003-Async-profiler-also-grab-itimer-events.patch new file mode 100644 index 000000000..5191b1465 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.4.2/0003-Async-profiler-also-grab-itimer-events.patch @@ -0,0 +1,29 @@ +From ab9550bd7b71c16c381a105a22732f6e71f2dba6 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:39:20 +0200 +Subject: Async-profiler-also-grab-itimer-events + +--- + .../src/main/java/org/apache/hadoop/http/ProfileServlet.java | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +index ce53274151..909892ff90 100644 +--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +@@ -76,6 +76,7 @@ import org.apache.hadoop.util.ProcessUtils; 
+ * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) + * // Perf events: + * // cpu ++ * // itimer + * // page-faults + * // context-switches + * // cycles +@@ -118,6 +119,7 @@ public class ProfileServlet extends HttpServlet { + private enum Event { + + CPU("cpu"), ++ ITIMER("itimer"), + ALLOC("alloc"), + LOCK("lock"), + PAGE_FAULTS("page-faults"), diff --git a/precompiled/hadoop/stackable/patches/3.4.2/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch b/precompiled/hadoop/stackable/patches/3.4.2/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch new file mode 100644 index 000000000..f210f7c62 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.4.2/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch @@ -0,0 +1,199 @@ +From 2b131b13ad062695d5de8840be744900fa4a71c1 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:40:41 +0200 +Subject: HDFS-17378-Fix-missing-operationType-for-some-operat + +--- + .../hdfs/server/namenode/FSNamesystem.java | 41 +++++++++++-------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +index cfc7f24381..e9c5fc0da2 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +@@ -2618,15 +2618,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + * @throws IOException + */ + BlockStoragePolicy getStoragePolicy(String src) throws IOException { ++ final String operationName = "getStoragePolicy"; + checkOperation(OperationCategory.READ); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + checkOperation(OperationCategory.READ); + return FSDirAttrOp.getStoragePolicy(dir, pc, blockManager, src); + } finally { +- readUnlock("getStoragePolicy"); ++ readUnlock(operationName); + } + } + +@@ -2646,15 +2647,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + } + + long getPreferredBlockSize(String src) throws IOException { ++ final String operationName = "getPreferredBlockSize"; + checkOperation(OperationCategory.READ); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + checkOperation(OperationCategory.READ); + return FSDirAttrOp.getPreferredBlockSize(dir, pc, src); + } finally { +- readUnlock("getPreferredBlockSize"); ++ readUnlock(operationName); + } + } + +@@ -2707,7 +2709,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + boolean createParent, short replication, long blockSize, + CryptoProtocolVersion[] supportedVersions, String ecPolicyName, + String storagePolicy, boolean logRetryCache) throws IOException { +- + HdfsFileStatus status; + try { + status = startFileInt(src, permissions, holder, clientMachine, flag, +@@ -2727,6 +2728,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + long blockSize, CryptoProtocolVersion[] supportedVersions, + String ecPolicyName, String storagePolicy, boolean logRetryCache) + throws IOException 
{ ++ final String operationName = "create"; + if (NameNode.stateChangeLog.isDebugEnabled()) { + StringBuilder builder = new StringBuilder(); + builder.append("DIR* NameSystem.startFile: src=").append(src) +@@ -2764,7 +2766,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -2827,7 +2829,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + dir.writeUnlock(); + } + } finally { +- writeUnlock("create", getLockReportInfoSupplier(src, null, stat)); ++ writeUnlock(operationName, getLockReportInfoSupplier(src, null, stat)); + // There might be transactions logged while trying to recover the lease. + // They need to be sync'ed even when an exception was thrown. + if (!skipSync) { +@@ -2856,10 +2858,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + boolean recoverLease(String src, String holder, String clientMachine) + throws IOException { ++ final String operationName = "recoverLease"; + boolean skipSync = false; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -2880,7 +2883,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + skipSync = true; + throw se; + } finally { +- writeUnlock("recoverLease"); ++ writeUnlock(operationName); + // There might be transactions logged while trying to recover the lease. + // They need to be sync'ed even when an exception was thrown. 
+ if (!skipSync) { +@@ -3096,6 +3099,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + final Set excludes, + final int numAdditionalNodes, final String clientName + ) throws IOException { ++ final String operationName = "getAdditionalDatanode"; + //check if the feature is enabled + dtpReplaceDatanodeOnFailure.checkEnabled(); + +@@ -3107,7 +3111,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + final BlockType blockType; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + // Changing this operation category to WRITE instead of making getAdditionalDatanode as a +@@ -3133,7 +3137,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", + src, fileId, blk, clientName, clientMachine)); + } finally { +- readUnlock("getAdditionalDatanode"); ++ readUnlock(operationName); + } + + if (clientnode == null) { +@@ -3155,10 +3159,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + void abandonBlock(ExtendedBlock b, long fileId, String src, String holder) + throws IOException { ++ final String operationName = "abandonBlock"; + NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} of file {}", b, src); + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3167,7 +3172,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + NameNode.stateChangeLog.debug( + "BLOCK* NameSystem.abandonBlock: {} is removed from pendingCreates", b); + } finally { +- writeUnlock("abandonBlock"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + } +@@ -3221,10 +3226,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + boolean completeFile(final String src, String holder, + ExtendedBlock last, long fileId) + throws IOException { ++ final String operationName = "completeFile"; + boolean success = false; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3232,7 +3238,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last, + fileId); + } finally { +- writeUnlock("completeFile"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + if (success) { +@@ -3666,10 +3672,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + void fsync(String src, long fileId, String clientName, long lastBlockLength) + throws IOException { ++ final String operationName = "fsync"; + NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3683,7 +3690,7 @@ public class FSNamesystem implements Namesystem, 
FSNamesystemMBean, + } + FSDirWriteFileOp.persistBlocks(dir, src, pendingFile, false); + } finally { +- writeUnlock("fsync"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + } diff --git a/precompiled/hadoop/stackable/patches/3.4.2/0005-Update-CycloneDX-plugin.patch b/precompiled/hadoop/stackable/patches/3.4.2/0005-Update-CycloneDX-plugin.patch new file mode 100644 index 000000000..39dd0f143 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.4.2/0005-Update-CycloneDX-plugin.patch @@ -0,0 +1,44 @@ +From 18a413bfaec1acd447b070538be994ea9691899a Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:41:18 +0200 +Subject: Update-CycloneDX-plugin + +--- + pom.xml | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/pom.xml b/pom.xml +index 9a9a0de744..c7838e3674 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -121,7 +121,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + 4.2.0 + 1.1.1 + 3.10.1 +- 2.7.10 ++ 2.8.0 + + bash + +@@ -773,6 +773,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + org.cyclonedx + cyclonedx-maven-plugin + ${cyclonedx.version} ++ ++ application ++ 1.5 ++ false ++ + + + package +@@ -781,9 +786,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + + + +- +- xml +- + + + diff --git a/precompiled/hadoop/stackable/patches/3.4.2/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch b/precompiled/hadoop/stackable/patches/3.4.2/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch new file mode 100644 index 000000000..f8e486155 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.4.2/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch @@ -0,0 +1,245 @@ +From e67476ea801e929feb3d89d023d915efbf82336f Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:41:46 +0200 +Subject: HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for- + +--- + hadoop-project/pom.xml | 2 +- + hadoop-tools/hadoop-aliyun/pom.xml | 26 +++++ + .../aliyun/oss/AliyunOSSFileSystemStore.java | 16 +++ + .../hadoop/fs/aliyun/oss/Constants.java | 15 +++ + .../fs/aliyun/oss/ITAliyunOSSSignatureV4.java | 98 +++++++++++++++++++ + .../src/test/resources/log4j.properties | 3 + + 6 files changed, 159 insertions(+), 1 deletion(-) + create mode 100644 hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 70f64bf55c..6d05a218d0 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -1663,7 +1663,7 @@ + + com.aliyun.oss + aliyun-sdk-oss +- 3.13.2 ++ 3.18.1 + + + org.apache.httpcomponents +diff --git a/hadoop-tools/hadoop-aliyun/pom.xml b/hadoop-tools/hadoop-aliyun/pom.xml +index c2e04623ad..5b12e4a4ef 100644 +--- a/hadoop-tools/hadoop-aliyun/pom.xml ++++ b/hadoop-tools/hadoop-aliyun/pom.xml +@@ -165,5 +165,31 @@ + test + jar + ++ ++ ++ org.junit.jupiter ++ junit-jupiter-api ++ test ++ ++ ++ org.junit.jupiter ++ junit-jupiter-engine ++ test ++ ++ ++ org.junit.jupiter ++ junit-jupiter-params ++ test ++ ++ ++ org.junit.platform ++ junit-platform-launcher ++ test ++ ++ ++ org.junit.vintage ++ junit-vintage-engine ++ test ++ + + +diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java +index ccd5d1ea25..30da259fd5 100644 +--- 
a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java ++++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java +@@ -73,6 +73,7 @@ import java.util.List; + import java.util.ListIterator; + import java.util.NoSuchElementException; + import java.util.stream.Collectors; ++import com.aliyun.oss.common.comm.SignVersion; + + import static org.apache.hadoop.fs.aliyun.oss.Constants.*; + +@@ -113,6 +114,16 @@ public class AliyunOSSFileSystemStore { + conf.get(USER_AGENT_PREFIX, USER_AGENT_PREFIX_DEFAULT) + ", Hadoop/" + + VersionInfo.getVersion()); + ++ String region = conf.get(REGION_KEY, ""); ++ String signatureVersion = conf.get(SIGNATURE_VERSION_KEY, SIGNATURE_VERSION_DEFAULT); ++ if ("V4".equalsIgnoreCase(signatureVersion)) { ++ clientConf.setSignatureVersion(SignVersion.V4); ++ if (StringUtils.isEmpty(region)) { ++ LOG.error("Signature version is V4 ,but region is empty."); ++ throw new IOException("SignVersion is V4 but region is empty"); ++ } ++ } ++ + String proxyHost = conf.getTrimmed(PROXY_HOST_KEY, ""); + int proxyPort = conf.getInt(PROXY_PORT_KEY, -1); + if (StringUtils.isNotEmpty(proxyHost)) { +@@ -171,6 +182,11 @@ public class AliyunOSSFileSystemStore { + statistics.incrementWriteOps(1); + } + ++ if (StringUtils.isNotEmpty(region)) { ++ ossClient.setRegion(region); ++ LOG.debug("ossClient setRegion {}", region); ++ } ++ + maxKeys = conf.getInt(MAX_PAGING_KEYS_KEY, MAX_PAGING_KEYS_DEFAULT); + int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION); + if (listVersion < 1 || listVersion > 2) { +diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java +index baeb919937..176669ed15 100644 +--- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java ++++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java +@@ -211,4 +211,19 @@ public final class Constants { + public static final String LIST_VERSION = "fs.oss.list.version"; + + public static final int DEFAULT_LIST_VERSION = 2; ++ ++ /** ++ * OSS signature version. ++ */ ++ public static final String SIGNATURE_VERSION_KEY = "fs.oss.signatureversion"; ++ ++ /** ++ * OSS signature version DEFAULT {@value}. ++ */ ++ public static final String SIGNATURE_VERSION_DEFAULT = "V1"; ++ ++ /** ++ * OSS region {@value}. ++ */ ++ public static final String REGION_KEY = "fs.oss.region"; + } +diff --git a/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java +new file mode 100644 +index 0000000000..5070f2a581 +--- /dev/null ++++ b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java +@@ -0,0 +1,98 @@ ++/** ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. 
You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. ++ */ ++ ++package org.apache.hadoop.fs.aliyun.oss; ++ ++import org.apache.hadoop.conf.Configuration; ++import org.apache.hadoop.fs.FileStatus; ++import org.apache.hadoop.fs.Path; ++import org.junit.Before; ++import org.junit.Test; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++ ++import java.io.IOException; ++import java.net.URI; ++ ++import static org.apache.hadoop.fs.aliyun.oss.Constants.REGION_KEY; ++import static org.apache.hadoop.fs.aliyun.oss.Constants.SIGNATURE_VERSION_KEY; ++import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; ++import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; ++import static org.junit.Assert.*; ++import static org.junit.Assume.assumeNotNull; ++ ++/** ++ * Tests Aliyun OSS system. ++ */ ++public class ITAliyunOSSSignatureV4 { ++ private static final Logger LOG = LoggerFactory.getLogger(ITAliyunOSSSignatureV4.class); ++ private Configuration conf; ++ private URI testURI; ++ private Path testFile = new Path("ITAliyunOSSSignatureV4/atestr"); ++ ++ @Before ++ public void setUp() throws Exception { ++ conf = new Configuration(); ++ String bucketUri = conf.get("test.fs.oss.name"); ++ LOG.debug("bucketUri={}", bucketUri); ++ testURI = URI.create(bucketUri); ++ } ++ ++ @Test ++ public void testV4() throws IOException { ++ conf.set(SIGNATURE_VERSION_KEY, "V4"); ++ conf.set(REGION_KEY, "cn-hongkong"); ++ AliyunOSSFileSystem fs = new AliyunOSSFileSystem(); ++ fs.initialize(testURI, conf); ++ assumeNotNull(fs); ++ ++ createFile(fs, testFile, true, dataset(256, 0, 255)); ++ FileStatus status = fs.getFileStatus(testFile); ++ fs.delete(testFile); ++ fs.close(); ++ } ++ ++ @Test ++ public void testDefaultSignatureVersion() throws IOException { ++ AliyunOSSFileSystem fs = new AliyunOSSFileSystem(); ++ fs.initialize(testURI, conf); ++ assumeNotNull(fs); ++ ++ Path testFile2 = new Path("/test/atestr"); ++ createFile(fs, testFile2, true, dataset(256, 0, 255)); ++ FileStatus status = fs.getFileStatus(testFile2); ++ fs.delete(testFile2); ++ fs.close(); ++ } ++ ++ @Test ++ public void testV4WithoutRegion() throws IOException { ++ conf.set(SIGNATURE_VERSION_KEY, "V4"); ++ AliyunOSSFileSystem fs = new AliyunOSSFileSystem(); ++ IOException expectedException = null; ++ try { ++ fs.initialize(testURI, conf); ++ } catch (IOException e) { ++ LOG.warn("use V4 , but do not set region, get exception={}", e); ++ expectedException = e; ++ assertEquals("use V4 , but do not set region", e.getMessage(), ++ "SignVersion is V4 but region is empty"); ++ } ++ assertNotNull(expectedException); ++ } ++} +diff --git a/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties b/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties +index bb5cbe5ec3..2167f68811 100644 +--- a/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties ++++ b/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties +@@ -21,3 +21,6 @@ log4j.threshold=ALL + log4j.appender.stdout=org.apache.log4j.ConsoleAppender + log4j.appender.stdout.layout=org.apache.log4j.PatternLayout + 
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n ++ ++# Log all oss classes ++log4j.logger.org.apache.hadoop.fs.aliyun.oss=DEBUG +\ No newline at end of file diff --git a/precompiled/hadoop/stackable/patches/3.4.2/0007-Upgrade-nimbus-jose-jwt-to-9.37.4-to-fix-CVE-2025-53.patch b/precompiled/hadoop/stackable/patches/3.4.2/0007-Upgrade-nimbus-jose-jwt-to-9.37.4-to-fix-CVE-2025-53.patch new file mode 100644 index 000000000..c6bc3e1ed --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.4.2/0007-Upgrade-nimbus-jose-jwt-to-9.37.4-to-fix-CVE-2025-53.patch @@ -0,0 +1,37 @@ +From 95beed1d7436d51e4c8655be107e6c527a1c0eaa Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Thu, 11 Sep 2025 12:12:12 +0200 +Subject: Upgrade-nimbus-jose-jwt-to-9.37.4-to-fix-CVE-2025-53864, Upstream + reference: https://github.com/apache/hadoop/pull/7870 + +--- + LICENSE-binary | 2 +- + hadoop-project/pom.xml | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/LICENSE-binary b/LICENSE-binary +index 3ad4b30045..07f4e5a45f 100644 +--- a/LICENSE-binary ++++ b/LICENSE-binary +@@ -240,7 +240,7 @@ com.google.guava:guava:20.0 + com.google.guava:guava:32.0.1-jre + com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava + com.microsoft.azure:azure-storage:7.0.0 +-com.nimbusds:nimbus-jose-jwt:9.37.2 ++com.nimbusds:nimbus-jose-jwt:9.37.4 + com.zaxxer:HikariCP:4.0.3 + commons-beanutils:commons-beanutils:1.9.4 + commons-cli:commons-cli:1.9.0 +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 6d05a218d0..2e21ff7394 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -233,7 +233,7 @@ + 2.1.4.Final + 1.0.2 + 5.4.0 +- 9.37.2 ++ 9.37.4 + v14.17.0 + v1.22.5 + 1.10.13 diff --git a/precompiled/hadoop/stackable/patches/3.4.2/0008-YARN-11873-Update-nodejs-to-LTS-version.patch b/precompiled/hadoop/stackable/patches/3.4.2/0008-YARN-11873-Update-nodejs-to-LTS-version.patch new file mode 100644 index 000000000..f70f2de50 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.4.2/0008-YARN-11873-Update-nodejs-to-LTS-version.patch @@ -0,0 +1,22 @@ +From 08a7206a29f212e1f0e3bd81e0cb0be7907907a4 Mon Sep 17 00:00:00 2001 +From: Andrew Kenworthy +Date: Wed, 1 Oct 2025 17:28:41 +0200 +Subject: Update nodejs to LTS version + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 2e21ff7394..a7d5faf760 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -234,7 +234,7 @@ + 1.0.2 + 5.4.0 + 9.37.4 +- v14.17.0 ++ v22.20.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/precompiled/hadoop/stackable/patches/3.4.2/patchable.toml b/precompiled/hadoop/stackable/patches/3.4.2/patchable.toml new file mode 100644 index 000000000..07e7ae331 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/3.4.2/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/hadoop.git" +base = "84e8b89ee2ebe6923691205b9e171badde7a495c" diff --git a/precompiled/hadoop/stackable/patches/patchable.toml b/precompiled/hadoop/stackable/patches/patchable.toml new file mode 100644 index 000000000..79e086ca7 --- /dev/null +++ b/precompiled/hadoop/stackable/patches/patchable.toml @@ -0,0 +1,2 @@ +upstream = "https://github.com/apache/hadoop.git" +default-mirror = "https://github.com/stackabletech/hadoop.git" diff --git a/shell.nix b/shell.nix index 00a4f4818..f467c334e 100644 --- a/shell.nix +++ b/shell.nix @@ -5,19 +5,20 @@ }: pkgs.mkShell { - 
packages = [ - pkgs.cargo - pkgs.rustc - pkgs.nodejs + packages = with pkgs; [ + cargo + rustc + nodejs + zizmor ]; - buildInputs = [ + buildInputs = with pkgs; [ # Required by patchable - pkgs.openssl + openssl ]; - nativeBuildInputs = [ + nativeBuildInputs = with pkgs; [ # Required for libraries to be discoverable - pkgs.pkg-config + pkg-config ]; }
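For context on two of the vendored patches above: 0002-Allow-overriding-datanode-registration-addresses introduces datanode-side keys (dfs.datanode.registered.hostname, dfs.datanode.registered.port, dfs.datanode.registered.http.port, dfs.datanode.registered.https.port, dfs.datanode.registered.ipc.port) plus a namenode-side opt-in (dfs.namenode.datanode.registration.unsafe.allow-address-override). A minimal hdfs-site.xml sketch of how these could be wired up in a multihoming setup; the property names come from the patch, while the hostname and port values are purely illustrative assumptions:

    <!-- namenode side: accept the addresses the datanode reports instead of the RPC peer address -->
    <property>
      <name>dfs.namenode.datanode.registration.unsafe.allow-address-override</name>
      <value>true</value>
    </property>
    <!-- datanode side: externally reachable address to register (values are assumed examples) -->
    <property>
      <name>dfs.datanode.registered.hostname</name>
      <value>datanode-0.example.com</value>
    </property>
    <property>
      <name>dfs.datanode.registered.port</name>
      <value>31000</value>
    </property>

If the override flag stays at its default of false, registration keeps the pre-patch behaviour and the ip/hostname are taken from the RPC connection; any registered.* key left unset (or -1 for the port keys) falls back to the address and ports the datanode actually listens on.

Similarly, the HADOOP-19352 backport above adds V4 request signing to the Aliyun OSS connector: setting fs.oss.signatureversion to V4 requires fs.oss.region to be set as well, otherwise initialization fails with "SignVersion is V4 but region is empty". A short core-site.xml sketch; the region value mirrors the one used in the new ITAliyunOSSSignatureV4 test and is only an example:

    <property>
      <name>fs.oss.signatureversion</name>
      <value>V4</value>
    </property>
    <property>
      <name>fs.oss.region</name>
      <value>cn-hongkong</value>
    </property>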