You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2021/09/14 18:17:38 UTC
[hadoop] branch trunk updated: HADOOP-17891. Exclude snappy-java
and lz4-java from relocation in shaded hadoop client libraries (#3385)
This is an automated email from the ASF dual-hosted git repository.
sunchao pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new b8f7c75 HADOOP-17891. Exclude snappy-java and lz4-java from relocation in shaded hadoop client libraries (#3385)
b8f7c75 is described below
commit b8f7c7527a7c33c204315a6ea615b4d3fd237744
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Tue Sep 14 11:17:18 2021 -0700
HADOOP-17891. Exclude snappy-java and lz4-java from relocation in shaded hadoop client libraries (#3385)
---
dev-support/bin/hadoop.sh | 11 +-
hadoop-client-modules/hadoop-client-api/pom.xml | 17 +++
.../hadoop-client-check-invariants/pom.xml | 2 +
.../resources/ensure-jars-have-correct-contents.sh | 2 +
.../hadoop-client-check-test-invariants/pom.xml | 2 +
.../hadoop-client-integration-tests/pom.xml | 5 +
.../apache/hadoop/example/ITUseHadoopCodecs.java | 144 +++++++++++++++++++++
.../hadoop-client-minicluster/pom.xml | 14 ++
.../hadoop-client-runtime/pom.xml | 14 ++
9 files changed, 209 insertions(+), 2 deletions(-)
diff --git a/dev-support/bin/hadoop.sh b/dev-support/bin/hadoop.sh
index 28d3ad2..e055519 100755
--- a/dev-support/bin/hadoop.sh
+++ b/dev-support/bin/hadoop.sh
@@ -513,7 +513,7 @@ function shadedclient_initialize
maven_add_install shadedclient
}
-## @description build client facing shaded artifacts and test them
+## @description build client facing shaded and non-shaded artifacts and test them
## @audience private
## @stability evolving
## @param repostatus
@@ -546,13 +546,20 @@ function shadedclient_rebuild
return 0
fi
- big_console_header "Checking client artifacts on ${repostatus}"
+ big_console_header "Checking client artifacts on ${repostatus} with shaded clients"
echo_and_redirect "${logfile}" \
"${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \
"${modules[@]}" \
-Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true
+ big_console_header "Checking client artifacts on ${repostatus} with non-shaded clients"
+
+ echo_and_redirect "${logfile}" \
+ "${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \
+ "${modules[@]}" \
+ -Pnoshade -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true
+
count=$("${GREP}" -c '\[ERROR\]' "${logfile}")
if [[ ${count} -gt 0 ]]; then
add_vote_table -1 shadedclient "${repostatus} has errors when building and testing our client artifacts."
diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml
index 1a83743..d0d62f5 100644
--- a/hadoop-client-modules/hadoop-client-api/pom.xml
+++ b/hadoop-client-modules/hadoop-client-api/pom.xml
@@ -67,6 +67,13 @@
</exclusion>
</exclusions>
</dependency>
+ <!-- snappy-java is a native library and cannot be relocated, so we explicitly exclude it
+ from the shaded jar to prevent possible conflicts. It is declared as a transitive dependency
+ so that downstream projects pull it in. -->
+ <dependency>
+ <groupId>org.xerial.snappy</groupId>
+ <artifactId>snappy-java</artifactId>
+ </dependency>
</dependencies>
<profiles>
<profile>
@@ -109,6 +116,10 @@
<includes>
<include>org.apache.hadoop:*</include>
</includes>
+ <excludes>
+ <!-- Leave snappy unshaded because it includes native methods, which cannot be relocated. -->
+ <exclude>org.xerial.snappy:*</exclude>
+ </excludes>
</artifactSet>
<filters>
<!-- We get these package level classes from various yarn api jars -->
@@ -147,6 +158,9 @@
<exclude>org/xml/sax/**/*</exclude>
<exclude>org/bouncycastle/*</exclude>
<exclude>org/bouncycastle/**/*</exclude>
+ <!-- Exclude snappy-java -->
+ <exclude>org/xerial/snappy/*</exclude>
+ <exclude>org/xerial/snappy/**/*</exclude>
</excludes>
</relocation>
<relocation>
@@ -223,6 +237,9 @@
<!-- Exclude config keys for Hadoop that look like package names -->
<exclude>net/topology/*</exclude>
<exclude>net/topology/**/*</exclude>
+ <!-- Exclude lz4-java -->
+ <exclude>net/jpountz/*</exclude>
+ <exclude>net/jpountz/**/*</exclude>
</excludes>
</relocation>
<!-- okio declares a top level package instead of nested -->
diff --git a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml
index 6ae9900..9d4bce1 100644
--- a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml
+++ b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml
@@ -90,6 +90,8 @@
<exclude>com.google.code.findbugs:jsr305</exclude>
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
<exclude>org.bouncycastle:*</exclude>
+ <!-- Leave snappy unshaded because it includes native methods, which cannot be relocated. -->
+ <exclude>org.xerial.snappy:*</exclude>
</excludes>
</banTransitiveDependencies>
<banDuplicateClasses>
diff --git a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh
index 7242ade..2e92740 100644
--- a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh
+++ b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh
@@ -67,6 +67,8 @@ allowed_expr+="|^krb5_udp-template.conf$"
# Jetty uses this style sheet for directory listings. TODO ensure our
# internal use of jetty disallows directory listings and remove this.
allowed_expr+="|^jetty-dir.css$"
+# snappy-java is a native library, so we cannot relocate it under org/apache/hadoop.
+allowed_expr+="|^org/xerial/"
allowed_expr+=")"
declare -i bad_artifacts=0
diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml
index bec5e6f..635250e 100644
--- a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml
+++ b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml
@@ -98,6 +98,8 @@
<exclude>com.google.code.findbugs:jsr305</exclude>
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
<exclude>org.bouncycastle:*</exclude>
+ <!-- Leave snappy unshaded because it includes native methods, which cannot be relocated. -->
+ <exclude>org.xerial.snappy:*</exclude>
</excludes>
</banTransitiveDependencies>
<banDuplicateClasses>
diff --git a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml
index 978918e..960421b 100644
--- a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml
+++ b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml
@@ -52,6 +52,11 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.lz4</groupId>
+ <artifactId>lz4-java</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<profiles>
<profile>
diff --git a/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseHadoopCodecs.java b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseHadoopCodecs.java
new file mode 100644
index 0000000..fd0effa
--- /dev/null
+++ b/hadoop-client-modules/hadoop-client-integration-tests/src/test/java/org/apache/hadoop/example/ITUseHadoopCodecs.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+package org.apache.hadoop.example;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+
+import java.io.*;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.RandomDatum;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionInputStream;
+import org.apache.hadoop.io.compress.CompressionOutputStream;
+import org.apache.hadoop.io.compress.zlib.ZlibFactory;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Ensure that we can perform codec operations given the API and runtime jars
+ * by performing some simple smoke tests.
+ */
+public class ITUseHadoopCodecs {
+
+ private static final Logger LOG = LoggerFactory.getLogger(ITUseHadoopCodecs.class);
+
+ private Configuration haddopConf = new Configuration();
+ private int dataCount = 100;
+ private int dataSeed = new Random().nextInt();
+
+ @Test
+ public void testGzipCodec() throws IOException {
+ ZlibFactory.setNativeZlibLoaded(false);
+ assertFalse(ZlibFactory.isNativeZlibLoaded(haddopConf));
+ codecTest(haddopConf, dataSeed, 0, "org.apache.hadoop.io.compress.GzipCodec");
+ codecTest(haddopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.GzipCodec");
+ }
+
+ @Test
+ public void testSnappyCodec() throws IOException {
+ codecTest(haddopConf, dataSeed, 0, "org.apache.hadoop.io.compress.SnappyCodec");
+ codecTest(haddopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.SnappyCodec");
+ }
+
+ @Test
+ public void testLz4Codec() {
+ Arrays.asList(false, true).forEach(config -> {
+ haddopConf.setBoolean(
+ CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY,
+ config);
+ try {
+ codecTest(haddopConf, dataSeed, 0, "org.apache.hadoop.io.compress.Lz4Codec");
+ codecTest(haddopConf, dataSeed, dataCount, "org.apache.hadoop.io.compress.Lz4Codec");
+ } catch (IOException e) {
+ throw new RuntimeException("failed when running codecTest", e);
+ }
+ });
+ }
+
+ private void codecTest(Configuration conf, int seed, int count, String codecClass)
+ throws IOException {
+
+ // Create the codec
+ CompressionCodec codec = null;
+ try {
+ codec = (CompressionCodec)
+ ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
+ } catch (ClassNotFoundException cnfe) {
+ throw new IOException("Illegal codec!");
+ }
+ LOG.info("Created a Codec object of type: " + codecClass);
+
+ // Generate data
+ DataOutputBuffer data = new DataOutputBuffer();
+ RandomDatum.Generator generator = new RandomDatum.Generator(seed);
+ for(int i = 0; i < count; ++i) {
+ generator.next();
+ RandomDatum key = generator.getKey();
+ RandomDatum value = generator.getValue();
+
+ key.write(data);
+ value.write(data);
+ }
+ LOG.info("Generated " + count + " records");
+
+ // Compress data
+ DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
+ try (CompressionOutputStream deflateFilter =
+ codec.createOutputStream(compressedDataBuffer);
+ DataOutputStream deflateOut =
+ new DataOutputStream(new BufferedOutputStream(deflateFilter))) {
+ deflateOut.write(data.getData(), 0, data.getLength());
+ deflateOut.flush();
+ deflateFilter.finish();
+ }
+
+ // De-compress data
+ DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
+ deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0,
+ compressedDataBuffer.getLength());
+ DataInputBuffer originalData = new DataInputBuffer();
+ originalData.reset(data.getData(), 0, data.getLength());
+ try (CompressionInputStream inflateFilter =
+ codec.createInputStream(deCompressedDataBuffer);
+ DataInputStream originalIn =
+ new DataInputStream(new BufferedInputStream(originalData))) {
+
+ // Check
+ int expected;
+ do {
+ expected = originalIn.read();
+ assertEquals("Inflated stream read by byte does not match",
+ expected, inflateFilter.read());
+ } while (expected != -1);
+ }
+
+ LOG.info("SUCCESS! Completed checking " + count + " records");
+ }
+}
diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml
index a35d832..c9ce6f2 100644
--- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml
+++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml
@@ -40,6 +40,12 @@
<artifactId>hadoop-client-api</artifactId>
<scope>runtime</scope>
</dependency>
+ <!-- This is a compile dependency of the api, but we don't want it to be a compile dependency here too. -->
+ <dependency>
+ <groupId>org.xerial.snappy</groupId>
+ <artifactId>snappy-java</artifactId>
+ <scope>runtime</scope>
+ </dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-runtime</artifactId>
@@ -682,6 +688,8 @@
<!-- We need a filter that matches just those things that are included in the above artiacts -->
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
<exclude>org.bouncycastle:*</exclude>
+ <!-- Leave snappy unshaded because it includes native methods, which cannot be relocated. -->
+ <exclude>org.xerial.snappy:*</exclude>
</excludes>
</artifactSet>
<filters>
@@ -883,6 +891,9 @@
<exclude>org/xml/sax/**/*</exclude>
<exclude>org/bouncycastle/*</exclude>
<exclude>org/bouncycastle/**/*</exclude>
+ <!-- Exclude snappy-java -->
+ <exclude>org/xerial/snappy/*</exclude>
+ <exclude>org/xerial/snappy/**/*</exclude>
</excludes>
</relocation>
<relocation>
@@ -1001,6 +1012,9 @@
<!-- Exclude config keys for Hadoop that look like package names -->
<exclude>net/topology/*</exclude>
<exclude>net/topology/**/*</exclude>
+ <!-- Exclude lz4-java -->
+ <exclude>net/jpountz/*</exclude>
+ <exclude>net/jpountz/**/*</exclude>
</excludes>
</relocation>
<!-- okio declares a top level package instead of nested -->
diff --git a/hadoop-client-modules/hadoop-client-runtime/pom.xml b/hadoop-client-modules/hadoop-client-runtime/pom.xml
index f1eb8a9..b6a71e5 100644
--- a/hadoop-client-modules/hadoop-client-runtime/pom.xml
+++ b/hadoop-client-modules/hadoop-client-runtime/pom.xml
@@ -60,6 +60,12 @@
<artifactId>hadoop-client-api</artifactId>
<scope>runtime</scope>
</dependency>
+ <!-- This is a compile dependency of the api, but we don't want it to be a compile dependency here too. -->
+ <dependency>
+ <groupId>org.xerial.snappy</groupId>
+ <artifactId>snappy-java</artifactId>
+ <scope>runtime</scope>
+ </dependency>
<!-- This comes from our parent pom. If we don't expressly change it here to get included,
downstream will get warnings at compile time. -->
<dependency>
@@ -155,6 +161,8 @@
<exclude>org.ow2.asm:*</exclude>
<!-- Leave bouncycastle unshaded because it's signed with a special Oracle certificate so it can be a custom JCE security provider -->
<exclude>org.bouncycastle:*</exclude>
+ <!-- Leave snappy unshaded because it includes native methods, which cannot be relocated. -->
+ <exclude>org.xerial.snappy:*</exclude>
</excludes>
</artifactSet>
<filters>
@@ -259,6 +267,9 @@
<exclude>org/xml/sax/**/*</exclude>
<exclude>org/bouncycastle/*</exclude>
<exclude>org/bouncycastle/**/*</exclude>
+ <!-- Exclude snappy-java -->
+ <exclude>org/xerial/snappy/*</exclude>
+ <exclude>org/xerial/snappy/**/*</exclude>
</excludes>
</relocation>
<relocation>
@@ -349,6 +360,9 @@
<!-- Exclude config keys for Hadoop that look like package names -->
<exclude>net/topology/*</exclude>
<exclude>net/topology/**/*</exclude>
+ <!-- Exclude lz4-java -->
+ <exclude>net/jpountz/*</exclude>
+ <exclude>net/jpountz/**/*</exclude>
</excludes>
</relocation>
<!-- okio declares a top level package instead of nested -->
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org