You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by st...@apache.org on 2023/11/30 11:26:58 UTC
(phoenix-connectors) 05/07: PHOENIX-7118 Fix Shading Regressions in Spark Connector
This is an automated email from the ASF dual-hosted git repository.
stoty pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/phoenix-connectors.git
commit bfbfb8d14e63f39adb2bce3e9ed281dccf4dd875
Author: Istvan Toth <st...@apache.org>
AuthorDate: Wed Nov 22 15:54:46 2023 +0100
PHOENIX-7118 Fix Shading Regressions in Spark Connector
---
phoenix5-spark-shaded/pom.xml | 271 ++++++++++++++++++++++++++++++++++++++---
phoenix5-spark3-shaded/pom.xml | 38 +++---
pom.xml | 7 +-
3 files changed, 282 insertions(+), 34 deletions(-)
diff --git a/phoenix5-spark-shaded/pom.xml b/phoenix5-spark-shaded/pom.xml
index 7cab58c..1b9ff72 100644
--- a/phoenix5-spark-shaded/pom.xml
+++ b/phoenix5-spark-shaded/pom.xml
@@ -30,13 +30,15 @@
<modelVersion>4.0.0</modelVersion>
<artifactId>phoenix5-spark-shaded</artifactId>
- <name>Shaded Phoenix Spark Connector for Phoenix 5</name>
+ <name>Shaded Phoenix Spark 2 Connector for Phoenix 5</name>
<properties>
<top.dir>${project.basedir}/..</top.dir>
</properties>
<dependencies>
+
+ <!-- Phoenix comes first, as we shade most dependencies anyway -->
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix5-spark</artifactId>
@@ -44,18 +46,199 @@
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-hbase-compat-${hbase.compat.version}</artifactId>
- <scope>runtime</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- Phoenix excludes commons-beanutils from the Omid dependency, but that's basically a bug.
+ We need to add it back, so that we don't depend on Hadoop's commons-beanutils, which may or
+ may not be shaded.
+ This can be removed once we use a Phoenix version that doesn't have this problem. -->
+ <dependency>
+ <groupId>commons-beanutils</groupId>
+ <artifactId>commons-beanutils</artifactId>
+ <scope>compile</scope>
+ </dependency>
+
+ <!-- Mark every Hadoop jar as provided -->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-auth</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-api</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs-client</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-distcp</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-common</artifactId>
+ <scope>provided</scope>
</dependency>
<!-- We want to take the implementation from Spark -->
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <scope>provided</scope>
+ </dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<scope>provided</scope>
</dependency>
+
+ <!-- Mark HBase as provided, too -->
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-mapreduce</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-replication</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-endpoint</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-endpoint</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-metrics-api</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-metrics</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-protocol</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-protocol-shaded</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop2-compat</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-zookeeper</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase.thirdparty</groupId>
+ <artifactId>hbase-shaded-netty</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase.thirdparty</groupId>
+ <artifactId>hbase-shaded-miscellaneous</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase.thirdparty</groupId>
+ <artifactId>hbase-shaded-protobuf</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <!-- Other dependencies we don't want to shade in, but are not transitively excluded by the
+ above for some reason -->
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <!-- random version, for exclusion only -->
+ <version>11.0.2</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.github.stephenc.findbugs</groupId>
+ <artifactId>findbugs-annotations</artifactId>
+ <!-- random version, for exclusion only -->
+ <version>1.3.9-1</version>
+ <scope>provided</scope>
+ </dependency>
</dependencies>
<build>
<plugins>
+ <!-- Taken from phoenix-client-parent; this should be kept in sync with
+ Phoenix as much as possible. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
@@ -75,6 +258,7 @@
<exclude>NOTICE.*</exclude>
<exclude>NOTICE</exclude>
<exclude>README*</exclude>
+ <exclude>META-INF/versions/11/org/glassfish/jersey/internal/jsr166/*.class</exclude>
<!-- Coming from Omid, should be fixed there -->
<exclude>log4j.properties</exclude>
</excludes>
@@ -107,31 +291,32 @@
</filters>
<transformers>
<transformer
- implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+ implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
<transformer
- implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
<resource>csv-bulk-load-config.properties</resource>
<file>
${project.basedir}/../config/csv-bulk-load-config.properties
</file>
</transformer>
<transformer
- implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
<resource>README.md</resource>
<file>${project.basedir}/../README.md</file>
</transformer>
<transformer
- implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
<resource>LICENSE.txt</resource>
<file>${project.basedir}/../LICENSE</file>
</transformer>
<transformer
- implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
<resource>NOTICE</resource>
<file>${project.basedir}/../NOTICE</file>
</transformer>
</transformers>
<relocations>
+ <!-- Keep phoenix-client-byo-shaded-hadoop in sync with this -->
<relocation>
<pattern>org/</pattern>
<shadedPattern>${shaded.package}.org.</shadedPattern>
@@ -156,20 +341,68 @@
<exclude>org/apache/phoenix/**</exclude>
<exclude>org/apache/omid/**</exclude>
<!-- Do want/need to expose Tephra as well ? -->
+ <!-- See PHOENIX-7118
+ Depending on the Spark classpath we may need to leave this unshaded, relocate
+ it under org/apache/hadoop/shaded/ or under org/apache/hadoop/hbase/shaded/.
+ The only thing that is guaranteed not to work is relocating it under
+ ${shaded.package} -->
+ <exclude>org/apache/commons/configuration2/**</exclude>
</excludes>
</relocation>
+ <!-- We cannot use the more elegant shading rules in -client
+ and -server for com packages, but it SHOULD be equivalent, except for the
+ protobuf change for hbase-shaded-client compatibility -->
<relocation>
- <pattern>com/</pattern>
- <shadedPattern>${shaded.package}.com.</shadedPattern>
- <excludes>
- <!-- Not the com/ packages that are a part of particular jdk implementations -->
- <exclude>com/sun/tools/**</exclude>
- <exclude>com/sun/javadoc/**</exclude>
- <exclude>com/sun/security/**</exclude>
- <exclude>com/sun/jndi/**</exclude>
- <exclude>com/sun/management/**</exclude>
- </excludes>
+ <pattern>com/beust/</pattern>
+ <shadedPattern>${shaded.package}.com.beust.</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com/clearspring/</pattern>
+ <shadedPattern>${shaded.package}.com.clearspring.</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com/codahale/</pattern>
+ <shadedPattern>${shaded.package}.com.codahale.</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com/fasterxml/</pattern>
+ <shadedPattern>${shaded.package}.com.fasterxml.</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com/force/</pattern>
+ <shadedPattern>${shaded.package}.com.force.</shadedPattern>
</relocation>
+ <relocation>
+ <pattern>com/google/gson/</pattern>
+ <shadedPattern>${shaded.package}.com.google.gson.</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com/google/inject/</pattern>
+ <shadedPattern>${shaded.package}.com.google.inject.</shadedPattern>
+ </relocation>
+ <!-- This is protobuf 2.5.0 which is shaded to this package in hbase-shaded-client,
+ not the modified protobuf 3.x from hbase-thirdparty -->
+ <relocation>
+ <pattern>com/google/protobuf/</pattern>
+ <shadedPattern>${hbase.shaded.package}.com.google.protobuf.</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com/ibm/</pattern>
+ <shadedPattern>${shaded.package}.com.ibm.</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com/lmax/</pattern>
+ <shadedPattern>${shaded.package}.com.lmax.</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com/sun/jna/</pattern>
+ <shadedPattern>${shaded.package}.com.sun.jna.</shadedPattern>
+ </relocation>
+ <relocation>
+ <pattern>com/squareup/</pattern>
+ <shadedPattern>${shaded.package}.com.squareup.</shadedPattern>
+ </relocation>
+
<relocation>
<pattern>io/</pattern>
<shadedPattern>${shaded.package}.io.</shadedPattern>
@@ -275,8 +508,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<executions>
<execution>
- <id>default-compile</id>
- <phase>none</phase>
+ <id>default-compile</id>
+ <phase>none</phase>
</execution>
</executions>
</plugin>
diff --git a/phoenix5-spark3-shaded/pom.xml b/phoenix5-spark3-shaded/pom.xml
index d10e9f2..cfc90a0 100644
--- a/phoenix5-spark3-shaded/pom.xml
+++ b/phoenix5-spark3-shaded/pom.xml
@@ -23,8 +23,8 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
- <artifactId>phoenix-connectors</artifactId>
<groupId>org.apache.phoenix</groupId>
+ <artifactId>phoenix-connectors</artifactId>
<version>6.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
@@ -53,6 +53,16 @@
<scope>provided</scope>
</dependency>
+ <!-- Phoenix excludes commons-beanutils from the Omid dependency, but that's basically a bug.
+ We need to add it back, so that we don't depend on Hadoop's commons-beanutils, which may or
+ may not be shaded.
+ This can be removed once we use a Phoenix version that doesn't have this problem. -->
+ <dependency>
+ <groupId>commons-beanutils</groupId>
+ <artifactId>commons-beanutils</artifactId>
+ <scope>compile</scope>
+ </dependency>
+
<!-- Mark every Hadoop jar as provided -->
<dependency>
<groupId>org.apache.hadoop</groupId>
@@ -227,7 +237,7 @@
<build>
<plugins>
- <!-- Taken from phoenix-client-parent this should be kept in sync with
+ <!-- Taken from phoenix-client-parent this should be kept in sync with
Phoenix as much as possible -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@@ -323,24 +333,25 @@
<exclude>org/w3c/dom/**</exclude>
<exclude>org/xml/sax/**</exclude>
<!-- Extras compared to Hadoop -->
- <!-- Hbase classes - Maybe these could be shaded as well
- ? -->
+ <!-- Hbase classes - Maybe these could be shaded as well ? -->
<exclude>org/apache/hbase/**</exclude>
<!-- We use the spark classpath directly -->
<exclude>org/apache/spark/**</exclude>
<!-- Phoenix classes -->
<exclude>org/apache/phoenix/**</exclude>
<exclude>org/apache/omid/**</exclude>
- <!-- We must not shade this, as this is provided by HBase.
- This is specific to -->
- <!-- the cases where we rely on external HBase / Hadoop -->
- <exclude>org.apache.commons.beanutils/**</exclude>
<!-- Do want/need to expose Tephra as well ? -->
+ <!-- See PHOENIX-7118
+ Depending on the Spark classpath we may need to leave this unshaded, relocate
+ it under org/apache/hadoop/shaded/ or under org/apache/hadoop/hbase/shaded/.
+ The only thing that is guaranteed not to work is relocating it under
+ ${shaded.package} -->
+ <exclude>org/apache/commons/configuration2/**</exclude>
</excludes>
</relocation>
<!-- We cannot use the more elegant shading rules in -client
and -server for com packages, but it SHOULD be equivalent, except for the
- changes for hbase-shaded-client compatibility -->
+ protobuf change for hbase-shaded-client compatibility -->
<relocation>
<pattern>com/beust/</pattern>
<shadedPattern>${shaded.package}.com.beust.</shadedPattern>
@@ -369,7 +380,8 @@
<pattern>com/google/inject/</pattern>
<shadedPattern>${shaded.package}.com.google.inject.</shadedPattern>
</relocation>
- <!-- HBase shaded ! -->
+ <!-- This is protobuf 2.5.0 which is shaded to this package in hbase-shaded-client,
+ not the modified protobuf 3.x from hbase-thirdparty -->
<relocation>
<pattern>com/google/protobuf/</pattern>
<shadedPattern>${hbase.shaded.package}.com.google.protobuf.</shadedPattern>
@@ -395,8 +407,7 @@
<pattern>io/</pattern>
<shadedPattern>${shaded.package}.io.</shadedPattern>
<excludes>
- <!-- Exclude config keys for Hadoop that look like package
- names -->
+ <!-- Exclude config keys for Hadoop that look like package names -->
<exclude>io/compression/**</exclude>
<exclude>io/mapfile/**</exclude>
<exclude>io/map/index/*</exclude>
@@ -440,8 +451,7 @@
<pattern>net/</pattern>
<shadedPattern>${shaded.package}.net.</shadedPattern>
<excludes>
- <!-- Exclude config keys for Hadoop that look like package
- names -->
+ <!-- Exclude config keys for Hadoop that look like package names -->
<exclude>net/topology/**</exclude>
</excludes>
</relocation>
diff --git a/pom.xml b/pom.xml
index 5dbfdec..1f01f94 100644
--- a/pom.xml
+++ b/pom.xml
@@ -57,6 +57,7 @@
<!-- Phoenix Version -->
<phoenix.version>5.1.3</phoenix.version>
<omid.version>1.0.2</omid.version>
+ <commons-beanutils.version>1.9.4</commons-beanutils.version>
<phoenix.thirdparty.version>2.0.0</phoenix.thirdparty.version>
<!-- The should match the versions used to build HBase and Hadoop -->
<hbase.version>2.4.16</hbase.version>
@@ -549,7 +550,11 @@
<artifactId>phoenix5-hive-shaded</artifactId>
<version>${project.version}</version>
</dependency>
-
+ <dependency>
+ <groupId>commons-beanutils</groupId>
+ <artifactId>commons-beanutils</artifactId>
+ <version>${commons-beanutils.version}</version>
+ </dependency>
<!-- HBase dependencies -->
<!-- These are only needed so that we can set them provided and exclude from the shaded jars -->