You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by st...@apache.org on 2023/11/30 11:26:58 UTC

(phoenix-connectors) 05/07: PHOENIX-7118 Fix Shading Regressions in Spark Connector

This is an automated email from the ASF dual-hosted git repository.

stoty pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/phoenix-connectors.git

commit bfbfb8d14e63f39adb2bce3e9ed281dccf4dd875
Author: Istvan Toth <st...@apache.org>
AuthorDate: Wed Nov 22 15:54:46 2023 +0100

    PHOENIX-7118 Fix Shading Regressions in Spark Connector
---
 phoenix5-spark-shaded/pom.xml  | 271 ++++++++++++++++++++++++++++++++++++++---
 phoenix5-spark3-shaded/pom.xml |  38 +++---
 pom.xml                        |   7 +-
 3 files changed, 282 insertions(+), 34 deletions(-)

diff --git a/phoenix5-spark-shaded/pom.xml b/phoenix5-spark-shaded/pom.xml
index 7cab58c..1b9ff72 100644
--- a/phoenix5-spark-shaded/pom.xml
+++ b/phoenix5-spark-shaded/pom.xml
@@ -30,13 +30,15 @@
   <modelVersion>4.0.0</modelVersion>
 
   <artifactId>phoenix5-spark-shaded</artifactId>
-  <name>Shaded Phoenix Spark Connector for Phoenix 5</name>
+  <name>Shaded Phoenix Spark 2 Connector for Phoenix 5</name>
 
   <properties>
     <top.dir>${project.basedir}/..</top.dir>
   </properties>
 
   <dependencies>
+
+    <!-- Phoenix comes first, as we shade most dependencies anyway -->
     <dependency>
       <groupId>org.apache.phoenix</groupId>
       <artifactId>phoenix5-spark</artifactId>
@@ -44,18 +46,199 @@
     <dependency>
       <groupId>org.apache.phoenix</groupId>
       <artifactId>phoenix-hbase-compat-${hbase.compat.version}</artifactId>
-      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- Phoenix excludes commons-beanutils from the Omid dependency, but that's basically a bug.
+     We need to add it back, so that we don't depend on Hadoop's commons-beanutils, which may or
+     may not be shaded.
+     This can be removed once we use a Phoenix version that doesn't have this problem. -->
+    <dependency>
+      <groupId>commons-beanutils</groupId>
+      <artifactId>commons-beanutils</artifactId>
+      <scope>compile</scope>
+    </dependency>
+
+    <!-- Mark every Hadoop jar as provided -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-core</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-auth</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs-client</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-distcp</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-common</artifactId>
+      <scope>provided</scope>
     </dependency>
     <!-- We want to take the implementation from Spark -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>provided</scope>
+    </dependency>
     <dependency>
       <groupId>log4j</groupId>
       <artifactId>log4j</artifactId>
       <scope>provided</scope>
     </dependency>
+
+    <!-- Mark HBase as provided, too -->
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-client</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-common</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-mapreduce</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-replication</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-endpoint</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-endpoint</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-metrics-api</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-metrics</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-protocol</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-protocol-shaded</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-server</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-hadoop-compat</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-hadoop2-compat</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-zookeeper</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase.thirdparty</groupId>
+      <artifactId>hbase-shaded-netty</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase.thirdparty</groupId>
+      <artifactId>hbase-shaded-miscellaneous</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase.thirdparty</groupId>
+      <artifactId>hbase-shaded-protobuf</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <!-- Other dependencies we don't want to shade in, but are not transitively excluded by the
+    above for some reason -->
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <!-- random version, for exclusion only -->
+      <version>11.0.2</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.github.stephenc.findbugs</groupId>
+      <artifactId>findbugs-annotations</artifactId>
+      <!-- random version, for exclusion only -->
+      <version>1.3.9-1</version>
+      <scope>provided</scope>
+    </dependency>
   </dependencies>
 
   <build>
     <plugins>
+      <!-- Taken from phoenix-client-parent this should be kept in sync with
+        Phoenix as much as possible -->
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-shade-plugin</artifactId>
@@ -75,6 +258,7 @@
                 <exclude>NOTICE.*</exclude>
                 <exclude>NOTICE</exclude>
                 <exclude>README*</exclude>
+                <exclude>META-INF/versions/11/org/glassfish/jersey/internal/jsr166/*.class</exclude>
                 <!-- Coming from Omid, should be fixed there -->
                 <exclude>log4j.properties</exclude>
               </excludes>
@@ -107,31 +291,32 @@
           </filters>
           <transformers>
             <transformer
-                    implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+              implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
             <transformer
-                    implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+              implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
               <resource>csv-bulk-load-config.properties</resource>
               <file>
                 ${project.basedir}/../config/csv-bulk-load-config.properties
               </file>
             </transformer>
             <transformer
-                    implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+              implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
               <resource>README.md</resource>
               <file>${project.basedir}/../README.md</file>
             </transformer>
             <transformer
-                    implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+              implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
               <resource>LICENSE.txt</resource>
               <file>${project.basedir}/../LICENSE</file>
             </transformer>
             <transformer
-                    implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+              implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
               <resource>NOTICE</resource>
               <file>${project.basedir}/../NOTICE</file>
             </transformer>
           </transformers>
           <relocations>
+            <!-- Keep phoenix-client-byo-shaded-hadoop in sync with this -->
             <relocation>
               <pattern>org/</pattern>
               <shadedPattern>${shaded.package}.org.</shadedPattern>
@@ -156,20 +341,68 @@
                 <exclude>org/apache/phoenix/**</exclude>
                 <exclude>org/apache/omid/**</exclude>
                 <!-- Do want/need to expose Tephra as well ? -->
+                <!-- See PHOENIX-7118
+                 Depending on the Spark classpath we may need to leave this unshaded, relocate
+                 it under org/apache/hadoop/shaded/ or under org/apache/hadoop/hbase/shaded/.
+                 The only thing that is guaranteed not to work is relocating it under
+                 ${shaded.package} -->
+                <exclude>org/apache/commons/configuration2/**</exclude>
               </excludes>
             </relocation>
+            <!-- We cannot use the more elegant shading rules in -client
+              and -server for com packages, but it SHOULD be equivalent, except for the
+              protobuf change for hbase-shaded-client compatibility -->
             <relocation>
-              <pattern>com/</pattern>
-              <shadedPattern>${shaded.package}.com.</shadedPattern>
-              <excludes>
-                <!-- Not the com/ packages that are a part of particular jdk implementations -->
-                <exclude>com/sun/tools/**</exclude>
-                <exclude>com/sun/javadoc/**</exclude>
-                <exclude>com/sun/security/**</exclude>
-                <exclude>com/sun/jndi/**</exclude>
-                <exclude>com/sun/management/**</exclude>
-              </excludes>
+              <pattern>com/beust/</pattern>
+              <shadedPattern>${shaded.package}.com.beust.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com/clearspring/</pattern>
+              <shadedPattern>${shaded.package}.com.clearspring.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com/codahale/</pattern>
+              <shadedPattern>${shaded.package}.com.codahale.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com/fasterxml/</pattern>
+              <shadedPattern>${shaded.package}.com.fasterxml.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com/force/</pattern>
+              <shadedPattern>${shaded.package}.com.force.</shadedPattern>
             </relocation>
+            <relocation>
+              <pattern>com/google/gson/</pattern>
+              <shadedPattern>${shaded.package}.com.google.gson.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com/google/inject/</pattern>
+              <shadedPattern>${shaded.package}.com.google.inject.</shadedPattern>
+            </relocation>
+            <!-- This is protobuf 2.5.0 which is shaded to this package in hbase-shaded-client,
+             not the modified protobuf 3.x from hbase-thirdparty -->
+            <relocation>
+              <pattern>com/google/protobuf/</pattern>
+              <shadedPattern>${hbase.shaded.package}.com.google.protobuf.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com/ibm/</pattern>
+              <shadedPattern>${shaded.package}.com.ibm.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com/lmax/</pattern>
+              <shadedPattern>${shaded.package}.com.lmax.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com/sun/jna/</pattern>
+              <shadedPattern>${shaded.package}.com.sun.jna.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com/squareup/</pattern> <!-- trailing slash mirrors the trailing dot of shadedPattern -->
+              <shadedPattern>${shaded.package}.com.squareup.</shadedPattern>
+            </relocation>
+
             <relocation>
               <pattern>io/</pattern>
               <shadedPattern>${shaded.package}.io.</shadedPattern>
@@ -275,8 +508,8 @@
         <artifactId>maven-compiler-plugin</artifactId>
         <executions>
           <execution>
-             <id>default-compile</id>
-             <phase>none</phase>
+            <id>default-compile</id>
+            <phase>none</phase>
           </execution>
         </executions>
       </plugin>
diff --git a/phoenix5-spark3-shaded/pom.xml b/phoenix5-spark3-shaded/pom.xml
index d10e9f2..cfc90a0 100644
--- a/phoenix5-spark3-shaded/pom.xml
+++ b/phoenix5-spark3-shaded/pom.xml
@@ -23,8 +23,8 @@
   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <parent>
-    <artifactId>phoenix-connectors</artifactId>
     <groupId>org.apache.phoenix</groupId>
+    <artifactId>phoenix-connectors</artifactId>
     <version>6.0.0-SNAPSHOT</version>
   </parent>
   <modelVersion>4.0.0</modelVersion>
@@ -53,6 +53,16 @@
       <scope>provided</scope>
     </dependency>
 
+    <!-- Phoenix excludes commons-beanutils from the Omid dependency, but that's basically a bug.
+     We need to add it back, so that we don't depend on Hadoop's commons-beanutils, which may or
+     may not be shaded.
+     This can be removed once we use a Phoenix version that doesn't have this problem. -->
+    <dependency>
+      <groupId>commons-beanutils</groupId>
+      <artifactId>commons-beanutils</artifactId>
+      <scope>compile</scope>
+    </dependency>
+
     <!-- Mark every Hadoop jar as provided -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
@@ -227,7 +237,7 @@
 
   <build>
     <plugins>
-      <!-- Taken from phoenix-client-parent this should be kept in sync with 
+      <!-- Taken from phoenix-client-parent this should be kept in sync with
         Phoenix as much as possible -->
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
@@ -323,24 +333,25 @@
                 <exclude>org/w3c/dom/**</exclude>
                 <exclude>org/xml/sax/**</exclude>
                 <!-- Extras compared to Hadoop -->
-                <!-- Hbase classes - Maybe these could be shaded as well
-                  ? -->
+                <!-- Hbase classes - Maybe these could be shaded as well ? -->
                 <exclude>org/apache/hbase/**</exclude>
                 <!-- We use the spark classpath directly -->
                 <exclude>org/apache/spark/**</exclude>
                 <!-- Phoenix classes -->
                 <exclude>org/apache/phoenix/**</exclude>
                 <exclude>org/apache/omid/**</exclude>
-                <!-- We must not shade this, as this is provided by HBase.
-                  This is specific to -->
-                <!-- the cases where we rely on external HBase / Hadoop -->
-                <exclude>org.apache.commons.beanutils/**</exclude>
                 <!-- Do want/need to expose Tephra as well ? -->
+                <!-- See PHOENIX-7118
+                 Depending on the Spark classpath we may need to leave this unshaded, relocate
+                 it under org/apache/hadoop/shaded/ or under org/apache/hadoop/hbase/shaded/.
+                 The only thing that is guaranteed not to work is relocating it under
+                 ${shaded.package} -->
+                <exclude>org/apache/commons/configuration2/**</exclude>
               </excludes>
             </relocation>
             <!-- We cannot use the more elegant shading rules in -client
               and -server for com packages, but it SHOULD be equivalent, except for the
-              changes for hbase-shaded-client compatibility -->
+              protobuf change for hbase-shaded-client compatibility -->
             <relocation>
               <pattern>com/beust/</pattern>
               <shadedPattern>${shaded.package}.com.beust.</shadedPattern>
@@ -369,7 +380,8 @@
               <pattern>com/google/inject/</pattern>
               <shadedPattern>${shaded.package}.com.google.inject.</shadedPattern>
             </relocation>
-            <!-- HBase shaded ! -->
+            <!-- This is protobuf 2.5.0 which is shaded to this package in hbase-shaded-client,
+             not the modified protobuf 3.x from hbase-thirdparty -->
             <relocation>
               <pattern>com/google/protobuf/</pattern>
               <shadedPattern>${hbase.shaded.package}.com.google.protobuf.</shadedPattern>
@@ -395,8 +407,7 @@
               <pattern>io/</pattern>
               <shadedPattern>${shaded.package}.io.</shadedPattern>
               <excludes>
-                <!-- Exclude config keys for Hadoop that look like package
-                  names -->
+                <!-- Exclude config keys for Hadoop that look like package names -->
                 <exclude>io/compression/**</exclude>
                 <exclude>io/mapfile/**</exclude>
                 <exclude>io/map/index/*</exclude>
@@ -440,8 +451,7 @@
               <pattern>net/</pattern>
               <shadedPattern>${shaded.package}.net.</shadedPattern>
               <excludes>
-                <!-- Exclude config keys for Hadoop that look like package 
-                  names -->
+                <!-- Exclude config keys for Hadoop that look like package names -->
                 <exclude>net/topology/**</exclude>
               </excludes>
             </relocation>
diff --git a/pom.xml b/pom.xml
index 5dbfdec..1f01f94 100644
--- a/pom.xml
+++ b/pom.xml
@@ -57,6 +57,7 @@
     <!-- Phoenix Version -->
     <phoenix.version>5.1.3</phoenix.version>
     <omid.version>1.0.2</omid.version>
+    <commons-beanutils.version>1.9.4</commons-beanutils.version>
     <phoenix.thirdparty.version>2.0.0</phoenix.thirdparty.version>
     <!-- The should match the versions used to build HBase and Hadoop -->
     <hbase.version>2.4.16</hbase.version>
@@ -549,7 +550,11 @@
         <artifactId>phoenix5-hive-shaded</artifactId>
         <version>${project.version}</version>
       </dependency>
-
+      <dependency>
+        <groupId>commons-beanutils</groupId>
+        <artifactId>commons-beanutils</artifactId>
+        <version>${commons-beanutils.version}</version>
+      </dependency>
       <!-- HBase dependencies -->
 
       <!-- These are only needed so that we can set them provided and exclude from the shaded jars -->