You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2022/08/16 02:21:28 UTC

[impala] 02/04: IMPALA-11207: Use hadoop-cloud-storage for Cloud dependencies

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4845f36b4eec47dcf8b1083bd8a042925a02622f
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Thu Aug 4 13:27:32 2022 -0700

    IMPALA-11207: Use hadoop-cloud-storage for Cloud dependencies
    
    Hadoop provides hadoop-cloud-storage, which bundles most of
    the cloud dependencies that Impala currently declares
    individually, such as hadoop-aws, hadoop-azure, and Knox's
    gateway-cloud-bindings. Hadoop has put in a lot of work to make
    sure that this package includes the right versions of its
    dependencies (including shading some dependencies for GCS), so
    consuming them through hadoop-cloud-storage should be more
    reliable than managing each one separately.
    
    This switches the Java build to use hadoop-cloud-storage
    and removes the dependencies that it replaces. This also
    eliminates the need to pin the versions of google-oauth-client
    and the GCS connector (IMPALA_GCS_VERSION), as those are now
    determined by hadoop-cloud-storage.
    
    Change-Id: I3a1631289f990513823c2b17eb9241cc1b5a7ffd
    Reviewed-on: http://gerrit.cloudera.org:8080/18817
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/impala-config.sh       |  3 ---
 java/executor-deps/pom.xml | 62 ++++++++--------------------------------------
 java/pom.xml               |  2 --
 3 files changed, 11 insertions(+), 56 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index c3f36dfda..42fb2924a 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -211,7 +211,6 @@ export CDP_OZONE_VERSION=1.1.0.7.2.16.0-127
 export CDP_PARQUET_VERSION=1.10.99.7.2.16.0-127
 export CDP_RANGER_VERSION=2.3.0.7.2.16.0-127
 export CDP_TEZ_VERSION=0.9.1.7.2.16.0-127
-export CDP_GCS_VERSION=2.1.2.7.2.16.0-127
 
 # Ref: https://infra.apache.org/release-download-pages.html#closer
 : ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"}
@@ -284,7 +283,6 @@ export IMPALA_RANGER_VERSION=${CDP_RANGER_VERSION}
 export IMPALA_RANGER_URL=${CDP_RANGER_URL-}
 export IMPALA_TEZ_VERSION=${CDP_TEZ_VERSION}
 export IMPALA_TEZ_URL=${CDP_TEZ_URL-}
-export IMPALA_GCS_VERSION=${CDP_GCS_VERSION}
 
 export APACHE_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/apache_components"
 export USE_APACHE_HIVE=${USE_APACHE_HIVE-false}
@@ -892,7 +890,6 @@ echo "IMPALA_HUDI_VERSION     = $IMPALA_HUDI_VERSION"
 echo "IMPALA_KUDU_VERSION     = $IMPALA_KUDU_VERSION"
 echo "IMPALA_RANGER_VERSION   = $IMPALA_RANGER_VERSION"
 echo "IMPALA_ICEBERG_VERSION  = $IMPALA_ICEBERG_VERSION"
-echo "IMPALA_GCS_VERSION      = $IMPALA_GCS_VERSION"
 echo "IMPALA_COS_VERSION      = $IMPALA_COS_VERSION"
 
 # Kerberos things.  If the cluster exists and is kerberized, source
diff --git a/java/executor-deps/pom.xml b/java/executor-deps/pom.xml
index 06f4e6971..f219c2f25 100644
--- a/java/executor-deps/pom.xml
+++ b/java/executor-deps/pom.xml
@@ -88,9 +88,15 @@ under the License.
       </exclusions>
     </dependency>
 
+    <!--
+       hadoop-cloud-storage includes several dependencies that Impala needs:
+       Hadoop's AWS support, Azure support, GCS connector, Knox support, etc.
+       hadoop-cloud-storage has been carefully tuned to include the right shaded
+       dependencies.
+    -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-aws</artifactId>
+      <artifactId>hadoop-cloud-storage</artifactId>
       <version>${hadoop.version}</version>
       <!-- Exclude the aws-java-sdk-bundle dependency because the Impala minimal
            version of this dependency is used instead. -->
@@ -99,6 +105,10 @@ under the License.
           <groupId>com.amazonaws</groupId>
           <artifactId>aws-java-sdk-bundle</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
@@ -114,42 +124,6 @@ under the License.
       </exclusions>
     </dependency>
 
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-azure</artifactId>
-      <version>${hadoop.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>org.eclipse.jetty</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-azure-datalake</artifactId>
-      <version>${hadoop.version}</version>
-    </dependency>
-
-    <dependency>
-        <groupId>com.google.cloud.bigdataoss</groupId>
-        <artifactId>gcs-connector</artifactId>
-        <version>${gcs.version}</version>
-    </dependency>
-
-    <!-- Dependency of gcs-connector, newer version addresses CVE -->
-    <dependency>
-      <groupId>com.google.oauth-client</groupId>
-      <artifactId>google-oauth-client</artifactId>
-      <version>${google.oauth-client.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>com.google.oauth-client</groupId>
-      <artifactId>google-oauth-client-java6</artifactId>
-      <version>${google.oauth-client.version}</version>
-    </dependency>
-
     <dependency>
       <groupId>com.qcloud.cos</groupId>
       <artifactId>hadoop-cos</artifactId>
@@ -196,20 +170,6 @@ under the License.
       </exclusions>
     </dependency>
 
-    <!-- IMPALA-8766: Include Knox jars on the classpath -->
-    <dependency>
-      <groupId>org.apache.knox</groupId>
-      <artifactId>gateway-cloud-bindings</artifactId>
-      <version>${knox.version}</version>
-      <exclusions>
-        <!-- Impala currently doesn't support GCS, so exclude those jars -->
-        <exclusion>
-          <groupId>com.google.cloud.bigdataoss</groupId>
-          <artifactId>*</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
     <dependency>
       <groupId>${ozone.groupId}</groupId>
       <artifactId>${ozone.artifactId}</artifactId>
diff --git a/java/pom.xml b/java/pom.xml
index 8bc7c4531..409a15c05 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -47,8 +47,6 @@ under the License.
     <parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
     <kite.version>${env.IMPALA_KITE_VERSION}</kite.version>
     <knox.version>${env.IMPALA_KNOX_VERSION}</knox.version>
-    <gcs.version>${env.IMPALA_GCS_VERSION}</gcs.version>
-    <google.oauth-client.version>1.33.3</google.oauth-client.version>
     <cos.version>${env.IMPALA_COS_VERSION}</cos.version>
     <thrift.version>${env.IMPALA_THRIFT_POM_VERSION}</thrift.version>
     <impala.extdatasrc.api.version>${project.version}</impala.extdatasrc.api.version>