You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/07/24 17:01:18 UTC

[impala] branch master updated: IMPALA-8766: Change cloud dependencies to use hadoop-cloud-storage

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 7c11cd4  IMPALA-8766: Change cloud dependencies to use hadoop-cloud-storage
7c11cd4 is described below

commit 7c11cd46cccafdfd8a3148eed7fb577d1ceec603
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Fri Jul 12 12:54:25 2019 -0700

    IMPALA-8766: Change cloud dependencies to use hadoop-cloud-storage
    
    Currently, fe/pom.xml includes direct dependencies on hadoop-aws,
    hadoop-azure, and hadoop-azure-datalake. Hadoop has a meta-package
    hadoop-cloud-storage that provides these dependencies with appropriate
    exclusions. Switching to it would not change Impala's build
    significantly, but it allows different versions or distributions of
    Hadoop to add necessary runtime jars to the classpath. For example,
    distributions of Hadoop that use Apache Knox for identity management
    can add Knox jars to this meta-package.
    
    This switches Impala to use the hadoop-cloud-storage package. This
    does not change the default configuration, but the USE_CDP_HIVE=true
    configuration gains Apache Knox jars.
    
    Due to the structure of the CDP maven repository, artifacts in it can
    depend on versions of jars that are not present in that repository. So,
    USE_CDP_HIVE=true requires the Hortonworks public maven repository.
    This is added, but it is only enabled with USE_CDP_HIVE=true and only
    for releases (no SNAPSHOTs).
    
    Testing:
     - Ran core tests
     - Manually verified classpath changes
    
    Change-Id: I08f1c36ecf54ac277d99e2d2843163eada732e50
    Reviewed-on: http://gerrit.cloudera.org:8080/13872
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Vihang Karajgaonkar <vi...@cloudera.com>
---
 fe/pom.xml            | 19 ++++++-------------
 impala-parent/pom.xml | 24 +++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/fe/pom.xml b/fe/pom.xml
index e647b2b..3baa98c 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -92,19 +92,7 @@ under the License.
 
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-aws</artifactId>
-      <version>${hadoop.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-azure</artifactId>
-      <version>${hadoop.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-azure-datalake</artifactId>
+      <artifactId>hadoop-cloud-storage</artifactId>
       <version>${hadoop.version}</version>
       <exclusions>
         <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
@@ -112,6 +100,11 @@ under the License.
           <groupId>net.minidev</groupId>
           <artifactId>json-smart</artifactId>
         </exclusion>
+        <!-- Impala currently doesn't support GCS, so exclude those jars -->
+        <exclusion>
+          <groupId>com.google.cloud.bigdataoss</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
diff --git a/impala-parent/pom.xml b/impala-parent/pom.xml
index f2e1226..4dae057 100644
--- a/impala-parent/pom.xml
+++ b/impala-parent/pom.xml
@@ -183,8 +183,30 @@ under the License.
         <enabled>false</enabled>
       </snapshots>
     </repository>
+    <repository>
+      <!--
+      The impala.cdp.repo above can reference versions that are not in that
+      repository. For example, artifact A at version 280 may have a dependency
+      on artifact B at version 279, but the maven repository may only have
+      artifact B at version 280. This repository contains all the versions, so
+      it satisfies the dangling dependencies. This was necessary for IMPALA-8766.
+      -->
+      <id>hwx.public.repo</id>
+      <url>https://nexus-private.hortonworks.com/nexus/content/groups/public</url>
+      <name>Hortonworks public repository</name>
+      <!--
+      Snapshots are specifically disabled, because the snapshots in this repository
+      would conflict with the versions in impala.cdh.repo and should be unnecessary.
+      -->
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+      <releases>
+        <!-- The Hortonworks public repository is only needed for USE_CDP_HIVE=true -->
+        <enabled>${env.USE_CDP_HIVE}</enabled>
+      </releases>
+    </repository>
   </repositories>
-
   <pluginRepositories>
     <pluginRepository>
       <id>cloudera.thirdparty.repo</id>