You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/13 03:12:02 UTC

[impala] 02/03: IMPALA-8766: Undo hadoop-cloud-storage + HWX Nexus

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8094811b5d975e18e20071552f86c2e3f8c0fc8f
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Wed Jul 17 11:51:32 2019 -0700

    IMPALA-8766: Undo hadoop-cloud-storage + HWX Nexus
    
    Previous commits for IMPALA-8766 attempted to use hadoop-cloud-storage
    to satisfy Impala's cloud dependencies (e.g. hadoop-aws, hadoop-azure,
    etc). On builds with USE_CDP_HIVE=true, this adds Knox
    gateway-cloud-bindings. However, the entry for the hadoop-cloud-storage
    artifact in the impala.cdp.repo maven repository introduces
    dependencies that are external to that repository. This requires the
    HWX Nexus repository to resolve those dangling dependencies.
    Unfortunately, HWX Nexus ages out old jars, including the ones we
    need.
    
    This change stops using hadoop-cloud-storage and instead adds a direct
    dependency on Knox (gateway-cloud-bindings) when USE_CDP_HIVE=true. It
    also disables the HWX Nexus repository and leaves a tombstone comment
    explaining why.
    
    Testing:
     - Deleted my .m2 directory and rebuilt Impala with USE_CDP_HIVE=true
     - Verified the CLASSPATH still contains the right jars on USE_CDP_HIVE=true
    
    Change-Id: I79a0c2575fc50bbc3b393c150c0bce22258ea1bd
    Reviewed-on: http://gerrit.cloudera.org:8080/14024
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Vihang Karajgaonkar <vi...@cloudera.com>
---
 bin/impala-config.sh  |  1 +
 fe/pom.xml            | 67 ++++++++++++++++++++++-----------------------------
 impala-parent/pom.xml | 29 +++++++++++++---------
 3 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index effddc4..d795748 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -208,6 +208,7 @@ if $USE_CDP_HIVE; then
   # the minicluster
   export IMPALA_HIVE_VERSION=${CDP_HIVE_VERSION}
   export IMPALA_TEZ_VERSION=0.9.1.7.0.0.0-365
+  export IMPALA_KNOX_VERSION=1.0.0.7.0.0.0-365
   export IMPALA_HADOOP_VERSION=${CDP_HADOOP_VERSION}
   export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${CDP_HADOOP_VERSION}/"
 else
diff --git a/fe/pom.xml b/fe/pom.xml
index f7e0d35..5162b7a 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -91,6 +91,31 @@ under the License.
     </dependency>
 
     <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-aws</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-azure</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-azure-datalake</artifactId>
+      <version>${hadoop.version}</version>
+      <exclusions>
+        <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
+        <exclusion>
+          <groupId>net.minidev</groupId>
+          <artifactId>json-smart</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
       <groupId>org.apache.ranger</groupId>
       <artifactId>ranger-plugins-common</artifactId>
       <version>${ranger.version}</version>
@@ -925,34 +950,6 @@ under the License.
             </exclusion>
           </exclusions>
         </dependency>
-
-        <!-- CDH profile manually specifies cloud storage dependencies rather
-             than using hadoop-cloud-storage. This is to minimize any disruption
-             to older configurations. -->
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-aws</artifactId>
-          <version>${hadoop.version}</version>
-        </dependency>
-
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-azure</artifactId>
-          <version>${hadoop.version}</version>
-        </dependency>
-
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-azure-datalake</artifactId>
-          <version>${hadoop.version}</version>
-          <exclusions>
-            <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
-            <exclusion>
-              <groupId>net.minidev</groupId>
-              <artifactId>json-smart</artifactId>
-            </exclusion>
-          </exclusions>
-        </dependency>
       </dependencies>
     </profile>
 
@@ -1057,18 +1054,12 @@ under the License.
           <version>3.2.0-m3</version>
           <scope>test</scope>
         </dependency>
-        <!-- The hadoop-cloud-storage artifact gets AWS, Azure, and other cloud
-             storage dependencies. It also incorporates Knox runtime dependencies. -->
+        <!-- IMPALA-8766: Include Knox jars on the classpath -->
         <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-cloud-storage</artifactId>
-          <version>${hadoop.version}</version>
+          <groupId>org.apache.knox</groupId>
+          <artifactId>gateway-cloud-bindings</artifactId>
+          <version>${knox.version}</version>
           <exclusions>
-            <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
-            <exclusion>
-              <groupId>net.minidev</groupId>
-              <artifactId>json-smart</artifactId>
-            </exclusion>
             <!-- Impala currently doesn't support GCS, so exclude those jars -->
             <exclusion>
               <groupId>com.google.cloud.bigdataoss</groupId>
diff --git a/impala-parent/pom.xml b/impala-parent/pom.xml
index 4dae057..ab1dbb3 100644
--- a/impala-parent/pom.xml
+++ b/impala-parent/pom.xml
@@ -40,6 +40,7 @@ under the License.
     <hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
     <parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
     <kite.version>${env.IMPALA_KITE_VERSION}</kite.version>
+    <knox.version>${env.IMPALA_KNOX_VERSION}</knox.version>
     <thrift.version>0.9.3</thrift.version>
     <impala.extdatasrc.api.version>1.0-SNAPSHOT</impala.extdatasrc.api.version>
     <impala.query.event.hook.api.version>1.0-SNAPSHOT</impala.query.event.hook.api.version>
@@ -185,25 +186,31 @@ under the License.
     </repository>
     <repository>
       <!--
-      The impala.cdp.repo above can reference versions that are not in that
-      repository. For example, artifact A at version 280 may have a dependency
-      on artifact B at version 279, but the maven repository may only have
-      artifact B at version 280. This repository contains all the versions, so
-      it satisfies the dangling dependencies. This was necessary for IMPALA-8766.
+      HWX Nexus is disabled. This is a tombstone to list out why:
+      1. Snapshots are disabled because HWX Nexus contains snapshots of CDH artifacts
+      that can conflict with the artifacts in the maven repository associated with
+      the CDH_BUILD_NUMBER. Maven can end up downloading a mix of artifacts that are
+      mutually incompatible. Snapshots are not necessary at this time.
+      2. In a previous change, we depended on the hadoop-cloud-storage artifact from
+      the impala.cdp.repo. This had the odd property that it referenced versions of
+      artifacts that were not in the impala.cdp.repo. For example, artifact A at
+      version 280 may have a dependency on artifact B at version 279, but the maven
+      repository may only have artifact B at version 280. Nexus was meant to handle
+      these dangling dependencies (see IMPALA-8766). However, HWX Nexus ages out jars
+      from old build numbers. When the artifact B at version 279 ages out, it breaks
+      the build. Just as seriously, if Impala uses the impala.cdp.repo in a way that
+      requires an external maven repository, old commits may become unbuildable if that
+      external maven repository removes any of the jars we need.
+      So, HWX Nexus is disabled and should face strong scrutiny before being reenabled.
       -->
       <id>hwx.public.repo</id>
       <url>https://nexus-private.hortonworks.com/nexus/content/groups/public</url>
       <name>Hortonworks public repository</name>
-      <!--
-      Snapshots are specifically disabled, because the snapshots in this repository
-      would conflict with the versions in impala.cdh.repo and should be unnecessary.
-      -->
       <snapshots>
         <enabled>false</enabled>
       </snapshots>
       <releases>
-        <!-- The Hortonworks public repository is only needed for USE_CDP_HIVE=true -->
-        <enabled>${env.USE_CDP_HIVE}</enabled>
+        <enabled>false</enabled>
       </releases>
     </repository>
   </repositories>