You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/10/06 16:06:02 UTC

[impala] 02/02: IMPALA-10218: Remove impala.cdh.repo Maven repository

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 04ce57dcaf338cb92ae8c811736fb99b58acc047
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sat Oct 3 14:08:03 2020 -0700

    IMPALA-10218: Remove impala.cdh.repo Maven repository
    
    This removes the impala.cdh.repo Maven repository (i.e.
    the repository for the CDH_BUILD_NUMBER). It removes
    the associated code for CDH_BUILD_NUMBER.
    
    The only remaining dependency for the CDH_BUILD_NUMBER
    repository was Apache Kite in some of our test code.
    This transitions that code to use the public version
    of Apache Kite.
    
    The testdata/TableFlattener Java project is intended
    to be used manually and is not used by any tests.
    It had bitrotted and no longer built. I verified
    that it builds again with this change, but I did
    not verify functionality.
    
    Testing:
     - Ran a core job
     - Built testdata/TableFlattener Java project
    
    Change-Id: I44b587f936ae20c207c74a9800cf98baa464164a
    Reviewed-on: http://gerrit.cloudera.org:8080/16543
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
---
 bin/impala-config.sh                               |  6 +-
 buildall.sh                                        | 14 ++--
 impala-parent/pom.xml                              | 29 ++-----
 testdata/TableFlattener/pom.xml                    | 20 +++++
 .../apache/impala/infra/tableflattener/Main.java   |  6 +-
 testdata/pom.xml                                   | 92 +++++-----------------
 6 files changed, 53 insertions(+), 114 deletions(-)

diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 5d9b8a6..5dd8a31 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -171,9 +171,6 @@ fi
 
 : ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
 export IMPALA_TOOLCHAIN_HOST
-export CDH_BUILD_NUMBER=1814051
-export CDH_MAVEN_REPOSITORY=\
-"https://${IMPALA_TOOLCHAIN_HOST}/build/cdh_components/${CDH_BUILD_NUMBER}/maven"
 
 export CDP_BUILD_NUMBER=4493826
 export CDP_MAVEN_REPOSITORY=\
@@ -191,7 +188,7 @@ export CDP_TEZ_VERSION=0.9.1.7.2.1.0-287
 export ARCH_NAME=$(uname -p)
 
 export IMPALA_HUDI_VERSION=0.5.0-incubating
-export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
+export IMPALA_KITE_VERSION=1.1.0
 export IMPALA_ORC_JAVA_VERSION=1.6.2
 export IMPALA_ICEBERG_VERSION=0.8.0-incubating
 
@@ -721,7 +718,6 @@ echo "METASTORE_DB            = $METASTORE_DB"
 echo "DOWNLOAD_CDH_COMPONENTS = $DOWNLOAD_CDH_COMPONENTS"
 echo "IMPALA_MAVEN_OPTIONS    = $IMPALA_MAVEN_OPTIONS"
 echo "IMPALA_TOOLCHAIN_HOST   = $IMPALA_TOOLCHAIN_HOST"
-echo "CDH_BUILD_NUMBER        = $CDH_BUILD_NUMBER"
 echo "CDP_BUILD_NUMBER        = $CDP_BUILD_NUMBER"
 echo "CDP_COMPONENTS_HOME     = $CDP_COMPONENTS_HOME"
 echo "IMPALA_HADOOP_VERSION   = $IMPALA_HADOOP_VERSION"
diff --git a/buildall.sh b/buildall.sh
index c97d564..1440e32 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -582,18 +582,14 @@ create_log_dirs
 
 bootstrap_dependencies
 
-# Create .cdh and .cdp files that contains the CDH_BUILD_NUMBER and CDP_BUILD_NUMBER
-# respectively. If the content of the files are different than the ones in the
-# environment variable, append -U into IMPALA_MAVEN_OPTION to force Maven to update its
-# local cache.
-CDH_FILE="${IMPALA_HOME}/.cdh"
+# Create a .cdp file that contains the CDP_BUILD_NUMBER. If the content of the file
+# differs from the value of the environment variable, append -U to
+# IMPALA_MAVEN_OPTIONS to force Maven to update its local cache.
+# TODO: Look into removing this. The CDP components do not use SNAPSHOT versions.
 CDP_FILE="${IMPALA_HOME}/.cdp"
-if [[ ! -f ${CDH_FILE} || ! -f ${CDP_FILE} || \
-      $(cat ${CDH_FILE}) != ${CDH_BUILD_NUMBER} || \
-      $(cat ${CDP_FILE}) != ${CDP_BUILD_NUMBER} ]]; then
+if [[ ! -f ${CDP_FILE} || $(cat ${CDP_FILE}) != ${CDP_BUILD_NUMBER} ]]; then
   export IMPALA_MAVEN_OPTIONS="${IMPALA_MAVEN_OPTIONS} -U"
 fi
-echo "${CDH_BUILD_NUMBER}" > ${CDH_FILE}
 echo "${CDP_BUILD_NUMBER}" > ${CDP_FILE}
 
 if [[ "$BUILD_FE_ONLY" -eq 1 ]]; then
diff --git a/impala-parent/pom.xml b/impala-parent/pom.xml
index a6ed550..f066071 100644
--- a/impala-parent/pom.xml
+++ b/impala-parent/pom.xml
@@ -151,14 +151,6 @@ under the License.
       <url>https://repository.cloudera.com/content/groups/cdh-releases-rcs</url>
       <name>CDH Releases Repository</name>
       <snapshots>
-        <enabled>true</enabled>
-      </snapshots>
-    </repository>
-    <repository>
-      <id>cdh.releases.repo</id>
-      <url>https://repository.cloudera.com/content/repositories/releases</url>
-      <name>CDH Releases Repository</name>
-      <snapshots>
         <enabled>false</enabled>
       </snapshots>
     </repository>
@@ -175,18 +167,6 @@ under the License.
       </snapshots>
     </repository>
     <repository>
-      <!--
-      The Maven repository for the CDH build identified by CDH_BUILD_NUMBER.
-      CDH uses maven SNAPSHOT versions for non-released versions of components.
-      -->
-      <id>impala.cdh.repo</id>
-      <url>${env.CDH_MAVEN_REPOSITORY}</url>
-      <name>Impala CDH Repository</name>
-      <snapshots>
-        <enabled>true</enabled>
-      </snapshots>
-    </repository>
-    <repository>
       <id>impala.toolchain.kudu.repo</id>
       <url>${env.IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY}</url>
       <name>Impala Toolchain Kudu Repository</name>
@@ -208,10 +188,11 @@ under the License.
     <repository>
       <!--
       HWX Nexus is disabled. This is a tombstone to list out why:
-      1. Snapshots are disabled because HWX Nexus contains snapshots of CDH artifacts
-      that can conflict with the artifacts in the maven repository associated with
-      the CDH_BUILD_NUMBER. Maven can end up downloading a mix of artifacts that are
-      mutually incompatible. Snapshots are not necessary at this time.
+      1. Snapshots are disabled because HWX Nexus contains snapshots of artifacts
+      that can conflict with the artifacts in any of the other repositories with
+      SNAPSHOT versions. We don't want any conflicting sources for SNAPSHOT versions,
+      so it is better to keep this disabled. In the past, this was a particular
+      problem for CDH Hadoop components that used SNAPSHOT versions.
       2.  In a previous change, we depended on the hadoop-cloud-storage artifact from
       the impala.cdp.repo. This had the odd property that it referenced versions of
       artifacts that were not in the impala.cdp.repo. For example, artifact A at
diff --git a/testdata/TableFlattener/pom.xml b/testdata/TableFlattener/pom.xml
index cff5465..cb232d4 100644
--- a/testdata/TableFlattener/pom.xml
+++ b/testdata/TableFlattener/pom.xml
@@ -53,9 +53,29 @@
       <version>${hadoop.version}</version>
     </dependency>
     <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>${avro.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-avro</artifactId>
+      <version>${parquet.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.kitesdk</groupId>
       <artifactId>kite-data-core</artifactId>
       <version>${kite.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.avro</groupId>
+          <artifactId>avro</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.twitter</groupId>
+          <artifactId>parquet-avro</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
   </dependencies>
 </project>
diff --git a/testdata/TableFlattener/src/main/java/org/apache/impala/infra/tableflattener/Main.java b/testdata/TableFlattener/src/main/java/org/apache/impala/infra/tableflattener/Main.java
index 68643c5..18982c4 100644
--- a/testdata/TableFlattener/src/main/java/org/apache/impala/infra/tableflattener/Main.java
+++ b/testdata/TableFlattener/src/main/java/org/apache/impala/infra/tableflattener/Main.java
@@ -36,9 +36,9 @@ import org.kitesdk.data.DatasetDescriptor;
 import org.kitesdk.data.Datasets;
 import org.kitesdk.data.Format;
 import org.kitesdk.data.Formats;
-import parquet.avro.AvroSchemaConverter;
-import parquet.hadoop.ParquetFileReader;
-import parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.avro.AvroSchemaConverter;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 
 import java.io.IOException;
 import java.io.PrintWriter;
diff --git a/testdata/pom.xml b/testdata/pom.xml
index 3d8c12f..80e0e38 100644
--- a/testdata/pom.xml
+++ b/testdata/pom.xml
@@ -44,13 +44,6 @@ under the License.
       <version>2.3</version>
     </dependency>
 
-    <!-- Force javax-el dependency -->
-    <dependency>
-      <groupId>org.glassfish</groupId>
-      <artifactId>javax.el</artifactId>
-      <version>3.0.1-b06</version>
-    </dependency>
-
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
@@ -58,13 +51,6 @@ under the License.
     </dependency>
 
     <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>${junit.version}</version>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
       <version>${hadoop.version}</version>
@@ -74,76 +60,26 @@ under the License.
           <groupId>net.minidev</groupId>
           <artifactId>json-smart</artifactId>
         </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-client</artifactId>
-      <version>${hbase.version}</version>
-      <exclusions>
-        <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
         <exclusion>
-          <groupId>net.minidev</groupId>
-          <artifactId>json-smart</artifactId>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>*</artifactId>
         </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-server</artifactId>
-      <version>${hbase.version}</version>
-      <exclusions>
-        <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
         <exclusion>
-          <groupId>net.minidev</groupId>
-          <artifactId>json-smart</artifactId>
+          <!-- IMPALA-9468: Avoid pulling in netty for security reasons -->
+          <groupId>io.netty</groupId>
+          <artifactId>*</artifactId>
         </exclusion>
         <exclusion>
-          <groupId>org.glassfish</groupId>
-          <artifactId>javax.el</artifactId>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-server</artifactId>
         </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-common</artifactId>
-      <version>${hbase.version}</version>
-      <exclusions>
-        <!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
         <exclusion>
-          <groupId>net.minidev</groupId>
-          <artifactId>json-smart</artifactId>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-servlet</artifactId>
         </exclusion>
       </exclusions>
     </dependency>
 
-   <dependency>
-      <groupId>org.apache.hbase</groupId>
-      <artifactId>hbase-protocol</artifactId>
-      <version>${hbase.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.cloudera.htrace</groupId>
-      <artifactId>htrace-core</artifactId>
-      <version>2.00</version>
-    </dependency>
-
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-      <version>${commons-io.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>commons-logging</groupId>
-      <artifactId>commons-logging</artifactId>
-      <version>1.1.1</version>
-    </dependency>
-
     <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>
@@ -166,6 +102,16 @@ under the License.
       <groupId>org.kitesdk</groupId>
       <artifactId>kite-data-core</artifactId>
       <version>${kite.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.avro</groupId>
+          <artifactId>avro</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.twitter</groupId>
+          <artifactId>parquet-avro</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
   </dependencies>