You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by aj...@apache.org on 2020/02/12 14:04:21 UTC

[impala] branch master updated: IMPALA-9279: Update the Kudu version to include VARCHAR support

This is an automated email from the ASF dual-hosted git repository.

ajeges pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 14ae6ea  IMPALA-9279: Update the Kudu version to include VARCHAR support
14ae6ea is described below

commit 14ae6eae1e3e0ff78040ae2bad92d36d745cf365
Author: Attila Jeges <at...@cloudera.com>
AuthorDate: Fri Jan 24 18:23:22 2020 +0100

    IMPALA-9279: Update the Kudu version to include VARCHAR support
    
    Before this change, the preferred way of getting Kudu was to pull
    it in from the specified CDH build (even if USE_CDP_HIVE was set
    to true). Optionally, by setting USE_CDH_KUDU to false, one could
    force Impala to use the native toolchain Kudu. But even then, the
    Kudu Java artifacts would be downloaded from CDH.
    
    Since Kudu VARCHAR support won't be backported to CDH, this
    behavior blocks the Impala side of the Kudu/Impala VARCHAR
    integration.
    
    With this change:
    1. Using the native toolchain Kudu (including the Java artifacts)
       is the default behavior. From now on USE_CDH_KUDU will be set
       to false by default. Impala can be forced to fall back on
       using the CDH Kudu by explicitly setting USE_CDH_KUDU to true.
    2. The Kudu version is updated to include VARCHAR support.
    
    Testing:
    Ran exhaustive tests with USE_CDH_KUDU=true and
    USE_CDH_KUDU=false.
    
    Change-Id: Iafe56342d43cb63e35c0bbb1b4a99327dda0a44a
    Reviewed-on: http://gerrit.cloudera.org:8080/15134
    Reviewed-by: Attila Jeges <at...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/bootstrap_toolchain.py |  8 +++-----
 bin/impala-config.sh       | 48 +++++++++++++++++++++++++++-------------------
 impala-parent/pom.xml      | 11 +++++++++++
 3 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 546f388..1b31872 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -647,14 +647,12 @@ def get_kudu_downloads(use_kudu_stub):
                       "USE_CDH_KUDU=false to use the toolchain Kudu.")
         sys.exit(1)
       kudu_downloads += [CdhKudu(get_platform_release_label().cdh)]
+      # There is also a Kudu Java package.
+      kudu_downloads += [CdhKuduJava()]
     else:
+      # Toolchain Kudu includes Java artifacts.
       kudu_downloads += [ToolchainKudu()]
 
-  # Independent of the regular Kudu package, there is also a Kudu Java package. This
-  # always needs to be downloaded from the CDH components, because the toolchain
-  # does not produce the Java artifacts.
-  # TODO: Does this make any sense with the Kudu stub?
-  kudu_downloads += [CdhKuduJava()]
   return kudu_downloads
 
 
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index fa9b01c..b592ebe 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -68,7 +68,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=122-7f8aa0363e
+export IMPALA_TOOLCHAIN_BUILD_ID=137-30bf4d6e3c
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p5
@@ -188,7 +188,6 @@ export IMPALA_PARQUET_VERSION=1.10.99-cdh6.x-SNAPSHOT
 export IMPALA_AVRO_JAVA_VERSION=1.8.2-cdh6.x-SNAPSHOT
 export IMPALA_HUDI_VERSION=0.5.0-incubating
 export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
-export IMPALA_KUDU_JAVA_VERSION=1.11.0-cdh6.x-SNAPSHOT
 export IMPALA_ORC_JAVA_VERSION=1.6.2
 
 # When IMPALA_(CDH_COMPONENT)_URL are overridden, they may contain '$(platform_label)'
@@ -198,10 +197,14 @@ unset IMPALA_HBASE_URL
 unset IMPALA_HIVE_URL
 unset IMPALA_KUDU_URL
 unset IMPALA_KUDU_VERSION
+unset IMPALA_KUDU_JAVA_VERSION
 unset IMPALA_SENTRY_URL
 
 export IMPALA_KERBERIZE=false
 
+unset IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY
+unset IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED
+
 # Source the branch and local config override files here to override any
 # variables above or any variables below that allow overriding via environment
 # variable.
@@ -674,18 +677,18 @@ fi
 export USE_KUDU_DEBUG_BUILD=${USE_KUDU_DEBUG_BUILD-false}
 
 # Kudu doesn't compile on some old Linux distros. KUDU_IS_SUPPORTED enables building Kudu
-# into the backend. We prefer to pull Kudu in from CDH, but will fall back to using the
-# toolchain Kudu for distros where the CDH tarballs are not provided by setting
-# USE_CDH_KUDU to false.
-# The frontend build is OS independent since it is Java.
-export USE_CDH_KUDU=${USE_CDH_KUDU-true}
+# into the backend. We prefer to pull Kudu in from the toolchain, but will fall back to
+# using the CDH Kudu by setting USE_CDH_KUDU to true.
+export USE_CDH_KUDU=${USE_CDH_KUDU-false}
 if [[ -z "${KUDU_IS_SUPPORTED-}" ]]; then
   if [[ -n "$KUDU_BUILD_DIR" ]]; then
     KUDU_IS_SUPPORTED=true
-  else
-    KUDU_IS_SUPPORTED=false
+  elif $IS_OSX; then
     USE_CDH_KUDU=false
-    if ! $IS_OSX; then
+    KUDU_IS_SUPPORTED=false
+  else
+    KUDU_IS_SUPPORTED=true
+    if $USE_CDH_KUDU; then
       if ! which lsb_release &>/dev/null; then
         echo Unable to find the 'lsb_release' command. \
             Please ensure it is available in your PATH. 1>&2
@@ -698,14 +701,9 @@ if [[ -z "${KUDU_IS_SUPPORTED-}" ]]; then
       fi
       # Remove spaces, trim minor versions, and convert to lowercase.
       DISTRO_VERSION="$(tr -d ' \n' <<< "$DISTRO_VERSION" | cut -d. -f1 | tr "A-Z" "a-z")"
-      case "$DISTRO_VERSION" in
-        centos6 | centos7 | debian8 | suselinux12 | suse12 | ubuntu16 | ubuntu18)
-          USE_CDH_KUDU=true
-          KUDU_IS_SUPPORTED=true;;
-        ubuntu14 )
-          USE_CDH_KUDU=false
-          KUDU_IS_SUPPORTED=true;;
-      esac
+      if [[ "$DISTRO_VERSION" == "ubuntu14" ]]; then
+        USE_CDH_KUDU=false
+      fi
     fi
   fi
 fi
@@ -713,12 +711,22 @@ export KUDU_IS_SUPPORTED
 
 if $USE_CDH_KUDU; then
   export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"1.11.0-cdh6.x-SNAPSHOT"}
+  export IMPALA_KUDU_JAVA_VERSION=${IMPALA_KUDU_JAVA_VERSION-"1.11.0-cdh6.x-SNAPSHOT"}
   export IMPALA_KUDU_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION
+  export IMPALA_KUDU_JAVA_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION
+  # If USE_CDH_KUDU is true, Toolchain Kudu maven repository should be disabled.
+  # We get Kudu Java artifacts from CDH.
+  export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY="file:///non/existing/repo"
+  export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED=false
 else
-  export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"988296d"}
+  export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"5c610bf40"}
+  export IMPALA_KUDU_JAVA_VERSION=${IMPALA_KUDU_JAVA_VERSION-"1.12.0-SNAPSHOT"}
   export IMPALA_KUDU_HOME=${IMPALA_TOOLCHAIN}/kudu-$IMPALA_KUDU_VERSION
+  export IMPALA_KUDU_JAVA_HOME=${IMPALA_TOOLCHAIN}/kudu-${IMPALA_KUDU_VERSION}/java
+  export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY=\
+"file://${IMPALA_KUDU_JAVA_HOME}/repository"
+  export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED=true
 fi
-export IMPALA_KUDU_JAVA_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION
 
 # Set $THRIFT_HOME to the Thrift directory in toolchain.
 export THRIFT_HOME="${IMPALA_TOOLCHAIN}/thrift-${IMPALA_THRIFT_VERSION}"
diff --git a/impala-parent/pom.xml b/impala-parent/pom.xml
index 89498d2..5519760 100644
--- a/impala-parent/pom.xml
+++ b/impala-parent/pom.xml
@@ -181,6 +181,17 @@ under the License.
       </snapshots>
     </repository>
     <repository>
+      <id>impala.toolchain.kudu.repo</id>
+      <url>${env.IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY}</url>
+      <name>Impala Toolchain Kudu Repository</name>
+      <releases>
+        <enabled>${env.IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED}</enabled>
+      </releases>
+      <snapshots>
+        <enabled>${env.IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED}</enabled>
+      </snapshots>
+    </repository>
+    <repository>
       <id>cloudera.thirdparty.repo</id>
       <url>https://repository.cloudera.com/content/repositories/third-party</url>
       <name>Cloudera Third Party Repository</name>