You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/14 19:19:17 UTC

[impala] 03/03: IMPALA-12441: Simplify local toolchain development

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4be517e150148bd852d1fd106a4e4b1adf1229f1
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Mon Jul 24 16:24:47 2023 -0700

    IMPALA-12441: Simplify local toolchain development
    
    If NATIVE_TOOLCHAIN_HOME is set, it is used to provide the native
    toolchain instead of the default in IMPALA_TOOLCHAIN. Setting it points
    IMPALA_TOOLCHAIN_PACKAGES_HOME at $NATIVE_TOOLCHAIN_HOME/build and sets
    SKIP_TOOLCHAIN_BOOTSTRAP=true.
    
    Adds IMPALA_TOOLCHAIN_REPO, IMPALA_TOOLCHAIN_BRANCH, and
    IMPALA_TOOLCHAIN_COMMIT_HASH so that it is clear which toolchain is
    used for this Impala commit.
    
    If NATIVE_TOOLCHAIN_HOME does not yet exist, buildall.sh will clone the
    repo and check out the commit hash mentioned above before building.
    
    Also skips downloading Kudu if SKIP_TOOLCHAIN_BOOTSTRAP is true, because
    Kudu is built from native-toolchain. This normalizes the aarch64 logic,
    which skipped Kudu because aarch64 would always build native-toolchain
    locally.
    
    Change-Id: I3a9e51b7f54c738d8cc01b32428ac88a344de376
    Reviewed-on: http://gerrit.cloudera.org:8080/20267
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Michael Smith <mi...@cloudera.com>
---
 bin/bootstrap_system.sh    | 13 +------------
 bin/bootstrap_toolchain.py |  3 ++-
 bin/impala-config.sh       | 12 +++++++++++-
 buildall.sh                | 40 ++++++++++++++++++++++++++--------------
 4 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index ef9cfc520..3a50b25ad 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -481,23 +481,12 @@ echo -e "\n$SET_IMPALA_HOME" >> ~/.bashrc
 eval "$SET_IMPALA_HOME"
 
 if [[ $ARCH_NAME == 'aarch64' ]]; then
-  echo -e "\nexport SKIP_TOOLCHAIN_BOOTSTRAP=true" >> \
-    "${IMPALA_HOME}/bin/impala-config-local.sh"
   SET_TOOLCHAIN_HOME="export NATIVE_TOOLCHAIN_HOME=${IMPALA_HOME}/../native-toolchain"
   echo -e "\n$SET_TOOLCHAIN_HOME" >> ~/.bashrc
   echo -e "\n$SET_TOOLCHAIN_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
   eval "$SET_TOOLCHAIN_HOME"
-  if ! [[ -d "$NATIVE_TOOLCHAIN_HOME" ]]; then
-    time -p git clone https://github.com/cloudera/native-toolchain/ \
-      "$NATIVE_TOOLCHAIN_HOME"
-  fi
-  cd "$NATIVE_TOOLCHAIN_HOME"
-  git pull
-  echo "Begin build tool chain, may need several hours, please be patient...."
+  # Provide access to ~/.cache on build machines so we can use ccache.
   sudo chmod 755 ~/.cache
-  ./buildall.sh
-  cd -
-  mkdir -p ${IMPALA_HOME}/toolchain
 fi
 
 # Try to prepopulate the m2 directory to save time
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index f52c76d20..45cd1abd4 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -568,7 +568,8 @@ def main():
   if os.getenv("DOWNLOAD_CDH_COMPONENTS", "false") == "true":
     create_directory_from_env_var("CDP_COMPONENTS_HOME")
     create_directory_from_env_var("APACHE_COMPONENTS_HOME")
-    if platform.processor() != "aarch64":
+    if os.getenv("SKIP_TOOLCHAIN_BOOTSTRAP", "false") != "true":
+      # Kudu is currently sourced from native-toolchain
       downloads += get_kudu_downloads()
     downloads += get_hadoop_downloads()
 
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 13c2f872c..f597c5439 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -86,6 +86,11 @@ export USE_AVRO_CPP=${USE_AVRO_CPP:=false}
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
 export IMPALA_TOOLCHAIN_BUILD_ID=358-e7cfab15d3
+export IMPALA_TOOLCHAIN_REPO=\
+${IMPALA_TOOLCHAIN_REPO:-https://github.com/cloudera/native-toolchain.git}
+export IMPALA_TOOLCHAIN_BRANCH=${IMPALA_TOOLCHAIN_BRANCH:-master}
+export IMPALA_TOOLCHAIN_COMMIT_HASH=\
+${IMPALA_TOOLCHAIN_COMMIT_HASH-e7cfab15d36ae051747252b676f0a11a9c58fe05}
 # Versions of toolchain dependencies.
 # -----------------------------------
 if $USE_AVRO_CPP; then
@@ -316,8 +321,13 @@ fi
 # IMPALA_TOOLCHAIN_PACKAGES_HOME is the location inside IMPALA_TOOLCHAIN where native
 # toolchain packages are placed. This uses a subdirectory that contains the information
 # about the compiler to allow using different compiler versions.
-export IMPALA_TOOLCHAIN_PACKAGES_HOME=\
+IMPALA_TOOLCHAIN_PACKAGES_HOME=\
 ${IMPALA_TOOLCHAIN}/toolchain-packages-gcc${IMPALA_GCC_VERSION}
+if ! [ -z ${NATIVE_TOOLCHAIN_HOME-} ]; then
+  IMPALA_TOOLCHAIN_PACKAGES_HOME=$(realpath ${NATIVE_TOOLCHAIN_HOME})/build
+  export SKIP_TOOLCHAIN_BOOTSTRAP=true
+fi
+export IMPALA_TOOLCHAIN_PACKAGES_HOME
 
 export CDP_HADOOP_URL=${CDP_HADOOP_URL-}
 export CDP_HBASE_URL=${CDP_HBASE_URL-}
diff --git a/buildall.sh b/buildall.sh
index 4095cbed7..ba32813eb 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -416,29 +416,41 @@ bootstrap_dependencies() {
 
   # Populate necessary thirdparty components unless it's set to be skipped.
   if [[ "${SKIP_TOOLCHAIN_BOOTSTRAP}" = true ]]; then
-    echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
+    if ! [ -z "${NATIVE_TOOLCHAIN_HOME}" ]; then
+      if ! [ -d "${NATIVE_TOOLCHAIN_HOME}" ]; then
+        mkdir -p "${NATIVE_TOOLCHAIN_HOME}"
+        pushd "${NATIVE_TOOLCHAIN_HOME}"
+        git init
+        git remote add toolchain "${IMPALA_TOOLCHAIN_REPO}"
+        git fetch toolchain "${IMPALA_TOOLCHAIN_BRANCH}"
+        # Specifying a branch avoids a large message from git about detached HEADs.
+        git checkout "${IMPALA_TOOLCHAIN_COMMIT_HASH}" -b "${IMPALA_TOOLCHAIN_BUILD_ID}"
+      else
+        pushd "${NATIVE_TOOLCHAIN_HOME}"
+      fi
+      echo "Begin building toolchain, may need several hours, please be patient...."
+      ./buildall.sh
+      popd
+    else
+      echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
+    fi
     if [[ "${DOWNLOAD_CDH_COMPONENTS}" = true ]]; then
       echo ">>> Downloading and extracting cdh components."
       "$IMPALA_HOME/bin/bootstrap_toolchain.py"
     fi
-    # Create soft link to locally builded native-toolchain on aarch64
-    if [[ "$(uname -p)" = "aarch64" ]]; then
-      mkdir -p $IMPALA_TOOLCHAIN_PACKAGES_HOME
-      cd "$IMPALA_TOOLCHAIN_PACKAGES_HOME"
-      ln -f -s ${NATIVE_TOOLCHAIN_HOME}/build/* .
-      cd -
-      if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
-        git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
-          "$IMPALA_HOME/../hadoopAarch64NativeLibs"
-      fi
-      cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib*  $HADOOP_HOME/lib/native/
-    fi
-
   else
     echo ">>> Downloading and extracting toolchain dependencies."
     "$IMPALA_HOME/bin/bootstrap_toolchain.py"
     echo "Toolchain bootstrap complete."
   fi
+  # Download prebuilt Hadoop native binaries for aarch64
+  if [[ "$(uname -p)" = "aarch64" ]]; then
+    if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
+      git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
+        "$IMPALA_HOME/../hadoopAarch64NativeLibs"
+    fi
+    cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib*  $HADOOP_HOME/lib/native/
+  fi
   if [[ "${USE_APACHE_HIVE}" = true ]]; then
     "$IMPALA_HOME/testdata/bin/patch_hive.sh"
   fi