You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/14 19:19:17 UTC
[impala] 03/03: IMPALA-12441: Simplify local toolchain development
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4be517e150148bd852d1fd106a4e4b1adf1229f1
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Mon Jul 24 16:24:47 2023 -0700
IMPALA-12441: Simplify local toolchain development
If NATIVE_TOOLCHAIN_HOME is set, it is used to provide the native toolchain
instead of the default in IMPALA_TOOLCHAIN. Setting it also overrides
IMPALA_TOOLCHAIN_PACKAGES_HOME and sets SKIP_TOOLCHAIN_BOOTSTRAP=true.
Adds IMPALA_TOOLCHAIN_REPO, IMPALA_TOOLCHAIN_BRANCH, and
IMPALA_TOOLCHAIN_COMMIT_HASH so everything is clear about what toolchain
is used for this Impala commit.
If the NATIVE_TOOLCHAIN_HOME directory does not yet exist, buildall.sh will
clone the repo and check out the commit hash mentioned above before building.
Also skips downloading Kudu if SKIP_TOOLCHAIN_BOOTSTRAP is true, as Kudu
is built from native-toolchain. Normalizes the aarch64 logic, which
previously skipped Kudu because it would always build native-toolchain locally.
Change-Id: I3a9e51b7f54c738d8cc01b32428ac88a344de376
Reviewed-on: http://gerrit.cloudera.org:8080/20267
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Michael Smith <mi...@cloudera.com>
---
bin/bootstrap_system.sh | 13 +------------
bin/bootstrap_toolchain.py | 3 ++-
bin/impala-config.sh | 12 +++++++++++-
buildall.sh | 40 ++++++++++++++++++++++++++--------------
4 files changed, 40 insertions(+), 28 deletions(-)
diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index ef9cfc520..3a50b25ad 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -481,23 +481,12 @@ echo -e "\n$SET_IMPALA_HOME" >> ~/.bashrc
eval "$SET_IMPALA_HOME"
if [[ $ARCH_NAME == 'aarch64' ]]; then
- echo -e "\nexport SKIP_TOOLCHAIN_BOOTSTRAP=true" >> \
- "${IMPALA_HOME}/bin/impala-config-local.sh"
SET_TOOLCHAIN_HOME="export NATIVE_TOOLCHAIN_HOME=${IMPALA_HOME}/../native-toolchain"
echo -e "\n$SET_TOOLCHAIN_HOME" >> ~/.bashrc
echo -e "\n$SET_TOOLCHAIN_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
eval "$SET_TOOLCHAIN_HOME"
- if ! [[ -d "$NATIVE_TOOLCHAIN_HOME" ]]; then
- time -p git clone https://github.com/cloudera/native-toolchain/ \
- "$NATIVE_TOOLCHAIN_HOME"
- fi
- cd "$NATIVE_TOOLCHAIN_HOME"
- git pull
- echo "Begin build tool chain, may need several hours, please be patient...."
+ # Provide access to ~/.cache on build machines so we can use ccache.
sudo chmod 755 ~/.cache
- ./buildall.sh
- cd -
- mkdir -p ${IMPALA_HOME}/toolchain
fi
# Try to prepopulate the m2 directory to save time
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index f52c76d20..45cd1abd4 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -568,7 +568,8 @@ def main():
if os.getenv("DOWNLOAD_CDH_COMPONENTS", "false") == "true":
create_directory_from_env_var("CDP_COMPONENTS_HOME")
create_directory_from_env_var("APACHE_COMPONENTS_HOME")
- if platform.processor() != "aarch64":
+ if os.getenv("SKIP_TOOLCHAIN_BOOTSTRAP", "false") != "true":
+ # Kudu is currently sourced from native-toolchain
downloads += get_kudu_downloads()
downloads += get_hadoop_downloads()
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 13c2f872c..f597c5439 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -86,6 +86,11 @@ export USE_AVRO_CPP=${USE_AVRO_CPP:=false}
# compile option is changed. The build id can be found in the output of the toolchain
# build jobs, it is constructed from the build number and toolchain git hash prefix.
export IMPALA_TOOLCHAIN_BUILD_ID=358-e7cfab15d3
+export IMPALA_TOOLCHAIN_REPO=\
+${IMPALA_TOOLCHAIN_REPO:-https://github.com/cloudera/native-toolchain.git}
+export IMPALA_TOOLCHAIN_BRANCH=${IMPALA_TOOLCHAIN_BRANCH:-master}
+export IMPALA_TOOLCHAIN_COMMIT_HASH=\
+${IMPALA_TOOLCHAIN_COMMIT_HASH-e7cfab15d36ae051747252b676f0a11a9c58fe05}
# Versions of toolchain dependencies.
# -----------------------------------
if $USE_AVRO_CPP; then
@@ -316,8 +321,13 @@ fi
# IMPALA_TOOLCHAIN_PACKAGES_HOME is the location inside IMPALA_TOOLCHAIN where native
# toolchain packages are placed. This uses a subdirectory that contains the information
# about the compiler to allow using different compiler versions.
-export IMPALA_TOOLCHAIN_PACKAGES_HOME=\
+IMPALA_TOOLCHAIN_PACKAGES_HOME=\
${IMPALA_TOOLCHAIN}/toolchain-packages-gcc${IMPALA_GCC_VERSION}
+if ! [ -z ${NATIVE_TOOLCHAIN_HOME-} ]; then
+ IMPALA_TOOLCHAIN_PACKAGES_HOME=$(realpath ${NATIVE_TOOLCHAIN_HOME})/build
+ export SKIP_TOOLCHAIN_BOOTSTRAP=true
+fi
+export IMPALA_TOOLCHAIN_PACKAGES_HOME
export CDP_HADOOP_URL=${CDP_HADOOP_URL-}
export CDP_HBASE_URL=${CDP_HBASE_URL-}
diff --git a/buildall.sh b/buildall.sh
index 4095cbed7..ba32813eb 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -416,29 +416,41 @@ bootstrap_dependencies() {
# Populate necessary thirdparty components unless it's set to be skipped.
if [[ "${SKIP_TOOLCHAIN_BOOTSTRAP}" = true ]]; then
- echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
+ if ! [ -z "${NATIVE_TOOLCHAIN_HOME}" ]; then
+ if ! [ -d "${NATIVE_TOOLCHAIN_HOME}" ]; then
+ mkdir -p "${NATIVE_TOOLCHAIN_HOME}"
+ pushd "${NATIVE_TOOLCHAIN_HOME}"
+ git init
+ git remote add toolchain "${IMPALA_TOOLCHAIN_REPO}"
+ git fetch toolchain "${IMPALA_TOOLCHAIN_BRANCH}"
+ # Specifying a branch avoids a large message from git about detached HEADs.
+ git checkout "${IMPALA_TOOLCHAIN_COMMIT_HASH}" -b "${IMPALA_TOOLCHAIN_BUILD_ID}"
+ else
+ pushd "${NATIVE_TOOLCHAIN_HOME}"
+ fi
+ echo "Begin building toolchain, may need several hours, please be patient...."
+ ./buildall.sh
+ popd
+ else
+ echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
+ fi
if [[ "${DOWNLOAD_CDH_COMPONENTS}" = true ]]; then
echo ">>> Downloading and extracting cdh components."
"$IMPALA_HOME/bin/bootstrap_toolchain.py"
fi
- # Create soft link to locally builded native-toolchain on aarch64
- if [[ "$(uname -p)" = "aarch64" ]]; then
- mkdir -p $IMPALA_TOOLCHAIN_PACKAGES_HOME
- cd "$IMPALA_TOOLCHAIN_PACKAGES_HOME"
- ln -f -s ${NATIVE_TOOLCHAIN_HOME}/build/* .
- cd -
- if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
- git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
- "$IMPALA_HOME/../hadoopAarch64NativeLibs"
- fi
- cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib* $HADOOP_HOME/lib/native/
- fi
-
else
echo ">>> Downloading and extracting toolchain dependencies."
"$IMPALA_HOME/bin/bootstrap_toolchain.py"
echo "Toolchain bootstrap complete."
fi
+ # Download prebuilt Hadoop native binaries for aarch64
+ if [[ "$(uname -p)" = "aarch64" ]]; then
+ if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
+ git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
+ "$IMPALA_HOME/../hadoopAarch64NativeLibs"
+ fi
+ cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib* $HADOOP_HOME/lib/native/
+ fi
if [[ "${USE_APACHE_HIVE}" = true ]]; then
"$IMPALA_HOME/testdata/bin/patch_hive.sh"
fi