You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/14 19:19:14 UTC

[impala] branch master updated (73430a2cd -> 4be517e15)

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from 73430a2cd IMPALA-12399: Add filter to skip OPEN_TXN events from HMS
     new cf819f967 IMPALA-12414: Add scripts to run Trino in the dev environment
     new 130a55e52 IMPALA-11284: Do non-optional rewrites for || and Between predicate
     new 4be517e15 IMPALA-12441: Simplify local toolchain development

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 bin/bootstrap_system.sh                            | 13 +------
 bin/bootstrap_toolchain.py                         |  3 +-
 bin/impala-config.sh                               | 12 ++++++-
 buildall.sh                                        | 40 ++++++++++++++--------
 .../org/apache/impala/analysis/ValuesStmt.java     | 18 +++++++---
 .../org/apache/impala/rewrite/ExprRewriter.java    |  7 ++++
 .../apache/impala/analysis/ExprRewriterTest.java   | 16 +++++++--
 .../bin/build-trino-docker-image.sh                | 15 ++++----
 .../ranger/java_home.sh => bin/kill-trino.sh}      |  3 +-
 .../minicluster_trino/Dockerfile}                  | 36 +++++++------------
 .../bin/minicluster_trino/hive.properties          |  7 ++--
 .../bin/minicluster_trino/iceberg.properties       |  8 ++---
 testdata/bin/{kill-mini-dfs.sh => run-trino.sh}    |  3 +-
 testdata/bin/{kill-mini-dfs.sh => trino-cli.sh}    |  3 +-
 .../functional-query/queries/QueryTest/values.test | 20 ++++++++++-
 15 files changed, 125 insertions(+), 79 deletions(-)
 copy infra/python/deps/download_requirements => testdata/bin/build-trino-docker-image.sh (68%)
 copy testdata/{cluster/ranger/java_home.sh => bin/kill-trino.sh} (94%)
 copy testdata/{cluster/node_templates/common/etc/init.d/hdfs-namenode => bin/minicluster_trino/Dockerfile} (58%)
 mode change 100755 => 100644
 copy bin/impala-gcovr => testdata/bin/minicluster_trino/hive.properties (84%)
 mode change 100755 => 100644
 copy bin/impala-gcovr => testdata/bin/minicluster_trino/iceberg.properties (81%)
 mode change 100755 => 100644
 copy testdata/bin/{kill-mini-dfs.sh => run-trino.sh} (89%)
 copy testdata/bin/{kill-mini-dfs.sh => trino-cli.sh} (91%)


[impala] 02/03: IMPALA-11284: Do non-optional rewrites for || and Between predicate

Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 130a55e5269ea023b43ef2c0b495989cb0759800
Author: Abhishek Rawat <ar...@cloudera.com>
AuthorDate: Wed Jun 1 17:50:20 2022 -0700

    IMPALA-11284: Do non-optional rewrites for || and Between predicate
    
    IMPALA-6590 disabled expression rewrites for ValuesStmt. However,
    CompoundVerticalBarExpr (||) cannot be executed directly without
    rewrite. This is because it could either be an OR operation with boolean
    arguments or CONCAT function call with string arguments.
    
    Backend cannot evaluate a BetweenPredicate and relies on rewriting
    BetweenPredicate into a conjunctive or disjunctive CompoundPredicate.
    
    This patch enables non-optional expression rewrites for ValuesStmt with
    CompoundVerticalBarExpr or BetweenPredicate.
    
    Testing:
    - Extended ExprRewriterTest and Planner test to have values clause
      with || and Between predicate
    
    Change-Id: I99b8b33bf6468d12b9e26f0a6e744feb7072619c
    Reviewed-on: http://gerrit.cloudera.org:8080/18581
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Reviewed-by: Daniel Becker <da...@cloudera.com>
    Tested-by: Riza Suminto <ri...@cloudera.com>
---
 .../java/org/apache/impala/analysis/ValuesStmt.java  | 18 ++++++++++++++----
 .../java/org/apache/impala/rewrite/ExprRewriter.java |  7 +++++++
 .../org/apache/impala/analysis/ExprRewriterTest.java | 16 ++++++++++++++--
 .../functional-query/queries/QueryTest/values.test   | 20 +++++++++++++++++++-
 4 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
index 6231497dc..d6a3d7348 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
@@ -17,13 +17,16 @@
 
 package org.apache.impala.analysis;
 
+import java.util.Arrays;
 import java.util.List;
 
 import com.google.common.base.Preconditions;
 
 import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.rewrite.BetweenToCompoundRule;
 import org.apache.impala.rewrite.ExprRewriter;
+import org.apache.impala.rewrite.ExtractCompoundVerticalBarExprRule;
 
 /**
  * Representation of a values() statement with a list of constant-expression lists.
@@ -84,11 +87,18 @@ public class ValuesStmt extends UnionStmt {
   @Override
   public ValuesStmt clone() { return new ValuesStmt(this); }
 
-  /**
-   * Intentionally left empty to disable expression rewrite for values clause.
-   */
   @Override
-  public void rewriteExprs(ExprRewriter rewriter) {}
+  public void rewriteExprs(ExprRewriter rewriter) throws AnalysisException {
+    // IMPALA-11284: Expression rewrites for VALUES() could result in performance
+    // regression since overhead can be huge and there is virtually no benefit of
+    // rewrite if the expression will only ever be evaluated once (IMPALA-6590).
+    // The following code only does the non-optional rewrites for || and BETWEEN
+    // operator as the backend cannot execute them directly.
+    ExprRewriter mandatoryRewriter = new ExprRewriter(Arrays.asList(
+        BetweenToCompoundRule.INSTANCE, ExtractCompoundVerticalBarExprRule.INSTANCE));
+    super.rewriteExprs(mandatoryRewriter);
+    rewriter.addNumChanges(mandatoryRewriter);
+  }
 
   @Override
   protected boolean shouldAvoidLossyCharPadding(Analyzer analyzer) {
diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java
index 37c092733..d30531ac6 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java
@@ -91,6 +91,13 @@ public class ExprRewriter {
     for (int i = 0; i < exprs.size(); ++i) exprs.set(i, rewrite(exprs.get(i), analyzer));
   }
 
+  /**
+   * Add numChanges_ of otherRewriter to this rewriter's numChanges_.
+   */
+  public void addNumChanges(ExprRewriter otherRewriter) {
+    numChanges_ += otherRewriter.numChanges_;
+  }
+
   public void reset() { numChanges_ = 0; }
   public boolean changed() { return numChanges_ > 0; }
   public int getNumChanges() { return numChanges_; }
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
index 9880db3b2..4c9457a39 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
@@ -149,8 +149,20 @@ public class ExprRewriterTest extends AnalyzerTest {
         stmt_, stmt_), 47, 23);
     // Constant select.
     RewritesOk("select 1, 2, 3, 4", 4, 4);
-    // Values stmt - expression rewrites are disabled.
-    RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1)", 0, 0);
+    // Values stmt - expression rewrites are not required in this test cases.
+    RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1),"
+            + "(CAST(true OR false AS INT), '2', 3*1+2-4, 1.1%1)",
+        0, 0);
+    RewritesOk("values(CONCAT('a', 'b'), true OR true)", 0, 0);
+    // Values stmt - expression rewrites are required for || and Between predicate.
+    RewritesOk("values(1 <= 2 || 'impala' <> 'IMPALA'), (0.5 BETWEEN 0 AND 1),"
+            + "('a' NOT BETWEEN 'b' AND 'c')",
+        3, 0);
+    // Values stmt - expression rewrites are required for || and Between predicate that
+    // is not at root Expr.
+    RewritesOk("values(1 <= 2 AND ((0.5 BETWEEN 0 AND 1) AND "
+            + "(('a' || 'b') = 'ab' AND (true || false))))",
+        3, 0);
     // Test WHERE-clause subqueries.
     RewritesOk("select id, int_col from functional.alltypes a " +
         "where exists (select 1 from functional.alltypes " +
diff --git a/testdata/workloads/functional-query/queries/QueryTest/values.test b/testdata/workloads/functional-query/queries/QueryTest/values.test
index bfda103b2..f19830dbe 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/values.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/values.test
@@ -140,4 +140,22 @@ select cast("0.43149576573887316" as double)
 0.43149576573887316
 ---- TYPES
 DOUBLE
-====
\ No newline at end of file
+====
+---- QUERY
+# IMPALA-11284: Don't skip rewrites for || and BETWEEN operator as the backend cannot
+# execute them directly.
+select * from
+(
+  values (concat("a", "b" || "c"), 1 <= 2 AND ((0.5 BETWEEN 0 AND 1) AND (true || false))),
+         ("hello" || "world", 0 <= 1 || 0.5 < 0.6),
+         ("impala", 4.0 BETWEEN 3.2 AND 4.1),
+         ("sql", 'a' NOT BETWEEN 'b' AND 'c')
+) t;
+---- RESULTS
+'abc',true
+'helloworld',true
+'impala',true
+'sql',true
+---- TYPES
+string,boolean
+====


[impala] 01/03: IMPALA-12414: Add scripts to run Trino in the dev environment

Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit cf819f967b2ba01e9903c8f69fea0ad09e35e34e
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Thu Aug 31 17:01:31 2023 +0200

    IMPALA-12414: Add scripts to run Trino in the dev environment
    
    This patch set adds a couple of scripts that make it easy to run
    Trino in the dev environment.
    
    First, one needs to build our custom Trino docker image via:
    
     testdata/bin/build-trino-docker-image.sh
    
    Then we can run the Trino container via:
    
     testdata/bin/run-trino.sh
    
    One can connect to the Trino CLI by:
    
     testdata/bin/trino-cli.sh
    
    Trino will be configured to access our HMS and HDFS. We add the
    'hive' and 'iceberg' catalogs for Trino. Legacy Hive tables can
    be accessed via the 'hive' catalog, while Iceberg tables (only the
    ones that reside in HiveCatalog) can be accessed via the 'iceberg'
    catalog. E.g.:
    
    trino> use iceberg.functional_parquet;
    trino:functional_parquet> select count(*)
                              from iceberg_mixed_file_format;
     _col0
    -------
         3
    (1 row)
    
    Testing
     * no tests added
     * later we might always run Trino as part of the minicluster and
       add interop tests with it
    
    Change-Id: I49818c7a95e23988b3fbc3d31b4c7fa738e0d952
    Reviewed-on: http://gerrit.cloudera.org:8080/20444
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 testdata/bin/build-trino-docker-image.sh          | 28 +++++++++++++++++++
 testdata/bin/kill-trino.sh                        | 20 ++++++++++++++
 testdata/bin/minicluster_trino/Dockerfile         | 33 +++++++++++++++++++++++
 testdata/bin/minicluster_trino/hive.properties    | 20 ++++++++++++++
 testdata/bin/minicluster_trino/iceberg.properties | 21 +++++++++++++++
 testdata/bin/run-trino.sh                         | 20 ++++++++++++++
 testdata/bin/trino-cli.sh                         | 20 ++++++++++++++
 7 files changed, 162 insertions(+)

diff --git a/testdata/bin/build-trino-docker-image.sh b/testdata/bin/build-trino-docker-image.sh
new file mode 100755
index 000000000..4c86a7a7d
--- /dev/null
+++ b/testdata/bin/build-trino-docker-image.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Copy cluster configs to trino docker directory.
+pushd ${HADOOP_CONF_DIR}
+cp hive-site.xml core-site.xml hdfs-site.xml ${IMPALA_HOME}/testdata/bin/minicluster_trino
+popd
+
+# Build trino image with USERNAME=$USER so Trino will be able to write to HDFS.
+docker build ${IMPALA_HOME}/testdata/bin/minicluster_trino -t impala-minicluster-trino \
+    --build-arg USERNAME=$USER
+
diff --git a/testdata/bin/kill-trino.sh b/testdata/bin/kill-trino.sh
new file mode 100755
index 000000000..aca786cdc
--- /dev/null
+++ b/testdata/bin/kill-trino.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+docker stop impala-minicluster-trino
diff --git a/testdata/bin/minicluster_trino/Dockerfile b/testdata/bin/minicluster_trino/Dockerfile
new file mode 100644
index 000000000..fa88a68c6
--- /dev/null
+++ b/testdata/bin/minicluster_trino/Dockerfile
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Use an official Trino image as the base
+FROM trinodb/trino:latest
+
+# Use the developer username, so Trino will have write access to HDFS
+ARG USERNAME
+
+RUN \
+    sed -i 's/http-server.http.port=8080/http-server.http.port=9091/' /etc/trino/config.properties && \
+    sed -i 's/localhost:8080/localhost:9091/' /etc/trino/config.properties && \
+    echo "-DHADOOP_USER_NAME=$USERNAME" >> /etc/trino/jvm.config
+
+COPY hive-site.xml core-site.xml hdfs-site.xml /etc/
+COPY iceberg.properties hive.properties /etc/trino/catalog/
+
+# Expose the Trino port
+EXPOSE 9091
diff --git a/testdata/bin/minicluster_trino/hive.properties b/testdata/bin/minicluster_trino/hive.properties
new file mode 100644
index 000000000..4a442f019
--- /dev/null
+++ b/testdata/bin/minicluster_trino/hive.properties
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+connector.name=hive
+hive.metastore.uri=thrift://localhost:9083
+hive.config.resources=/etc/hive-site.xml,/etc/hdfs-site.xml,/etc/core-site.xml
diff --git a/testdata/bin/minicluster_trino/iceberg.properties b/testdata/bin/minicluster_trino/iceberg.properties
new file mode 100644
index 000000000..bab71238d
--- /dev/null
+++ b/testdata/bin/minicluster_trino/iceberg.properties
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+connector.name=iceberg
+iceberg.catalog.type=hive_metastore
+hive.metastore.uri=thrift://localhost:9083
+hive.config.resources=/etc/hive-site.xml,/etc/hdfs-site.xml,/etc/core-site.xml
diff --git a/testdata/bin/run-trino.sh b/testdata/bin/run-trino.sh
new file mode 100755
index 000000000..e96899576
--- /dev/null
+++ b/testdata/bin/run-trino.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+docker run -detach --network=host --name impala-minicluster-trino impala-minicluster-trino
diff --git a/testdata/bin/trino-cli.sh b/testdata/bin/trino-cli.sh
new file mode 100755
index 000000000..31f1b3b02
--- /dev/null
+++ b/testdata/bin/trino-cli.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+docker exec -it impala-minicluster-trino trino --server=localhost:9091


[impala] 03/03: IMPALA-12441: Simplify local toolchain development

Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4be517e150148bd852d1fd106a4e4b1adf1229f1
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Mon Jul 24 16:24:47 2023 -0700

    IMPALA-12441: Simplify local toolchain development
    
    If NATIVE_TOOLCHAIN_HOME is set, that will be used to provide the native
    toolchain instead of the default in IMPALA_TOOLCHAIN. Setting it also overrides
    IMPALA_TOOLCHAIN_PACKAGES_HOME and sets SKIP_TOOLCHAIN_BOOTSTRAP=true.
    
    Adds IMPALA_TOOLCHAIN_REPO, IMPALA_TOOLCHAIN_BRANCH, and
    IMPALA_TOOLCHAIN_COMMIT_HASH so everything is clear about what toolchain
    is used for this Impala commit.
    
    If NATIVE_TOOLCHAIN_HOME does not yet exist, buildall.sh will clone the
    repo and checkout the commit hash mentioned above before building.
    
    Also skips downloading Kudu if SKIP_TOOLCHAIN_BOOTSTRAP is true as Kudu
    is built from native-toolchain. Normalizes aarch64 logic, which skipped
    Kudu because it would always build native-toolchain locally.
    
    Change-Id: I3a9e51b7f54c738d8cc01b32428ac88a344de376
    Reviewed-on: http://gerrit.cloudera.org:8080/20267
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Michael Smith <mi...@cloudera.com>
---
 bin/bootstrap_system.sh    | 13 +------------
 bin/bootstrap_toolchain.py |  3 ++-
 bin/impala-config.sh       | 12 +++++++++++-
 buildall.sh                | 40 ++++++++++++++++++++++++++--------------
 4 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index ef9cfc520..3a50b25ad 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -481,23 +481,12 @@ echo -e "\n$SET_IMPALA_HOME" >> ~/.bashrc
 eval "$SET_IMPALA_HOME"
 
 if [[ $ARCH_NAME == 'aarch64' ]]; then
-  echo -e "\nexport SKIP_TOOLCHAIN_BOOTSTRAP=true" >> \
-    "${IMPALA_HOME}/bin/impala-config-local.sh"
   SET_TOOLCHAIN_HOME="export NATIVE_TOOLCHAIN_HOME=${IMPALA_HOME}/../native-toolchain"
   echo -e "\n$SET_TOOLCHAIN_HOME" >> ~/.bashrc
   echo -e "\n$SET_TOOLCHAIN_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
   eval "$SET_TOOLCHAIN_HOME"
-  if ! [[ -d "$NATIVE_TOOLCHAIN_HOME" ]]; then
-    time -p git clone https://github.com/cloudera/native-toolchain/ \
-      "$NATIVE_TOOLCHAIN_HOME"
-  fi
-  cd "$NATIVE_TOOLCHAIN_HOME"
-  git pull
-  echo "Begin build tool chain, may need several hours, please be patient...."
+  # Provide access to ~/.cache on build machines so we can use ccache.
   sudo chmod 755 ~/.cache
-  ./buildall.sh
-  cd -
-  mkdir -p ${IMPALA_HOME}/toolchain
 fi
 
 # Try to prepopulate the m2 directory to save time
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index f52c76d20..45cd1abd4 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -568,7 +568,8 @@ def main():
   if os.getenv("DOWNLOAD_CDH_COMPONENTS", "false") == "true":
     create_directory_from_env_var("CDP_COMPONENTS_HOME")
     create_directory_from_env_var("APACHE_COMPONENTS_HOME")
-    if platform.processor() != "aarch64":
+    if os.getenv("SKIP_TOOLCHAIN_BOOTSTRAP", "false") != "true":
+      # Kudu is currently sourced from native-toolchain
       downloads += get_kudu_downloads()
     downloads += get_hadoop_downloads()
 
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 13c2f872c..f597c5439 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -86,6 +86,11 @@ export USE_AVRO_CPP=${USE_AVRO_CPP:=false}
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
 export IMPALA_TOOLCHAIN_BUILD_ID=358-e7cfab15d3
+export IMPALA_TOOLCHAIN_REPO=\
+${IMPALA_TOOLCHAIN_REPO:-https://github.com/cloudera/native-toolchain.git}
+export IMPALA_TOOLCHAIN_BRANCH=${IMPALA_TOOLCHAIN_BRANCH:-master}
+export IMPALA_TOOLCHAIN_COMMIT_HASH=\
+${IMPALA_TOOLCHAIN_COMMIT_HASH-e7cfab15d36ae051747252b676f0a11a9c58fe05}
 # Versions of toolchain dependencies.
 # -----------------------------------
 if $USE_AVRO_CPP; then
@@ -316,8 +321,13 @@ fi
 # IMPALA_TOOLCHAIN_PACKAGES_HOME is the location inside IMPALA_TOOLCHAIN where native
 # toolchain packages are placed. This uses a subdirectory that contains the information
 # about the compiler to allow using different compiler versions.
-export IMPALA_TOOLCHAIN_PACKAGES_HOME=\
+IMPALA_TOOLCHAIN_PACKAGES_HOME=\
 ${IMPALA_TOOLCHAIN}/toolchain-packages-gcc${IMPALA_GCC_VERSION}
+if ! [ -z ${NATIVE_TOOLCHAIN_HOME-} ]; then
+  IMPALA_TOOLCHAIN_PACKAGES_HOME=$(realpath ${NATIVE_TOOLCHAIN_HOME})/build
+  export SKIP_TOOLCHAIN_BOOTSTRAP=true
+fi
+export IMPALA_TOOLCHAIN_PACKAGES_HOME
 
 export CDP_HADOOP_URL=${CDP_HADOOP_URL-}
 export CDP_HBASE_URL=${CDP_HBASE_URL-}
diff --git a/buildall.sh b/buildall.sh
index 4095cbed7..ba32813eb 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -416,29 +416,41 @@ bootstrap_dependencies() {
 
   # Populate necessary thirdparty components unless it's set to be skipped.
   if [[ "${SKIP_TOOLCHAIN_BOOTSTRAP}" = true ]]; then
-    echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
+    if ! [ -z "${NATIVE_TOOLCHAIN_HOME}" ]; then
+      if ! [ -d "${NATIVE_TOOLCHAIN_HOME}" ]; then
+        mkdir -p "${NATIVE_TOOLCHAIN_HOME}"
+        pushd "${NATIVE_TOOLCHAIN_HOME}"
+        git init
+        git remote add toolchain "${IMPALA_TOOLCHAIN_REPO}"
+        git fetch toolchain "${IMPALA_TOOLCHAIN_BRANCH}"
+        # Specifying a branch avoids a large message from git about detached HEADs.
+        git checkout "${IMPALA_TOOLCHAIN_COMMIT_HASH}" -b "${IMPALA_TOOLCHAIN_BUILD_ID}"
+      else
+        pushd "${NATIVE_TOOLCHAIN_HOME}"
+      fi
+      echo "Begin building toolchain, may need several hours, please be patient...."
+      ./buildall.sh
+      popd
+    else
+      echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
+    fi
     if [[ "${DOWNLOAD_CDH_COMPONENTS}" = true ]]; then
       echo ">>> Downloading and extracting cdh components."
       "$IMPALA_HOME/bin/bootstrap_toolchain.py"
     fi
-    # Create soft link to locally builded native-toolchain on aarch64
-    if [[ "$(uname -p)" = "aarch64" ]]; then
-      mkdir -p $IMPALA_TOOLCHAIN_PACKAGES_HOME
-      cd "$IMPALA_TOOLCHAIN_PACKAGES_HOME"
-      ln -f -s ${NATIVE_TOOLCHAIN_HOME}/build/* .
-      cd -
-      if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
-        git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
-          "$IMPALA_HOME/../hadoopAarch64NativeLibs"
-      fi
-      cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib*  $HADOOP_HOME/lib/native/
-    fi
-
   else
     echo ">>> Downloading and extracting toolchain dependencies."
     "$IMPALA_HOME/bin/bootstrap_toolchain.py"
     echo "Toolchain bootstrap complete."
   fi
+  # Download prebuilt Hadoop native binaries for aarch64
+  if [[ "$(uname -p)" = "aarch64" ]]; then
+    if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
+      git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
+        "$IMPALA_HOME/../hadoopAarch64NativeLibs"
+    fi
+    cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib*  $HADOOP_HOME/lib/native/
+  fi
   if [[ "${USE_APACHE_HIVE}" = true ]]; then
     "$IMPALA_HOME/testdata/bin/patch_hive.sh"
   fi