You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/14 19:19:14 UTC
[impala] branch master updated (73430a2cd -> 4be517e15)
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
from 73430a2cd IMPALA-12399: Add filter to skip OPEN_TXN events from HMS
new cf819f967 IMPALA-12414: Add scripts to run Trino in the dev environment
new 130a55e52 IMPALA-11284: Do non-optional rewrites for || and Between predicate
new 4be517e15 IMPALA-12441: Simplify local toolchain development
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
bin/bootstrap_system.sh | 13 +------
bin/bootstrap_toolchain.py | 3 +-
bin/impala-config.sh | 12 ++++++-
buildall.sh | 40 ++++++++++++++--------
.../org/apache/impala/analysis/ValuesStmt.java | 18 +++++++---
.../org/apache/impala/rewrite/ExprRewriter.java | 7 ++++
.../apache/impala/analysis/ExprRewriterTest.java | 16 +++++++--
.../bin/build-trino-docker-image.sh | 15 ++++----
.../ranger/java_home.sh => bin/kill-trino.sh} | 3 +-
.../minicluster_trino/Dockerfile} | 36 +++++++------------
.../bin/minicluster_trino/hive.properties | 7 ++--
.../bin/minicluster_trino/iceberg.properties | 8 ++---
testdata/bin/{kill-mini-dfs.sh => run-trino.sh} | 3 +-
testdata/bin/{kill-mini-dfs.sh => trino-cli.sh} | 3 +-
.../functional-query/queries/QueryTest/values.test | 20 ++++++++++-
15 files changed, 125 insertions(+), 79 deletions(-)
copy infra/python/deps/download_requirements => testdata/bin/build-trino-docker-image.sh (68%)
copy testdata/{cluster/ranger/java_home.sh => bin/kill-trino.sh} (94%)
copy testdata/{cluster/node_templates/common/etc/init.d/hdfs-namenode => bin/minicluster_trino/Dockerfile} (58%)
mode change 100755 => 100644
copy bin/impala-gcovr => testdata/bin/minicluster_trino/hive.properties (84%)
mode change 100755 => 100644
copy bin/impala-gcovr => testdata/bin/minicluster_trino/iceberg.properties (81%)
mode change 100755 => 100644
copy testdata/bin/{kill-mini-dfs.sh => run-trino.sh} (89%)
copy testdata/bin/{kill-mini-dfs.sh => trino-cli.sh} (91%)
[impala] 02/03: IMPALA-11284: Do non-optional rewrites for || and Between predicate
Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 130a55e5269ea023b43ef2c0b495989cb0759800
Author: Abhishek Rawat <ar...@cloudera.com>
AuthorDate: Wed Jun 1 17:50:20 2022 -0700
IMPALA-11284: Do non-optional rewrites for || and Between predicate
IMPALA-6590 disabled expression rewrites for ValuesStmt. However,
CompoundVerticalBarExpr (||) cannot be executed directly without
rewrite. This is because it could either be an OR operation with boolean
arguments or CONCAT function call with string arguments.
Backend cannot evaluate a BetweenPredicate and relies on rewriting
BetweenPredicate into a conjunctive or disjunctive CompoundPredicate.
This patch enables non-optional expression rewrites for ValuesStmt with
CompoundVerticalBarExpr or BetweenPredicate.
Testing:
- Extended ExprRewriterTest and Planner test to have values clause
with || and Between predicate
Change-Id: I99b8b33bf6468d12b9e26f0a6e744feb7072619c
Reviewed-on: http://gerrit.cloudera.org:8080/18581
Reviewed-by: Michael Smith <mi...@cloudera.com>
Reviewed-by: Daniel Becker <da...@cloudera.com>
Tested-by: Riza Suminto <ri...@cloudera.com>
---
.../java/org/apache/impala/analysis/ValuesStmt.java | 18 ++++++++++++++----
.../java/org/apache/impala/rewrite/ExprRewriter.java | 7 +++++++
.../org/apache/impala/analysis/ExprRewriterTest.java | 16 ++++++++++++++--
.../functional-query/queries/QueryTest/values.test | 20 +++++++++++++++++++-
4 files changed, 54 insertions(+), 7 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
index 6231497dc..d6a3d7348 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
@@ -17,13 +17,16 @@
package org.apache.impala.analysis;
+import java.util.Arrays;
import java.util.List;
import com.google.common.base.Preconditions;
import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
import org.apache.impala.common.AnalysisException;
+import org.apache.impala.rewrite.BetweenToCompoundRule;
import org.apache.impala.rewrite.ExprRewriter;
+import org.apache.impala.rewrite.ExtractCompoundVerticalBarExprRule;
/**
* Representation of a values() statement with a list of constant-expression lists.
@@ -84,11 +87,18 @@ public class ValuesStmt extends UnionStmt {
@Override
public ValuesStmt clone() { return new ValuesStmt(this); }
- /**
- * Intentionally left empty to disable expression rewrite for values clause.
- */
@Override
- public void rewriteExprs(ExprRewriter rewriter) {}
+ public void rewriteExprs(ExprRewriter rewriter) throws AnalysisException {
+ // IMPALA-11284: Expression rewrites for VALUES() could result in performance
+ // regression since overhead can be huge and there is virtually no benefit of
+ // rewrite if the expression will only ever be evaluated once (IMPALA-6590).
+ // The following code only does the non-optional rewrites for || and BETWEEN
+ // operator as the backend cannot execute them directly.
+ ExprRewriter mandatoryRewriter = new ExprRewriter(Arrays.asList(
+ BetweenToCompoundRule.INSTANCE, ExtractCompoundVerticalBarExprRule.INSTANCE));
+ super.rewriteExprs(mandatoryRewriter);
+ rewriter.addNumChanges(mandatoryRewriter);
+ }
@Override
protected boolean shouldAvoidLossyCharPadding(Analyzer analyzer) {
diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java
index 37c092733..d30531ac6 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java
@@ -91,6 +91,13 @@ public class ExprRewriter {
for (int i = 0; i < exprs.size(); ++i) exprs.set(i, rewrite(exprs.get(i), analyzer));
}
+ /**
+ * Add numChanges_ of otherRewriter to this rewriter's numChanges_.
+ */
+ public void addNumChanges(ExprRewriter otherRewriter) {
+ numChanges_ += otherRewriter.numChanges_;
+ }
+
public void reset() { numChanges_ = 0; }
public boolean changed() { return numChanges_ > 0; }
public int getNumChanges() { return numChanges_; }
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
index 9880db3b2..4c9457a39 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
@@ -149,8 +149,20 @@ public class ExprRewriterTest extends AnalyzerTest {
stmt_, stmt_), 47, 23);
// Constant select.
RewritesOk("select 1, 2, 3, 4", 4, 4);
- // Values stmt - expression rewrites are disabled.
- RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1)", 0, 0);
+ // Values stmt - expression rewrites are not required in these test cases.
+ RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1),"
+ + "(CAST(true OR false AS INT), '2', 3*1+2-4, 1.1%1)",
+ 0, 0);
+ RewritesOk("values(CONCAT('a', 'b'), true OR true)", 0, 0);
+ // Values stmt - expression rewrites are required for || and Between predicate.
+ RewritesOk("values(1 <= 2 || 'impala' <> 'IMPALA'), (0.5 BETWEEN 0 AND 1),"
+ + "('a' NOT BETWEEN 'b' AND 'c')",
+ 3, 0);
+ // Values stmt - expression rewrites are required for || and Between predicate that
+ // is not at root Expr.
+ RewritesOk("values(1 <= 2 AND ((0.5 BETWEEN 0 AND 1) AND "
+ + "(('a' || 'b') = 'ab' AND (true || false))))",
+ 3, 0);
// Test WHERE-clause subqueries.
RewritesOk("select id, int_col from functional.alltypes a " +
"where exists (select 1 from functional.alltypes " +
diff --git a/testdata/workloads/functional-query/queries/QueryTest/values.test b/testdata/workloads/functional-query/queries/QueryTest/values.test
index bfda103b2..f19830dbe 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/values.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/values.test
@@ -140,4 +140,22 @@ select cast("0.43149576573887316" as double)
0.43149576573887316
---- TYPES
DOUBLE
-====
\ No newline at end of file
+====
+---- QUERY
+# IMPALA-11284: Don't skip rewrites for || and BETWEEN operator as the backend cannot
+# execute them directly.
+select * from
+(
+ values (concat("a", "b" || "c"), 1 <= 2 AND ((0.5 BETWEEN 0 AND 1) AND (true || false))),
+ ("hello" || "world", 0 <= 1 || 0.5 < 0.6),
+ ("impala", 4.0 BETWEEN 3.2 AND 4.1),
+ ("sql", 'a' NOT BETWEEN 'b' AND 'c')
+) t;
+---- RESULTS
+'abc',true
+'helloworld',true
+'impala',true
+'sql',true
+---- TYPES
+string,boolean
+====
[impala] 01/03: IMPALA-12414: Add scripts to run Trino in the dev environment
Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit cf819f967b2ba01e9903c8f69fea0ad09e35e34e
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Thu Aug 31 17:01:31 2023 +0200
IMPALA-12414: Add scripts to run Trino in the dev environment
This patch set adds a couple of scripts that make it easy to run
Trino in the dev environment.
First, one needs to build our custom Trino Docker image via:
testdata/bin/build-trino-docker-image.sh
Then we can run the Trino container via:
testdata/bin/run-trino.sh
One can connect to the Trino CLI by:
testdata/bin/trino-cli.sh
Trino will be configured to access our HMS and HDFS. We add the
'hive' and 'iceberg' catalogs for Trino. Legacy Hive tables can
be accessed via the 'hive' catalog, while Iceberg tables (only the
ones that reside in HiveCatalog) can be accessed via the 'iceberg'
catalog. E.g.:
trino> use iceberg.functional_parquet;
trino:functional_parquet> select count(*)
from iceberg_mixed_file_format;
_col0
-------
3
(1 row)
Testing
* no tests added
* later we might always run Trino as part of the minicluster and
add interop tests with it
Change-Id: I49818c7a95e23988b3fbc3d31b4c7fa738e0d952
Reviewed-on: http://gerrit.cloudera.org:8080/20444
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
testdata/bin/build-trino-docker-image.sh | 28 +++++++++++++++++++
testdata/bin/kill-trino.sh | 20 ++++++++++++++
testdata/bin/minicluster_trino/Dockerfile | 33 +++++++++++++++++++++++
testdata/bin/minicluster_trino/hive.properties | 20 ++++++++++++++
testdata/bin/minicluster_trino/iceberg.properties | 21 +++++++++++++++
testdata/bin/run-trino.sh | 20 ++++++++++++++
testdata/bin/trino-cli.sh | 20 ++++++++++++++
7 files changed, 162 insertions(+)
diff --git a/testdata/bin/build-trino-docker-image.sh b/testdata/bin/build-trino-docker-image.sh
new file mode 100755
index 000000000..4c86a7a7d
--- /dev/null
+++ b/testdata/bin/build-trino-docker-image.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Copy cluster configs to trino docker directory.
+pushd ${HADOOP_CONF_DIR}
+cp hive-site.xml core-site.xml hdfs-site.xml ${IMPALA_HOME}/testdata/bin/minicluster_trino
+popd
+
+# Build trino image with USERNAME=$USER so Trino will be able to write to HDFS.
+docker build ${IMPALA_HOME}/testdata/bin/minicluster_trino -t impala-minicluster-trino \
+ --build-arg USERNAME=$USER
+
diff --git a/testdata/bin/kill-trino.sh b/testdata/bin/kill-trino.sh
new file mode 100755
index 000000000..aca786cdc
--- /dev/null
+++ b/testdata/bin/kill-trino.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+docker stop impala-minicluster-trino
diff --git a/testdata/bin/minicluster_trino/Dockerfile b/testdata/bin/minicluster_trino/Dockerfile
new file mode 100644
index 000000000..fa88a68c6
--- /dev/null
+++ b/testdata/bin/minicluster_trino/Dockerfile
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Use an official Trino image as the base
+FROM trinodb/trino:latest
+
+# Use the developer username, so Trino will have write access to HDFS
+ARG USERNAME
+
+RUN \
+ sed -i 's/http-server.http.port=8080/http-server.http.port=9091/' /etc/trino/config.properties && \
+ sed -i 's/localhost:8080/localhost:9091/' /etc/trino/config.properties && \
+ echo "-DHADOOP_USER_NAME=$USERNAME" >> /etc/trino/jvm.config
+
+COPY hive-site.xml core-site.xml hdfs-site.xml /etc/
+COPY iceberg.properties hive.properties /etc/trino/catalog/
+
+# Expose the Trino port
+EXPOSE 9091
diff --git a/testdata/bin/minicluster_trino/hive.properties b/testdata/bin/minicluster_trino/hive.properties
new file mode 100644
index 000000000..4a442f019
--- /dev/null
+++ b/testdata/bin/minicluster_trino/hive.properties
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+connector.name=hive
+hive.metastore.uri=thrift://localhost:9083
+hive.config.resources=/etc/hive-site.xml,/etc/hdfs-site.xml,/etc/core-site.xml
diff --git a/testdata/bin/minicluster_trino/iceberg.properties b/testdata/bin/minicluster_trino/iceberg.properties
new file mode 100644
index 000000000..bab71238d
--- /dev/null
+++ b/testdata/bin/minicluster_trino/iceberg.properties
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+connector.name=iceberg
+iceberg.catalog.type=hive_metastore
+hive.metastore.uri=thrift://localhost:9083
+hive.config.resources=/etc/hive-site.xml,/etc/hdfs-site.xml,/etc/core-site.xml
diff --git a/testdata/bin/run-trino.sh b/testdata/bin/run-trino.sh
new file mode 100755
index 000000000..e96899576
--- /dev/null
+++ b/testdata/bin/run-trino.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+docker run -detach --network=host --name impala-minicluster-trino impala-minicluster-trino
diff --git a/testdata/bin/trino-cli.sh b/testdata/bin/trino-cli.sh
new file mode 100755
index 000000000..31f1b3b02
--- /dev/null
+++ b/testdata/bin/trino-cli.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+docker exec -it impala-minicluster-trino trino --server=localhost:9091
[impala] 03/03: IMPALA-12441: Simplify local toolchain development
Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4be517e150148bd852d1fd106a4e4b1adf1229f1
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Mon Jul 24 16:24:47 2023 -0700
IMPALA-12441: Simplify local toolchain development
If NATIVE_TOOLCHAIN_HOME is set, that will be used to provide the native
toolchain instead of the default in IMPALA_TOOLCHAIN. Setting it also overrides
IMPALA_TOOLCHAIN_PACKAGES_HOME and sets SKIP_TOOLCHAIN_BOOTSTRAP=true.
Adds IMPALA_TOOLCHAIN_REPO, IMPALA_TOOLCHAIN_BRANCH, and
IMPALA_TOOLCHAIN_COMMIT_HASH so everything is clear about what toolchain
is used for this Impala commit.
If NATIVE_TOOLCHAIN_HOME does not yet exist, buildall.sh will clone the
repo and checkout the commit hash mentioned above before building.
Also skips downloading Kudu if SKIP_TOOLCHAIN_BOOTSTRAP is true as Kudu
is built from native-toolchain. Normalizes aarch64 logic, which skipped
Kudu because it would always build native-toolchain locally.
Change-Id: I3a9e51b7f54c738d8cc01b32428ac88a344de376
Reviewed-on: http://gerrit.cloudera.org:8080/20267
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Michael Smith <mi...@cloudera.com>
---
bin/bootstrap_system.sh | 13 +------------
bin/bootstrap_toolchain.py | 3 ++-
bin/impala-config.sh | 12 +++++++++++-
buildall.sh | 40 ++++++++++++++++++++++++++--------------
4 files changed, 40 insertions(+), 28 deletions(-)
diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index ef9cfc520..3a50b25ad 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -481,23 +481,12 @@ echo -e "\n$SET_IMPALA_HOME" >> ~/.bashrc
eval "$SET_IMPALA_HOME"
if [[ $ARCH_NAME == 'aarch64' ]]; then
- echo -e "\nexport SKIP_TOOLCHAIN_BOOTSTRAP=true" >> \
- "${IMPALA_HOME}/bin/impala-config-local.sh"
SET_TOOLCHAIN_HOME="export NATIVE_TOOLCHAIN_HOME=${IMPALA_HOME}/../native-toolchain"
echo -e "\n$SET_TOOLCHAIN_HOME" >> ~/.bashrc
echo -e "\n$SET_TOOLCHAIN_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
eval "$SET_TOOLCHAIN_HOME"
- if ! [[ -d "$NATIVE_TOOLCHAIN_HOME" ]]; then
- time -p git clone https://github.com/cloudera/native-toolchain/ \
- "$NATIVE_TOOLCHAIN_HOME"
- fi
- cd "$NATIVE_TOOLCHAIN_HOME"
- git pull
- echo "Begin build tool chain, may need several hours, please be patient...."
+ # Provide access to ~/.cache on build machines so we can use ccache.
sudo chmod 755 ~/.cache
- ./buildall.sh
- cd -
- mkdir -p ${IMPALA_HOME}/toolchain
fi
# Try to prepopulate the m2 directory to save time
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index f52c76d20..45cd1abd4 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -568,7 +568,8 @@ def main():
if os.getenv("DOWNLOAD_CDH_COMPONENTS", "false") == "true":
create_directory_from_env_var("CDP_COMPONENTS_HOME")
create_directory_from_env_var("APACHE_COMPONENTS_HOME")
- if platform.processor() != "aarch64":
+ if os.getenv("SKIP_TOOLCHAIN_BOOTSTRAP", "false") != "true":
+ # Kudu is currently sourced from native-toolchain
downloads += get_kudu_downloads()
downloads += get_hadoop_downloads()
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 13c2f872c..f597c5439 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -86,6 +86,11 @@ export USE_AVRO_CPP=${USE_AVRO_CPP:=false}
# compile option is changed. The build id can be found in the output of the toolchain
# build jobs, it is constructed from the build number and toolchain git hash prefix.
export IMPALA_TOOLCHAIN_BUILD_ID=358-e7cfab15d3
+export IMPALA_TOOLCHAIN_REPO=\
+${IMPALA_TOOLCHAIN_REPO:-https://github.com/cloudera/native-toolchain.git}
+export IMPALA_TOOLCHAIN_BRANCH=${IMPALA_TOOLCHAIN_BRANCH:-master}
+export IMPALA_TOOLCHAIN_COMMIT_HASH=\
+${IMPALA_TOOLCHAIN_COMMIT_HASH-e7cfab15d36ae051747252b676f0a11a9c58fe05}
# Versions of toolchain dependencies.
# -----------------------------------
if $USE_AVRO_CPP; then
@@ -316,8 +321,13 @@ fi
# IMPALA_TOOLCHAIN_PACKAGES_HOME is the location inside IMPALA_TOOLCHAIN where native
# toolchain packages are placed. This uses a subdirectory that contains the information
# about the compiler to allow using different compiler versions.
-export IMPALA_TOOLCHAIN_PACKAGES_HOME=\
+IMPALA_TOOLCHAIN_PACKAGES_HOME=\
${IMPALA_TOOLCHAIN}/toolchain-packages-gcc${IMPALA_GCC_VERSION}
+if ! [ -z ${NATIVE_TOOLCHAIN_HOME-} ]; then
+ IMPALA_TOOLCHAIN_PACKAGES_HOME=$(realpath ${NATIVE_TOOLCHAIN_HOME})/build
+ export SKIP_TOOLCHAIN_BOOTSTRAP=true
+fi
+export IMPALA_TOOLCHAIN_PACKAGES_HOME
export CDP_HADOOP_URL=${CDP_HADOOP_URL-}
export CDP_HBASE_URL=${CDP_HBASE_URL-}
diff --git a/buildall.sh b/buildall.sh
index 4095cbed7..ba32813eb 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -416,29 +416,41 @@ bootstrap_dependencies() {
# Populate necessary thirdparty components unless it's set to be skipped.
if [[ "${SKIP_TOOLCHAIN_BOOTSTRAP}" = true ]]; then
- echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
+ if ! [ -z "${NATIVE_TOOLCHAIN_HOME}" ]; then
+ if ! [ -d "${NATIVE_TOOLCHAIN_HOME}" ]; then
+ mkdir -p "${NATIVE_TOOLCHAIN_HOME}"
+ pushd "${NATIVE_TOOLCHAIN_HOME}"
+ git init
+ git remote add toolchain "${IMPALA_TOOLCHAIN_REPO}"
+ git fetch toolchain "${IMPALA_TOOLCHAIN_BRANCH}"
+ # Specifying a branch avoids a large message from git about detached HEADs.
+ git checkout "${IMPALA_TOOLCHAIN_COMMIT_HASH}" -b "${IMPALA_TOOLCHAIN_BUILD_ID}"
+ else
+ pushd "${NATIVE_TOOLCHAIN_HOME}"
+ fi
+ echo "Begin building toolchain, may need several hours, please be patient...."
+ ./buildall.sh
+ popd
+ else
+ echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
+ fi
if [[ "${DOWNLOAD_CDH_COMPONENTS}" = true ]]; then
echo ">>> Downloading and extracting cdh components."
"$IMPALA_HOME/bin/bootstrap_toolchain.py"
fi
- # Create soft link to locally builded native-toolchain on aarch64
- if [[ "$(uname -p)" = "aarch64" ]]; then
- mkdir -p $IMPALA_TOOLCHAIN_PACKAGES_HOME
- cd "$IMPALA_TOOLCHAIN_PACKAGES_HOME"
- ln -f -s ${NATIVE_TOOLCHAIN_HOME}/build/* .
- cd -
- if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
- git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
- "$IMPALA_HOME/../hadoopAarch64NativeLibs"
- fi
- cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib* $HADOOP_HOME/lib/native/
- fi
-
else
echo ">>> Downloading and extracting toolchain dependencies."
"$IMPALA_HOME/bin/bootstrap_toolchain.py"
echo "Toolchain bootstrap complete."
fi
+ # Download prebuilt Hadoop native binaries for aarch64
+ if [[ "$(uname -p)" = "aarch64" ]]; then
+ if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
+ git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
+ "$IMPALA_HOME/../hadoopAarch64NativeLibs"
+ fi
+ cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib* $HADOOP_HOME/lib/native/
+ fi
if [[ "${USE_APACHE_HIVE}" = true ]]; then
"$IMPALA_HOME/testdata/bin/patch_hive.sh"
fi