You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2018/11/20 00:00:48 UTC
[2/3] impala git commit: IMPALA-7871: Don't load Hive builtins
IMPALA-7871: Don't load Hive builtins
Dataload has a "Loading Hive Builtins" step that
uploads a set of jars to HDFS/S3/etc. Despite the
step's name, nothing appears to use these jars:
dataload and core tests succeed without this step.
This removes the Hive builtins step and associated
scripts.
Change-Id: Iaca5ffdaca4b5506e9401b17a7806d37fd7b1844
Reviewed-on: http://gerrit.cloudera.org:8080/11944
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/70fbd1df
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/70fbd1df
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/70fbd1df
Branch: refs/heads/master
Commit: 70fbd1df446f4830b55d684d82c851867b9c1444
Parents: 48d1d2d
Author: Joe McDonnell <jo...@cloudera.com>
Authored: Fri Nov 16 12:14:27 2018 -0800
Committer: Joe McDonnell <jo...@cloudera.com>
Committed: Mon Nov 19 23:33:20 2018 +0000
----------------------------------------------------------------------
testdata/bin/create-load-data.sh | 4 --
testdata/bin/load-hive-builtins.sh | 69 -----------------------
testdata/bin/load-test-warehouse-snapshot.sh | 7 ---
3 files changed, 80 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/70fbd1df/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 74ae248..2a67bd4 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -98,10 +98,6 @@ do
done
if [[ $SKIP_METADATA_LOAD -eq 0 && "$SNAPSHOT_FILE" = "" ]]; then
- if [[ -z "$REMOTE_LOAD" ]]; then
- run-step "Loading Hive Builtins" load-hive-builtins.log \
- ${IMPALA_HOME}/testdata/bin/load-hive-builtins.sh
- fi
run-step "Generating HBase data" create-hbase.log \
${IMPALA_HOME}/testdata/bin/create-hbase.sh
run-step "Creating /test-warehouse HDFS directory" create-test-warehouse-dir.log \
http://git-wip-us.apache.org/repos/asf/impala/blob/70fbd1df/testdata/bin/load-hive-builtins.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/load-hive-builtins.sh b/testdata/bin/load-hive-builtins.sh
deleted file mode 100755
index 55cc845..0000000
--- a/testdata/bin/load-hive-builtins.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -euo pipefail
-. $IMPALA_HOME/bin/report_build_error.sh
-setup_report_build_error
-
-. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
-
-# TODO: remove this once we understand why Hive looks in HDFS for many of its jars
-
-# Remove all directories in one command for efficiency
-${HADOOP_HOME}/bin/hadoop fs -rm -skipTrash -r -f ${FILESYSTEM_PREFIX}${HIVE_HOME}/lib/ \
- ${FILESYSTEM_PREFIX}${HBASE_HOME}/lib/ \
- ${FILESYSTEM_PREFIX}${HADOOP_HOME}/share/hadoop/common/ \
- ${FILESYSTEM_PREFIX}${HADOOP_HOME}/share/hadoop/mapreduce/ \
- ${FILESYSTEM_PREFIX}${HADOOP_HOME}/share/hadoop/tools/lib \
- ${FILESYSTEM_PREFIX}${HADOOP_LZO}/build \
- ${FILESYSTEM_PREFIX}${SENTRY_HOME}/lib/ \
- ${FILESYSTEM_PREFIX}${IMPALA_HOME}/thirdparty/postgresql-jdbc/
-
-TMP_DIR=$(mktemp -d)
-
-# Create the directory structure to copy over
-mkdir -p ${TMP_DIR}/${HIVE_HOME}/lib \
- ${TMP_DIR}/${HBASE_HOME}/lib \
- ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/common/lib \
- ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/mapreduce \
- ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/tools/lib \
- ${TMP_DIR}/${HADOOP_LZO}/build \
- ${TMP_DIR}/${SENTRY_HOME}/lib \
- ${TMP_DIR}/${IMPALA_HOME}/thirdparty/postgresql-jdbc/
-
-# Add symbolic links to files in the appropriate places
-ln -s ${HIVE_HOME}/lib/*.jar ${TMP_DIR}/${HIVE_HOME}/lib
-ln -s ${HBASE_HOME}/lib/*.jar ${TMP_DIR}/${HBASE_HOME}/lib
-ln -s ${HADOOP_HOME}/share/hadoop/common/*.jar \
- ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/common
-ln -s ${HADOOP_HOME}/share/hadoop/common/lib/*.jar \
- ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/common/lib
-ln -s ${HADOOP_HOME}/share/hadoop/mapreduce/*.jar \
- ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/mapreduce
-ln -s ${HADOOP_HOME}/share/hadoop/tools/lib/*.jar \
- ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/tools/lib
-ln -s ${HADOOP_LZO}/build/hadoop-lzo*.jar ${TMP_DIR}/${HADOOP_LZO}/build
-ln -s ${SENTRY_HOME}/lib/*.jar ${TMP_DIR}/${SENTRY_HOME}/lib
-# This is the only item that uses a different path
-# TODO: why is this path different?
-ln -s ${POSTGRES_JDBC_DRIVER} ${TMP_DIR}/${IMPALA_HOME}/thirdparty/postgresql-jdbc
-
-${HADOOP_HOME}/bin/hadoop fs -put ${TMP_DIR}/* ${FILESYSTEM_PREFIX}/
-
-rm -r ${TMP_DIR}
http://git-wip-us.apache.org/repos/asf/impala/blob/70fbd1df/testdata/bin/load-test-warehouse-snapshot.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/load-test-warehouse-snapshot.sh b/testdata/bin/load-test-warehouse-snapshot.sh
index da5fb07..cfec558 100755
--- a/testdata/bin/load-test-warehouse-snapshot.sh
+++ b/testdata/bin/load-test-warehouse-snapshot.sh
@@ -102,13 +102,6 @@ if [ ! -f ${SNAPSHOT_STAGING_DIR}${TEST_WAREHOUSE_DIR}/githash.txt ]; then
exit 1
fi
-
-# Hive builtins are already present on a pre-setup CM managed cluster.
-if [[ -z "$REMOTE_LOAD" ]]; then
- echo "Loading hive builtins"
- ${IMPALA_HOME}/testdata/bin/load-hive-builtins.sh
-fi
-
echo "Copying data to ${TARGET_FILESYSTEM}"
if [ "${TARGET_FILESYSTEM}" = "s3" ]; then
# hive does not yet work well with s3, so we won't need hive builtins.