You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2018/11/20 00:00:48 UTC

[2/3] impala git commit: IMPALA-7871: Don't load Hive builtins

IMPALA-7871: Don't load Hive builtins

Dataload has a "Loading Hive builtins" step that
loads a bunch of jars into HDFS/S3/etc. Despite
the step's name, nothing seems to be using these jars.
Dataload and core tests succeed without this step.

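This removes the Hive builtins step: the
load-hive-builtins.sh script is deleted, along with
the calls to it from create-load-data.sh and
load-test-warehouse-snapshot.sh.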

Change-Id: Iaca5ffdaca4b5506e9401b17a7806d37fd7b1844
Reviewed-on: http://gerrit.cloudera.org:8080/11944
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/70fbd1df
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/70fbd1df
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/70fbd1df

Branch: refs/heads/master
Commit: 70fbd1df446f4830b55d684d82c851867b9c1444
Parents: 48d1d2d
Author: Joe McDonnell <jo...@cloudera.com>
Authored: Fri Nov 16 12:14:27 2018 -0800
Committer: Joe McDonnell <jo...@cloudera.com>
Committed: Mon Nov 19 23:33:20 2018 +0000

----------------------------------------------------------------------
 testdata/bin/create-load-data.sh             |  4 --
 testdata/bin/load-hive-builtins.sh           | 69 -----------------------
 testdata/bin/load-test-warehouse-snapshot.sh |  7 ---
 3 files changed, 80 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/70fbd1df/testdata/bin/create-load-data.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 74ae248..2a67bd4 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -98,10 +98,6 @@ do
 done
 
 if [[ $SKIP_METADATA_LOAD -eq 0  && "$SNAPSHOT_FILE" = "" ]]; then
-  if [[ -z "$REMOTE_LOAD" ]]; then
-    run-step "Loading Hive Builtins" load-hive-builtins.log \
-      ${IMPALA_HOME}/testdata/bin/load-hive-builtins.sh
-  fi
   run-step "Generating HBase data" create-hbase.log \
       ${IMPALA_HOME}/testdata/bin/create-hbase.sh
   run-step "Creating /test-warehouse HDFS directory" create-test-warehouse-dir.log \

http://git-wip-us.apache.org/repos/asf/impala/blob/70fbd1df/testdata/bin/load-hive-builtins.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/load-hive-builtins.sh b/testdata/bin/load-hive-builtins.sh
deleted file mode 100755
index 55cc845..0000000
--- a/testdata/bin/load-hive-builtins.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -euo pipefail
-. $IMPALA_HOME/bin/report_build_error.sh
-setup_report_build_error
-
-. ${IMPALA_HOME}/bin/impala-config.sh > /dev/null 2>&1
-
-# TODO: remove this once we understand why Hive looks in HDFS for many of its jars
-
-# Remove all directories in one command for efficiency
-${HADOOP_HOME}/bin/hadoop fs -rm -skipTrash -r -f ${FILESYSTEM_PREFIX}${HIVE_HOME}/lib/ \
-  ${FILESYSTEM_PREFIX}${HBASE_HOME}/lib/ \
-  ${FILESYSTEM_PREFIX}${HADOOP_HOME}/share/hadoop/common/ \
-  ${FILESYSTEM_PREFIX}${HADOOP_HOME}/share/hadoop/mapreduce/ \
-  ${FILESYSTEM_PREFIX}${HADOOP_HOME}/share/hadoop/tools/lib \
-  ${FILESYSTEM_PREFIX}${HADOOP_LZO}/build \
-  ${FILESYSTEM_PREFIX}${SENTRY_HOME}/lib/ \
-  ${FILESYSTEM_PREFIX}${IMPALA_HOME}/thirdparty/postgresql-jdbc/
-
-TMP_DIR=$(mktemp -d)
-
-# Create the directory structure to copy over
-mkdir -p ${TMP_DIR}/${HIVE_HOME}/lib \
-  ${TMP_DIR}/${HBASE_HOME}/lib \
-  ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/common/lib \
-  ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/mapreduce \
-  ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/tools/lib \
-  ${TMP_DIR}/${HADOOP_LZO}/build \
-  ${TMP_DIR}/${SENTRY_HOME}/lib \
-  ${TMP_DIR}/${IMPALA_HOME}/thirdparty/postgresql-jdbc/
-
-# Add symbolic links to files in the appropriate places
-ln -s ${HIVE_HOME}/lib/*.jar ${TMP_DIR}/${HIVE_HOME}/lib
-ln -s ${HBASE_HOME}/lib/*.jar ${TMP_DIR}/${HBASE_HOME}/lib
-ln -s ${HADOOP_HOME}/share/hadoop/common/*.jar \
-  ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/common
-ln -s ${HADOOP_HOME}/share/hadoop/common/lib/*.jar \
-  ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/common/lib
-ln -s ${HADOOP_HOME}/share/hadoop/mapreduce/*.jar \
-  ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/mapreduce
-ln -s ${HADOOP_HOME}/share/hadoop/tools/lib/*.jar \
-  ${TMP_DIR}/${HADOOP_HOME}/share/hadoop/tools/lib
-ln -s ${HADOOP_LZO}/build/hadoop-lzo*.jar ${TMP_DIR}/${HADOOP_LZO}/build
-ln -s ${SENTRY_HOME}/lib/*.jar ${TMP_DIR}/${SENTRY_HOME}/lib
-# This is the only item that uses a different path
-# TODO: why is this path different?
-ln -s ${POSTGRES_JDBC_DRIVER} ${TMP_DIR}/${IMPALA_HOME}/thirdparty/postgresql-jdbc
-
-${HADOOP_HOME}/bin/hadoop fs -put ${TMP_DIR}/* ${FILESYSTEM_PREFIX}/
-
-rm -r ${TMP_DIR}

http://git-wip-us.apache.org/repos/asf/impala/blob/70fbd1df/testdata/bin/load-test-warehouse-snapshot.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/load-test-warehouse-snapshot.sh b/testdata/bin/load-test-warehouse-snapshot.sh
index da5fb07..cfec558 100755
--- a/testdata/bin/load-test-warehouse-snapshot.sh
+++ b/testdata/bin/load-test-warehouse-snapshot.sh
@@ -102,13 +102,6 @@ if [ ! -f ${SNAPSHOT_STAGING_DIR}${TEST_WAREHOUSE_DIR}/githash.txt ]; then
   exit 1
 fi
 
-
-# Hive builtins are already present on a pre-setup CM managed cluster.
-if [[ -z "$REMOTE_LOAD" ]]; then
-  echo "Loading hive builtins"
-  ${IMPALA_HOME}/testdata/bin/load-hive-builtins.sh
-fi
-
 echo "Copying data to ${TARGET_FILESYSTEM}"
 if [ "${TARGET_FILESYSTEM}" = "s3" ]; then
   # hive does not yet work well with s3, so we won't need hive builtins.