You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/21 05:00:40 UTC
[impala] 02/03: IMPALA-7119: Restart whole minicluster when HDFS replication stalls
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch 2.x
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 9fdb93987cf13f346ad56c1b273a1e0fed86fd10
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Fri Jun 8 11:20:42 2018 -0700
IMPALA-7119: Restart whole minicluster when HDFS replication stalls
After loading data, we wait for HDFS to replicate
all of the blocks appropriately. If this takes too long,
we restart HDFS. However, HBase can fail if HDFS is
restarted and HBase is unable to write its logs.
In general, there is no real reason to keep HBase
and the other minicluster components running while
restarting HDFS.
This changes the HDFS health check to restart the
whole minicluster and Impala rather than just HDFS.
Testing:
- Tested with a modified version that always does
the restart in the HDFS health check and verified
that the tests pass
Change-Id: I58ffe301708c78c26ee61aa754a06f46c224c6e2
Reviewed-on: http://gerrit.cloudera.org:8080/10665
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
testdata/bin/create-load-data.sh | 39 +++++++++++++++++++++++++++++----------
1 file changed, 29 insertions(+), 10 deletions(-)
diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index c116fca..0c1ea7d 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -134,6 +134,31 @@ echo "SNAPSHOT_FILE=${SNAPSHOT_FILE:-}"
echo "CM_HOST=${CM_HOST:-}"
echo "REMOTE_LOAD=${REMOTE_LOAD:-}"
+function start-impala {
+ : ${START_CLUSTER_ARGS=""}
+ START_CLUSTER_ARGS_INT=""
+ if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
+ START_CLUSTER_ARGS_INT+=("--impalad_args=--abort_on_config_error=false -s 1")
+ else
+ START_CLUSTER_ARGS_INT+=("-s 3")
+ fi
+ START_CLUSTER_ARGS_INT+=("${START_CLUSTER_ARGS}")
+ ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_DATA_LOADING_LOGS_DIR} \
+ ${START_CLUSTER_ARGS_INT}
+}
+
+function restart-cluster {
+ # Break out each individual step for clarity
+ echo "Shutting down Impala"
+ ${IMPALA_HOME}/bin/start-impala-cluster.py --kill
+ echo "Shutting down the minicluster"
+ ${IMPALA_HOME}/testdata/bin/kill-all.sh
+ echo "Starting the minicluster"
+ ${IMPALA_HOME}/testdata/bin/run-all.sh
+ echo "Starting Impala"
+ start-impala
+}
+
function load-custom-schemas {
# HDFS commandline calls are slow, so consolidate the manipulation into
# as few calls as possible by populating a temporary directory with the
@@ -483,7 +508,9 @@ function wait-hdfs-replication {
if [[ "$NUMBER_UNDER_REPLICATED" -eq "$LAST_NUMBER_UNDER_REPLICATED" ]] ; then
echo "There are under-replicated blocks in HDFS and HDFS is not making progress"\
"in $SLEEP_SEC seconds. Attempting to restart HDFS to resolve this issue."
- ${IMPALA_HOME}/testdata/bin/run-mini-dfs.sh
+ # IMPALA-7119: Other minicluster components (like HBase) can fail if HDFS is
+ # restarted by itself, so restart the whole cluster, including Impala.
+ restart-cluster
fi
LAST_NUMBER_UNDER_REPLICATED="$NUMBER_UNDER_REPLICATED"
echo "$NUMBER_UNDER_REPLICATED under replicated blocks remaining."
@@ -498,16 +525,8 @@ if ${CLUSTER_DIR}/admin is_kerberized; then
fi
# Start Impala
-: ${START_CLUSTER_ARGS=""}
-if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
- START_CLUSTER_ARGS="--impalad_args=--abort_on_config_error=false -s 1 ${START_CLUSTER_ARGS}"
-else
- START_CLUSTER_ARGS="-s 3 ${START_CLUSTER_ARGS}"
-fi
if [[ -z "$REMOTE_LOAD" ]]; then
- run-step "Starting Impala cluster" start-impala-cluster.log \
- ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_DATA_LOADING_LOGS_DIR} \
- ${START_CLUSTER_ARGS}
+ run-step "Starting Impala cluster" start-impala-cluster.log start-impala
fi
# The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching).