You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/21 05:00:40 UTC
[impala] 02/03: IMPALA-7119: Restart whole minicluster when HDFS replication stalls

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch 2.x
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9fdb93987cf13f346ad56c1b273a1e0fed86fd10
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Fri Jun 8 11:20:42 2018 -0700

    IMPALA-7119: Restart whole minicluster when HDFS replication stalls
    
    After loading data, we wait for HDFS to replicate
    all of the blocks appropriately. If this takes too long,
    we restart HDFS. However, HBase can fail if HDFS is
    restarted and HBase is unable to write its logs.
    In general, there is no real reason to keep HBase
    and the other minicluster components running while
    restarting HDFS.
    
    This changes the HDFS health check to restart the
    whole minicluster and Impala rather than just HDFS.
    
    Testing:
     - Tested with a modified version that always does
       the restart in the HDFS health check and verified
       that the tests pass
    
    Change-Id: I58ffe301708c78c26ee61aa754a06f46c224c6e2
    Reviewed-on: http://gerrit.cloudera.org:8080/10665
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 testdata/bin/create-load-data.sh | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index c116fca..0c1ea7d 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -134,6 +134,31 @@ echo "SNAPSHOT_FILE=${SNAPSHOT_FILE:-}"
 echo "CM_HOST=${CM_HOST:-}"
 echo "REMOTE_LOAD=${REMOTE_LOAD:-}"
 
+function start-impala {
+  : ${START_CLUSTER_ARGS=""}
+  START_CLUSTER_ARGS_INT=""
+  if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
+    START_CLUSTER_ARGS_INT+=("--impalad_args=--abort_on_config_error=false -s 1")
+  else
+    START_CLUSTER_ARGS_INT+=("-s 3")
+  fi
+  START_CLUSTER_ARGS_INT+=("${START_CLUSTER_ARGS}")
+  ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_DATA_LOADING_LOGS_DIR} \
+    ${START_CLUSTER_ARGS_INT}
+}
+
+function restart-cluster {
+  # Restarts the minicluster and Impala; each step is broken out for clarity.
+  echo "Shutting down Impala"
+  "${IMPALA_HOME}/bin/start-impala-cluster.py" --kill
+  echo "Shutting down the minicluster"
+  "${IMPALA_HOME}/testdata/bin/kill-all.sh"
+  echo "Starting the minicluster"
+  "${IMPALA_HOME}/testdata/bin/run-all.sh"
+  echo "Starting Impala"
+  start-impala
+}
+
 function load-custom-schemas {
   # HDFS commandline calls are slow, so consolidate the manipulation into
   # as few calls as possible by populating a temporary directory with the
@@ -483,7 +508,9 @@ function wait-hdfs-replication {
     if [[ "$NUMBER_UNDER_REPLICATED" -eq "$LAST_NUMBER_UNDER_REPLICATED" ]] ; then
       echo "There are under-replicated blocks in HDFS and HDFS is not making progress"\
           "in $SLEEP_SEC seconds. Attempting to restart HDFS to resolve this issue."
-      ${IMPALA_HOME}/testdata/bin/run-mini-dfs.sh
+      # IMPALA-7119: Other minicluster components (like HBase) can fail if HDFS is
+      # restarted by itself, so restart the whole cluster, including Impala.
+      restart-cluster
     fi
     LAST_NUMBER_UNDER_REPLICATED="$NUMBER_UNDER_REPLICATED"
     echo "$NUMBER_UNDER_REPLICATED under replicated blocks remaining."
@@ -498,16 +525,8 @@ if ${CLUSTER_DIR}/admin is_kerberized; then
 fi
 
 # Start Impala
-: ${START_CLUSTER_ARGS=""}
-if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
-  START_CLUSTER_ARGS="--impalad_args=--abort_on_config_error=false -s 1 ${START_CLUSTER_ARGS}"
-else
-  START_CLUSTER_ARGS="-s 3 ${START_CLUSTER_ARGS}"
-fi
 if [[ -z "$REMOTE_LOAD" ]]; then
-  run-step "Starting Impala cluster" start-impala-cluster.log \
-    ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_DATA_LOADING_LOGS_DIR} \
-    ${START_CLUSTER_ARGS}
+  run-step "Starting Impala cluster" start-impala-cluster.log start-impala
 fi
 
 # The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching).