You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2017/05/17 01:56:50 UTC

hbase git commit: HBASE-18018 Changes to support abort for all procedures by default

Repository: hbase
Updated Branches:
  refs/heads/master a8775b11d -> 5eb1b7b96


HBASE-18018 Changes to support abort for all procedures by default

The default behavior for abort() method of StateMachineProcedure is changed to support aborting all procedures irrespective of if rollback is supported or not. Currently its observed that sometimes procedures may fail on a step which will be considered as retryable error as abort is not supported. As a result procedure may stuck in a endless loop repeating same step again.User should have an option to abort any stuck procedure and do clean up manually. Please refer to HBASE-18016 and discussion there.

Signed-off-by: Michael Stack <st...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5eb1b7b9
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5eb1b7b9
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5eb1b7b9

Branch: refs/heads/master
Commit: 5eb1b7b96c6b870959669636057aa16e93646fa6
Parents: a8775b1
Author: Umesh Agashe <ua...@cloudera.com>
Authored: Wed May 10 11:41:59 2017 -0700
Committer: Michael Stack <st...@apache.org>
Committed: Tue May 16 18:56:32 2017 -0700

----------------------------------------------------------------------
 .../hbase/procedure2/StateMachineProcedure.java | 37 +++++++++++++-------
 .../procedure2/TestStateMachineProcedure.java   | 20 ++++++++++-
 .../master/procedure/DeleteTableProcedure.java  |  9 +++++
 3 files changed, 52 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/5eb1b7b9/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
index ea2a41f..0590a93 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
@@ -105,14 +105,8 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
    * @param state the state enum object
    */
   protected void setNextState(final TState state) {
-    if (aborted.get() && isRollbackSupported(getCurrentState())) {
-      setAbortFailure(getClass().getSimpleName(), "abort requested");
-    } else {
-      if (aborted.get()) {
-        LOG.warn("ignoring abort request " + state);
-      }
-      setNextState(getStateId(state));
-    }
+    setNextState(getStateId(state));
+    failIfAborted();
   }
 
   /**
@@ -147,6 +141,8 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
       throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
     updateTimestamp();
     try {
+      failIfAborted();
+
       if (!hasMoreState() || isFailed()) return null;
 
       TState state = getCurrentState();
@@ -190,10 +186,11 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
   protected boolean abort(final TEnvironment env) {
     final boolean isDebugEnabled = LOG.isDebugEnabled();
     final TState state = getCurrentState();
-    if (isRollbackSupported(state)) {
-      if (isDebugEnabled) {
-        LOG.debug("abort requested for " + this + " state=" + state);
-      }
+    if (isDebugEnabled) {
+      LOG.debug("abort requested for " + this + " state=" + state);
+    }
+
+    if (hasMoreState()) {
       aborted.set(true);
       return true;
     } else if (isDebugEnabled) {
@@ -203,6 +200,20 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
   }
 
   /**
+   * If procedure has more states then abort it otherwise procedure is finished and abort can be
+   * ignored.
+   */
+  protected final void failIfAborted() {
+    if (aborted.get()) {
+      if (hasMoreState()) {
+        setAbortFailure(getClass().getSimpleName(), "abort requested");
+      } else {
+        LOG.warn("Ignoring abort request on state='" + getCurrentState() + "' for " + this);
+      }
+    }
+  }
+
+  /**
    * Used by the default implementation of abort() to know if the current state can be aborted
    * and rollback can be triggered.
    */
@@ -219,7 +230,7 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
     return stateFlow != Flow.NO_MORE_STATE;
   }
 
-  private TState getCurrentState() {
+  protected TState getCurrentState() {
     return stateCount > 0 ? getState(states[stateCount-1]) : getInitialState();
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/5eb1b7b9/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java
index 82b767e..8347dbf 100644
--- a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java
+++ b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestStateMachineProcedure.java
@@ -93,6 +93,21 @@ public class TestStateMachineProcedure {
   }
 
   @Test
+  public void testAbortStuckProcedure() throws InterruptedException {
+    try {
+      procExecutor.getEnvironment().loop = true;
+      TestSMProcedure proc = new TestSMProcedure();
+      long procId = procExecutor.submitProcedure(proc);
+      Thread.sleep(1000 + (int) (Math.random() * 4001));
+      proc.abort(procExecutor.getEnvironment());
+      ProcedureTestingUtility.waitProcedure(procExecutor, procId);
+      assertEquals(true, proc.isFailed());
+    } finally {
+      procExecutor.getEnvironment().loop = false;
+    }
+  }
+
+  @Test
   public void testChildOnLastStep() {
     long procId = procExecutor.submitProcedure(new TestSMProcedure());
     ProcedureTestingUtility.waitProcedure(procExecutor, procId);
@@ -142,7 +157,9 @@ public class TestStateMachineProcedure {
       env.execCount.incrementAndGet();
       switch (state) {
         case STEP_1:
-          setNextState(TestSMProcedureState.STEP_2);
+          if (!env.loop) {
+            setNextState(TestSMProcedureState.STEP_2);
+          }
           break;
         case STEP_2:
           addChildProcedure(new SimpleChildProcedure());
@@ -190,5 +207,6 @@ public class TestStateMachineProcedure {
     AtomicInteger execCount = new AtomicInteger(0);
     AtomicInteger rollbackCount = new AtomicInteger(0);
     boolean triggerChildRollback = false;
+    boolean loop = false;
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/5eb1b7b9/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
index 9d0a283..bda68eb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
@@ -146,6 +146,15 @@ public class DeleteTableProcedure
   }
 
   @Override
+  protected boolean abort(MasterProcedureEnv env) {
+    // TODO: Current behavior is: with no rollback and no abort support, procedure may stuck
+    // looping in retrying failing step forever. Default behavior of abort is changed to support
+    // aborting all procedures. Override the default wisely. Following code retains the current
+    // behavior. Revisit it later.
+    return isRollbackSupported(getCurrentState()) ? super.abort(env) : false;
+  }
+
+  @Override
   protected void rollbackState(final MasterProcedureEnv env, final DeleteTableState state) {
     if (state == DeleteTableState.DELETE_TABLE_PRE_OPERATION) {
       // nothing to rollback, pre-delete is just table-state checks.