You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2018/08/01 07:32:50 UTC

hbase git commit: HBASE-20893 Data loss if splitting region while ServerCrashProcedure executing ADDENDUM: Rather than rollback, just do region reopens.

Repository: hbase
Updated Branches:
  refs/heads/branch-2.0 7b36f2b0b -> 7bb867788


HBASE-20893 Data loss if splitting region while ServerCrashProcedure executing ADDENDUM: Rather than rollback, just do region reopens.

In split, reopen the parent if recovered.edits and in merge, reopen the
parent region or regions that happened to have recovered.edits on close.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7bb86778
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7bb86778
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7bb86778

Branch: refs/heads/branch-2.0
Commit: 7bb867788acd99b5f454cc357417726a9590cdde
Parents: 7b36f2b
Author: Michael Stack <st...@apache.org>
Authored: Thu Jul 26 22:39:52 2018 -0700
Committer: Michael Stack <st...@apache.org>
Committed: Wed Aug 1 00:32:38 2018 -0700

----------------------------------------------------------------------
 .../master/assignment/AssignProcedure.java      |  1 +
 .../assignment/MergeTableRegionsProcedure.java  | 46 ++++++++++----------
 .../assignment/SplitTableRegionProcedure.java   | 32 ++++++++------
 .../TestMergeTableRegionsWhileRSCrash.java      |  7 ---
 4 files changed, 43 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/7bb86778/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java
index 86f0a3f..55aee4a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProto
  * failure. Should we ignore rollback calls to Assign/Unassign then? Or just
  * remove rollback here?
  */
+// TODO: Add being able to assign a region to open read-only.
 @InterfaceAudience.Private
 public class AssignProcedure extends RegionTransitionProcedure {
   private static final Logger LOG = LoggerFactory.getLogger(AssignProcedure.class);

http://git-wip-us.apache.org/repos/asf/hbase/blob/7bb86778/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
index 343cf87..87995f0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
@@ -239,8 +239,21 @@ public class MergeTableRegionsProcedure
           setNextState(MergeTableRegionsState.MERGE_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
           break;
         case MERGE_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
-          checkClosedRegions(env);
-          setNextState(MergeTableRegionsState.MERGE_TABLE_REGIONS_CREATE_MERGED_REGION);
+          List<RegionInfo> ris = hasRecoveredEdits(env);
+          if (ris.isEmpty()) {
+            setNextState(MergeTableRegionsState.MERGE_TABLE_REGIONS_CREATE_MERGED_REGION);
+          } else {
+            // Need to reopen parent regions to pickup missed recovered.edits. Do it by creating
+            // child assigns and then stepping back to MERGE_TABLE_REGIONS_CLOSE_REGIONS.
+            // Just assign the primary regions recovering the missed recovered.edits -- no replicas.
+            // May need to cycle here a few times if heavy writes.
+            // TODO: Add an assign read-only.
+            for (RegionInfo ri: ris) {
+              LOG.info("Found recovered.edits under {}, reopen to pickup missed edits!", ri);
+              addChildProcedure(env.getAssignmentManager().createAssignProcedure(ri));
+            }
+            setNextState(MergeTableRegionsState.MERGE_TABLE_REGIONS_CLOSE_REGIONS);
+          }
           break;
         case MERGE_TABLE_REGIONS_CREATE_MERGED_REGION:
           createMergedRegion(env);
@@ -468,30 +481,19 @@ public class MergeTableRegionsProcedure
   }
 
   /**
-   * check the closed regions
+   * Return list of regions that have recovered.edits... usually its an empty list.
    * @param env the master env
    * @throws IOException IOException
    */
-  private void checkClosedRegions(final MasterProcedureEnv env) throws IOException {
-    checkClosedRegion(env, regionsToMerge[0]);
-    checkClosedRegion(env, regionsToMerge[1]);
-  }
-
-  /**
-   * Check whether there is recovered.edits in the closed region
-   * If any, that means this region is not closed property, we need
-   * to abort region merge to prevent data loss
-   * @param env master env
-   * @param regionInfo regioninfo
-   * @throws IOException IOException
-   */
-  private void checkClosedRegion(final MasterProcedureEnv env,
-      RegionInfo regionInfo) throws IOException {
-    if (WALSplitter.hasRecoveredEdits(env.getMasterServices().getFileSystem(),
-        env.getMasterConfiguration(), regionInfo)) {
-      throw new IOException("Recovered.edits are found in Region: " + regionInfo
-          + ", abort merge to prevent data loss");
+  private List<RegionInfo> hasRecoveredEdits(final MasterProcedureEnv env) throws IOException {
+    List<RegionInfo> ris =  new ArrayList<RegionInfo>(regionsToMerge.length);
+    for (int i = 0; i < regionsToMerge.length; i++) {
+      RegionInfo ri = regionsToMerge[i];
+      if (SplitTableRegionProcedure.hasRecoveredEdits(env, ri)) {
+        ris.add(ri);
+      }
     }
+    return ris;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/7bb86778/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
index 371583a..2ad81d7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
@@ -139,18 +139,13 @@ public class SplitTableRegionProcedure
   }
 
   /**
-   * Check whether there is recovered.edits in the closed region
-   * If any, that means this region is not closed property, we need
-   * to abort region split to prevent data loss
+   * Check whether there are recovered.edits in the parent closed region.
    * @param env master env
    * @throws IOException IOException
    */
-  private void checkClosedRegion(final MasterProcedureEnv env) throws IOException {
-    if (WALSplitter.hasRecoveredEdits(env.getMasterServices().getFileSystem(),
-        env.getMasterConfiguration(), getRegion())) {
-      throw new IOException("Recovered.edits are found in Region: " + getRegion()
-          + ", abort split to prevent data loss");
-    }
+  static boolean hasRecoveredEdits(MasterProcedureEnv env, RegionInfo ri) throws IOException {
+    return WALSplitter.hasRecoveredEdits(env.getMasterServices().getFileSystem(),
+        env.getMasterConfiguration(), ri);
   }
 
   /**
@@ -256,8 +251,18 @@ public class SplitTableRegionProcedure
           setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
           break;
         case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
-          checkClosedRegion(env);
-          setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
+          if (hasRecoveredEdits(env, getRegion())) {
+            // If recovered edits, reopen parent region and then re-run the close by going back to
+            // SPLIT_TABLE_REGION_CLOSE_PARENT_REGION. We might have to cycle here a few times
+            // (TODO: Add being able to open a region in read-only mode). Open the primary replica
+            // in this case only where we just want to pickup the left-out replicated.edits.
+            LOG.info("Found recovered.edits under {}, reopen so we pickup these missed edits!",
+                getRegion().getEncodedName());
+            addChildProcedure(env.getAssignmentManager().createAssignProcedure(getParentRegion()));
+            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION);
+          } else {
+            setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS);
+          }
           break;
         case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS:
           createDaughterRegions(env);
@@ -290,10 +295,9 @@ public class SplitTableRegionProcedure
           throw new UnsupportedOperationException(this + " unhandled state=" + state);
       }
     } catch (IOException e) {
-      String msg = "Error trying to split region " + getParentRegion().getEncodedName() +
-          " in the table " + getTableName() + " (in state=" + state + ")";
+      String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this;
       if (!isRollbackSupported(state)) {
-        // We reach a state that cannot be rolled back. We just need to keep retry.
+        // We reach a state that cannot be rolled back. We just need to keep retrying.
         LOG.warn(msg, e);
       } else {
         LOG.error(msg, e);

http://git-wip-us.apache.org/repos/asf/hbase/blob/7bb86778/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java
index b979bb8..9608e5c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMergeTableRegionsWhileRSCrash.java
@@ -119,12 +119,5 @@ public class TestMergeTableRegionsWhileRSCrash {
       count++;
     }
     Assert.assertEquals("There should be 10 rows!", 10, count);
-
-
-
-
   }
-
-
-
 }