You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2020/10/24 15:19:14 UTC

[hbase] branch branch-2 updated: HBASE-25206 Data loss can happen if a cloned table loses original split region(delete table) (#2569)

This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new f7e6143  HBASE-25206 Data loss can happen if a cloned table loses original split region(delete table) (#2569)
f7e6143 is described below

commit f7e6143f4977b92ed21bd08803bc8dac71648258
Author: Toshihiro Suzuki <br...@gmail.com>
AuthorDate: Sat Oct 24 23:49:14 2020 +0900

    HBASE-25206 Data loss can happen if a cloned table loses original split region(delete table) (#2569)
    
    Signed-off-by: Duo Zhang <zh...@apache.org>
---
 .../hbase/master/assignment/RegionStates.java      | 12 ++++++++
 .../assignment/TransitRegionStateProcedure.java    |  1 +
 .../master/procedure/DeleteTableProcedure.java     |  3 +-
 ...shotFromClientAfterSplittingRegionTestBase.java | 36 ++++++++++++++++++++++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
index 84f32fc..5476540 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java
@@ -345,6 +345,18 @@ public class RegionStates {
   }
 
   /**
+   * Get the regions for deleting a table.
+   * <p/>
+   * Here we need to return all the regions irrespective of their states in order to archive them
+   * all. This is because if we don't archive OFFLINE/SPLIT regions and a snapshot or a cloned
+   * table references those regions, we will lose the data of the regions.
+   */
+  public List<RegionInfo> getRegionsOfTableForDeleting(TableName table) {
+    return getTableRegionStateNodes(table).stream().map(RegionStateNode::getRegionInfo)
+      .collect(Collectors.toList());
+  }
+
+  /**
    * @return Return the regions of the table and filter them.
    */
   private List<RegionInfo> getRegionsOfTable(TableName table, Predicate<RegionStateNode> filter) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
index b0a697d..63bb345 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
@@ -348,6 +348,7 @@ public class TransitRegionStateProcedure
               LOG.error(
                 "Cannot assign replica region {} because its primary region {} does not exist.",
                 regionNode.getRegionInfo(), defaultRI);
+              regionNode.unsetProcedure(this);
               return Flow.NO_MORE_STATE;
             }
           }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
index 9cfce0c..80dddc7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
@@ -99,7 +99,8 @@ public class DeleteTableProcedure
 
           // TODO: Move out... in the acquireLock()
           LOG.debug("Waiting for RIT for {}", this);
-          regions = env.getAssignmentManager().getRegionStates().getRegionsOfTable(getTableName());
+          regions = env.getAssignmentManager().getRegionStates()
+            .getRegionsOfTableForDeleting(getTableName());
           assert regions != null && !regions.isEmpty() : "unexpected 0 regions";
           ProcedureSyncWait.waitRegionInTransition(env, regions);
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java
index 5ed100f..e8c0167 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java
@@ -80,4 +80,40 @@ public class CloneSnapshotFromClientAfterSplittingRegionTestBase
       admin.catalogJanitorSwitch(true);
     }
   }
+
+  @Test
+  public void testCloneSnapshotBeforeSplittingRegionAndDroppingTable()
+    throws IOException, InterruptedException {
+    // Turn off the CatalogJanitor
+    admin.catalogJanitorSwitch(false);
+
+    try {
+      // Take a snapshot
+      admin.snapshot(snapshotName2, tableName);
+
+      // Clone the snapshot to another table
+      TableName clonedTableName =
+        TableName.valueOf(getValidMethodName() + "-" + System.currentTimeMillis());
+      admin.cloneSnapshot(snapshotName2, clonedTableName);
+      SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTableName);
+
+      // Split the first region of the original table
+      List<RegionInfo> regionInfos = admin.getRegions(tableName);
+      RegionReplicaUtil.removeNonDefaultRegions(regionInfos);
+      splitRegion(regionInfos.get(0));
+
+      // Drop the original table
+      admin.disableTable(tableName);
+      admin.deleteTable(tableName);
+
+      // Disable and enable the cloned table. This should be successful
+      admin.disableTable(clonedTableName);
+      admin.enableTable(clonedTableName);
+      SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTableName);
+
+      verifyRowCount(TEST_UTIL, clonedTableName, snapshot1Rows);
+    } finally {
+      admin.catalogJanitorSwitch(true);
+    }
+  }
 }