You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2017/12/09 01:25:33 UTC

hbase git commit: HBASE-19457 Debugging flaky TestTruncateTableProcedure

Repository: hbase
Updated Branches:
  refs/heads/master a5a77ae3d -> 7092b814b


HBASE-19457 Debugging flaky TestTruncateTableProcedure

- Adds debug logging for future ease
- Removes 60s timeout since testRecoveryAndDoubleExecutionPreserveSplits is only halfway after a minute.
- Adds some comments
- Logging change: Some places report "regionState=" while others just "state=".
  State machine procs also have "state=" in their logs. Let me change all region related logging to "regionState=" so that
  1) it's consistent everywhere, 2) more filtered results when searching through logs.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7092b814
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7092b814
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7092b814

Branch: refs/heads/master
Commit: 7092b814bd5c5e243987853771dc2ec339ae0b0c
Parents: a5a77ae
Author: Apekshit Sharma <ap...@apache.org>
Authored: Fri Dec 8 17:16:06 2017 -0800
Committer: Apekshit Sharma <ap...@apache.org>
Committed: Fri Dec 8 17:25:16 2017 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/util/CommonFSUtils.java   |  2 +-
 .../hbase/procedure2/StateMachineProcedure.java       |  3 +++
 .../hbase/master/assignment/AssignmentManager.java    | 13 +++++++------
 .../master/procedure/TruncateTableProcedure.java      |  6 ++++++
 .../procedure/MasterProcedureTestingUtility.java      |  7 ++++---
 .../master/procedure/TestTruncateTableProcedure.java  | 14 ++++++++------
 6 files changed, 29 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java
index bdf148e..38ae476 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java
@@ -733,7 +733,7 @@ public abstract class CommonFSUtils {
    */
   public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
       throws IOException {
-    LOG.debug("Current file system:");
+    LOG.debug("File system contents for path " + root);
     logFSTree(LOG, fs, root, "|-");
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
index bb1d4fc..893ee0c 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
@@ -176,6 +176,9 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
         this.cycles++;
       }
 
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(toString());
+      }
       stateFlow = executeFromState(env, state);
       if (!hasMoreState()) setNextState(EOF_STATE);
       if (subProcList != null && !subProcList.isEmpty()) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 26c6bb7..cebe0b0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -805,7 +805,7 @@ public class AssignmentManager implements ServerListener {
     }
 
     if (LOG.isTraceEnabled()) {
-      LOG.trace(String.format("Update region transition serverName=%s region=%s state=%s",
+      LOG.trace(String.format("Update region transition serverName=%s region=%s regionState=%s",
         serverName, regionNode, state));
     }
 
@@ -838,7 +838,7 @@ public class AssignmentManager implements ServerListener {
     checkFailoverCleanupCompleted(parent);
 
     if (state != TransitionCode.READY_TO_SPLIT) {
-      throw new UnexpectedStateException("unsupported split state=" + state +
+      throw new UnexpectedStateException("unsupported split regionState=" + state +
         " for parent region " + parent +
         " maybe an old RS (< 2.0) had the operation in progress");
     }
@@ -870,7 +870,7 @@ public class AssignmentManager implements ServerListener {
     checkFailoverCleanupCompleted(merged);
 
     if (state != TransitionCode.READY_TO_MERGE) {
-      throw new UnexpectedStateException("Unsupported merge state=" + state +
+      throw new UnexpectedStateException("Unsupported merge regionState=" + state +
         " for regionA=" + hriA + " regionB=" + hriB + " merged=" + merged +
         " maybe an old RS (< 2.0) had the operation in progress");
     }
@@ -884,7 +884,8 @@ public class AssignmentManager implements ServerListener {
     // If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge
     if (regionStates.getOrCreateServer(serverName).getVersionNumber() < 0x0200000) {
       throw new UnsupportedOperationException(String.format(
-        "Merge not handled yet: state=%s merged=%s hriA=%s hriB=%s", state, merged, hriA, hriB));
+        "Merge not handled yet: regionState=%s merged=%s hriA=%s hriB=%s", state, merged, hriA,
+          hriB));
     }
   }
 
@@ -1162,7 +1163,7 @@ public class AssignmentManager implements ServerListener {
       LOG.info("waiting for RS to join");
       Threads.sleep(250);
     }
-    LOG.info("RS joined " + master.getServerManager().countOfRegionServers());
+    LOG.info("RS joined. Num RS = " + master.getServerManager().countOfRegionServers());
 
     // This method will assign all user regions if a clean server startup or
     // it will reconstruct master state and cleanup any leftovers from previous master process.
@@ -1188,7 +1189,7 @@ public class AssignmentManager implements ServerListener {
           // hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?
           // In any of these cases, state is empty. For now, presume OFFLINE but there are probably
           // cases where we need to probe more to be sure this correct; TODO informed by experience.
-          LOG.info(regionInfo.getEncodedName() + " state=null; presuming " + State.OFFLINE);
+          LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);
           localState = State.OFFLINE;
         }
         synchronized (regionNode) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
index c82f8d1..79b2271 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
@@ -104,6 +104,12 @@ public class TruncateTableProcedure
           break;
         case TRUNCATE_TABLE_CLEAR_FS_LAYOUT:
           DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
+          // NOTE: It's very important that we create new HRegions before next state, so that
+          // they get persisted in procedure state before we start using them for anything.
+          // Otherwise, if we create them in next step and master crashes after creating fs
+          // layout but before saving state, region re-created after recovery will have different
+          // regionId(s) and encoded names. That will lead to unwanted regions in FS layout
+          // (which were created before the crash).
           if (!preserveSplits) {
             // if we are not preserving splits, generate a new single region
             regions = Arrays.asList(ModifyRegionUtils.createRegionInfos(tableDescriptor, null));

http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
index 1485966..c68c01f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
@@ -170,11 +170,11 @@ public class MasterProcedureTestingUtility {
     final Path tableDir = FSUtils.getTableDir(master.getMasterFileSystem().getRootDir(), tableName);
     assertTrue(fs.exists(tableDir));
     FSUtils.logFileSystemState(fs, tableDir, LOG);
-    List<Path> allRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
+    List<Path> unwantedRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
     for (int i = 0; i < regions.length; ++i) {
       Path regionDir = new Path(tableDir, regions[i].getEncodedName());
       assertTrue(regions[i] + " region dir does not exist", fs.exists(regionDir));
-      assertTrue(allRegionDirs.remove(regionDir));
+      assertTrue(unwantedRegionDirs.remove(regionDir));
       List<Path> allFamilyDirs = FSUtils.getFamilyDirs(fs, regionDir);
       for (int j = 0; j < family.length; ++j) {
         final Path familyDir = new Path(regionDir, family[j]);
@@ -191,7 +191,8 @@ public class MasterProcedureTestingUtility {
       }
       assertTrue("found extraneous families: " + allFamilyDirs, allFamilyDirs.isEmpty());
     }
-    assertTrue("found extraneous regions: " + allRegionDirs, allRegionDirs.isEmpty());
+    assertTrue("found extraneous regions: " + unwantedRegionDirs, unwantedRegionDirs.isEmpty());
+    LOG.debug("Table directory layout is as expected.");
 
     // check meta
     assertTrue(MetaTableAccessor.tableExists(master.getConnection(), tableName));

http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
index c5a17a0..2368af5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
@@ -49,7 +49,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
   @Rule
   public TestName name = new TestName();
 
-  @Test(timeout=60000)
+  @Test
   public void testTruncateNotExistentTable() throws Exception {
     final TableName tableName = TableName.valueOf(name.getMethodName());
 
@@ -64,7 +64,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
     assertTrue(ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotFoundException);
   }
 
-  @Test(timeout=60000)
+  @Test
   public void testTruncateNotDisabledTable() throws Exception {
     final TableName tableName = TableName.valueOf(name.getMethodName());
 
@@ -82,13 +82,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
       ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotDisabledException);
   }
 
-  @Test(timeout=60000)
+  @Test
   public void testSimpleTruncatePreserveSplits() throws Exception {
     final TableName tableName = TableName.valueOf(name.getMethodName());
     testSimpleTruncate(tableName, true);
   }
 
-  @Test(timeout=60000)
+  @Test
   public void testSimpleTruncateNoPreserveSplits() throws Exception {
     final TableName tableName = TableName.valueOf(name.getMethodName());
     testSimpleTruncate(tableName, false);
@@ -116,6 +116,8 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
       new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
     ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
 
+    // If truncate procedure completed successfully, it means all regions were assigned correctly
+    // and table is enabled now.
     UTIL.waitUntilAllRegionsAssigned(tableName);
 
     // validate the table regions and layout
@@ -137,13 +139,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
     assertEquals(50, UTIL.countRows(tableName));
   }
 
-  @Test(timeout=60000)
+  @Test
   public void testRecoveryAndDoubleExecutionPreserveSplits() throws Exception {
     final TableName tableName = TableName.valueOf(name.getMethodName());
     testRecoveryAndDoubleExecution(tableName, true);
   }
 
-  @Test(timeout=60000)
+  @Test
   public void testRecoveryAndDoubleExecutionNoPreserveSplits() throws Exception {
     final TableName tableName = TableName.valueOf(name.getMethodName());
     testRecoveryAndDoubleExecution(tableName, false);