You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2017/12/09 01:25:33 UTC
hbase git commit: HBASE-19457 Debugging flaky
TestTruncateTableProcedure
Repository: hbase
Updated Branches:
refs/heads/master a5a77ae3d -> 7092b814b
HBASE-19457 Debugging flaky TestTruncateTableProcedure
- Adds debug logging for future ease
- Removes 60s timeout since testRecoveryAndDoubleExecutionPreserveSplits is only halfway after a minute.
- Adds some comments
- Logging change: Some places report "regionState=" while others just "state=".
State machine procs also have "state=" in their logs. Let me change all region related logging to "regionState=" so that
1) it's consistent everywhere, 2) more filtered results when searching through logs.
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7092b814
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7092b814
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7092b814
Branch: refs/heads/master
Commit: 7092b814bd5c5e243987853771dc2ec339ae0b0c
Parents: a5a77ae
Author: Apekshit Sharma <ap...@apache.org>
Authored: Fri Dec 8 17:16:06 2017 -0800
Committer: Apekshit Sharma <ap...@apache.org>
Committed: Fri Dec 8 17:25:16 2017 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hbase/util/CommonFSUtils.java | 2 +-
.../hbase/procedure2/StateMachineProcedure.java | 3 +++
.../hbase/master/assignment/AssignmentManager.java | 13 +++++++------
.../master/procedure/TruncateTableProcedure.java | 6 ++++++
.../procedure/MasterProcedureTestingUtility.java | 7 ++++---
.../master/procedure/TestTruncateTableProcedure.java | 14 ++++++++------
6 files changed, 29 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java
index bdf148e..38ae476 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CommonFSUtils.java
@@ -733,7 +733,7 @@ public abstract class CommonFSUtils {
*/
public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
throws IOException {
- LOG.debug("Current file system:");
+ LOG.debug("File system contents for path " + root);
logFSTree(LOG, fs, root, "|-");
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
index bb1d4fc..893ee0c 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/StateMachineProcedure.java
@@ -176,6 +176,9 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
this.cycles++;
}
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(toString());
+ }
stateFlow = executeFromState(env, state);
if (!hasMoreState()) setNextState(EOF_STATE);
if (subProcList != null && !subProcList.isEmpty()) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 26c6bb7..cebe0b0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -805,7 +805,7 @@ public class AssignmentManager implements ServerListener {
}
if (LOG.isTraceEnabled()) {
- LOG.trace(String.format("Update region transition serverName=%s region=%s state=%s",
+ LOG.trace(String.format("Update region transition serverName=%s region=%s regionState=%s",
serverName, regionNode, state));
}
@@ -838,7 +838,7 @@ public class AssignmentManager implements ServerListener {
checkFailoverCleanupCompleted(parent);
if (state != TransitionCode.READY_TO_SPLIT) {
- throw new UnexpectedStateException("unsupported split state=" + state +
+ throw new UnexpectedStateException("unsupported split regionState=" + state +
" for parent region " + parent +
" maybe an old RS (< 2.0) had the operation in progress");
}
@@ -870,7 +870,7 @@ public class AssignmentManager implements ServerListener {
checkFailoverCleanupCompleted(merged);
if (state != TransitionCode.READY_TO_MERGE) {
- throw new UnexpectedStateException("Unsupported merge state=" + state +
+ throw new UnexpectedStateException("Unsupported merge regionState=" + state +
" for regionA=" + hriA + " regionB=" + hriB + " merged=" + merged +
" maybe an old RS (< 2.0) had the operation in progress");
}
@@ -884,7 +884,8 @@ public class AssignmentManager implements ServerListener {
// If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge
if (regionStates.getOrCreateServer(serverName).getVersionNumber() < 0x0200000) {
throw new UnsupportedOperationException(String.format(
- "Merge not handled yet: state=%s merged=%s hriA=%s hriB=%s", state, merged, hriA, hriB));
+ "Merge not handled yet: regionState=%s merged=%s hriA=%s hriB=%s", state, merged, hriA,
+ hriB));
}
}
@@ -1162,7 +1163,7 @@ public class AssignmentManager implements ServerListener {
LOG.info("waiting for RS to join");
Threads.sleep(250);
}
- LOG.info("RS joined " + master.getServerManager().countOfRegionServers());
+ LOG.info("RS joined. Num RS = " + master.getServerManager().countOfRegionServers());
// This method will assign all user regions if a clean server startup or
// it will reconstruct master state and cleanup any leftovers from previous master process.
@@ -1188,7 +1189,7 @@ public class AssignmentManager implements ServerListener {
// hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?
// In any of these cases, state is empty. For now, presume OFFLINE but there are probably
// cases where we need to probe more to be sure this correct; TODO informed by experience.
- LOG.info(regionInfo.getEncodedName() + " state=null; presuming " + State.OFFLINE);
+ LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);
localState = State.OFFLINE;
}
synchronized (regionNode) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
index c82f8d1..79b2271 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
@@ -104,6 +104,12 @@ public class TruncateTableProcedure
break;
case TRUNCATE_TABLE_CLEAR_FS_LAYOUT:
DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
+ // NOTE: It's very important that we create new HRegions before next state, so that
+ // they get persisted in procedure state before we start using them for anything.
+ // Otherwise, if we create them in next step and master crashes after creating fs
+ // layout but before saving state, region re-created after recovery will have different
+ // regionId(s) and encoded names. That will lead to unwanted regions in FS layout
+ // (which were created before the crash).
if (!preserveSplits) {
// if we are not preserving splits, generate a new single region
regions = Arrays.asList(ModifyRegionUtils.createRegionInfos(tableDescriptor, null));
http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
index 1485966..c68c01f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java
@@ -170,11 +170,11 @@ public class MasterProcedureTestingUtility {
final Path tableDir = FSUtils.getTableDir(master.getMasterFileSystem().getRootDir(), tableName);
assertTrue(fs.exists(tableDir));
FSUtils.logFileSystemState(fs, tableDir, LOG);
- List<Path> allRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
+ List<Path> unwantedRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
for (int i = 0; i < regions.length; ++i) {
Path regionDir = new Path(tableDir, regions[i].getEncodedName());
assertTrue(regions[i] + " region dir does not exist", fs.exists(regionDir));
- assertTrue(allRegionDirs.remove(regionDir));
+ assertTrue(unwantedRegionDirs.remove(regionDir));
List<Path> allFamilyDirs = FSUtils.getFamilyDirs(fs, regionDir);
for (int j = 0; j < family.length; ++j) {
final Path familyDir = new Path(regionDir, family[j]);
@@ -191,7 +191,8 @@ public class MasterProcedureTestingUtility {
}
assertTrue("found extraneous families: " + allFamilyDirs, allFamilyDirs.isEmpty());
}
- assertTrue("found extraneous regions: " + allRegionDirs, allRegionDirs.isEmpty());
+ assertTrue("found extraneous regions: " + unwantedRegionDirs, unwantedRegionDirs.isEmpty());
+ LOG.debug("Table directory layout is as expected.");
// check meta
assertTrue(MetaTableAccessor.tableExists(master.getConnection(), tableName));
http://git-wip-us.apache.org/repos/asf/hbase/blob/7092b814/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
index c5a17a0..2368af5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
@@ -49,7 +49,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
@Rule
public TestName name = new TestName();
- @Test(timeout=60000)
+ @Test
public void testTruncateNotExistentTable() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
@@ -64,7 +64,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
assertTrue(ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotFoundException);
}
- @Test(timeout=60000)
+ @Test
public void testTruncateNotDisabledTable() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
@@ -82,13 +82,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotDisabledException);
}
- @Test(timeout=60000)
+ @Test
public void testSimpleTruncatePreserveSplits() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
testSimpleTruncate(tableName, true);
}
- @Test(timeout=60000)
+ @Test
public void testSimpleTruncateNoPreserveSplits() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
testSimpleTruncate(tableName, false);
@@ -116,6 +116,8 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
+ // If truncate procedure completed successfully, it means all regions were assigned correctly
+ // and table is enabled now.
UTIL.waitUntilAllRegionsAssigned(tableName);
// validate the table regions and layout
@@ -137,13 +139,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
assertEquals(50, UTIL.countRows(tableName));
}
- @Test(timeout=60000)
+ @Test
public void testRecoveryAndDoubleExecutionPreserveSplits() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
testRecoveryAndDoubleExecution(tableName, true);
}
- @Test(timeout=60000)
+ @Test
public void testRecoveryAndDoubleExecutionNoPreserveSplits() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
testRecoveryAndDoubleExecution(tableName, false);