You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2011/12/22 06:45:10 UTC
svn commit: r1222047 - in /hbase/branches/0.92: CHANGES.txt
src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
src/test/java/org/apache/hadoop/hbase/master/TestSplitLogManager.java
Author: stack
Date: Thu Dec 22 05:45:09 2011
New Revision: 1222047
URL: http://svn.apache.org/viewvc?rev=1222047&view=rev
Log:
HBASE-5081 Distributed log splitting deleteNode races against splitLog retry; REVERT -- COMMITTED BEFORE REVIEW FINISHED -- AGAIN
Modified:
hbase/branches/0.92/CHANGES.txt
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestSplitLogManager.java
Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1222047&r1=1222046&r2=1222047&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Thu Dec 22 05:45:09 2011
@@ -522,8 +522,6 @@ Release 0.92.0 - Unreleased
(Gao Jinchao)
HBASE-4874 Run tests with non-secure random, some tests hang otherwise (Lars H)
HBASE-4987 wrong use of incarnation var in SplitLogManager (Prakash Khemani)
- HBASE-5081 Distributed log splitting deleteNode races againsth
- splitLog retry (Jimmy Xiang)
IMPROVEMENTS
HBASE-3290 Max Compaction Size (Nicolas Spiegelberg via Stack)
Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java?rev=1222047&r1=1222046&r2=1222047&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java Thu Dec 22 05:45:09 2011
@@ -324,7 +324,6 @@ public class SplitLogManager extends Zoo
LOG.warn("Error splitting " + path);
}
}
- boolean safeToDeleteNodeAsync = true;
Task task = tasks.get(path);
if (task == null) {
if (!ZKSplitLog.isRescanNode(watcher, path)) {
@@ -338,13 +337,6 @@ public class SplitLogManager extends Zoo
// forgetting about them then we will have to handle the race when
// accessing task.batch here.
if (!task.isOrphan()) {
- if (status != SUCCESS) {
- // If the task is failed, deleting the node asynchronously
- // will cause race issue against split log retry.
- // In this case, we should delete it now.
- safeToDeleteNodeAsync = false;
- deleteNodeNow(path);
- }
synchronized (task.batch) {
if (status == SUCCESS) {
task.batch.done++;
@@ -359,35 +351,11 @@ public class SplitLogManager extends Zoo
// delete the task node in zk. Keep trying indefinitely - its an async
// call and no one is blocked waiting for this node to be deleted. All
// task names are unique (log.<timestamp>) there is no risk of deleting
- // a future task. This is true if the task status is SUCCESS, otherwise,
- // it may race against split log retry.
- if (safeToDeleteNodeAsync) {
- deleteNode(path, Long.MAX_VALUE);
- }
+ // a future task.
+ deleteNode(path, Long.MAX_VALUE);
return;
}
- private void deleteNodeNow(String path) {
- try {
- tot_mgr_node_delete_queued.incrementAndGet();
- this.watcher.getRecoverableZooKeeper().delete(path, -1);
- tot_mgr_task_deleted.incrementAndGet();
- } catch (KeeperException ke) {
- if (ke.code() != KeeperException.Code.NONODE) {
- tot_mgr_node_delete_err.incrementAndGet();
- LOG.warn("Failed to delete failed task node: "
- + path + " due to " + ke.getMessage());
- } else {
- LOG.info("Failed task node does not exist, "
- + "either was never created or was already deleted: " + path);
- tot_mgr_task_deleted.incrementAndGet();
- }
- } catch (InterruptedException ie) {
- LOG.warn("Interrupted while waiting for failed task node to be deleted");
- Thread.currentThread().interrupt();
- }
- }
-
private void createNode(String path, Long retry_count) {
ZKUtil.asyncCreate(this.watcher, path,
TaskState.TASK_UNASSIGNED.get(serverName), new CreateAsyncCallback(),
Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestSplitLogManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestSplitLogManager.java?rev=1222047&r1=1222046&r2=1222047&view=diff
==============================================================================
--- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestSplitLogManager.java (original)
+++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestSplitLogManager.java Thu Dec 22 05:45:09 2011
@@ -328,28 +328,6 @@ public class TestSplitLogManager {
}
waitForCounter(tot_mgr_task_deleted, 0, 1, 1000);
assertTrue(ZKUtil.checkExists(zkw, tasknode) == -1);
-
- conf.setInt("hbase.splitlog.max.resubmit", 0);
- slm.stopTrackingTasks(batch);
- batch = new TaskBatch();
- resetCounters();
-
- // inject a failed task node, and retry
- ZKUtil.createAndWatch(zkw, tasknode, TaskState.TASK_ERR.get("worker"));
-
- slm.enqueueSplitTask("foo/1", batch);
- assertEquals(1, batch.installed);
- assertTrue(slm.findOrCreateOrphanTask(tasknode).batch == batch);
- waitForCounter(tot_mgr_node_already_exists, 0, 1, 1000);
-
- synchronized (batch) {
- while (batch.installed != batch.error) {
- batch.wait();
- }
- }
- waitForCounter(tot_mgr_task_deleted, 0, 1, 1000);
- assertTrue(ZKUtil.checkExists(zkw, tasknode) == -1);
-
conf.setInt("hbase.splitlog.max.resubmit", ZKSplitLog.DEFAULT_MAX_RESUBMIT);
}