You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by ki...@apache.org on 2013/06/07 22:05:18 UTC
svn commit: r1490805 - in
/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs: ./
src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/
src/main/java/org/apache/hadoop/hdfs/server/namenode/
src/test/java/org/apache/hadoop/hdfs/server/namenode/
Author: kihwal
Date: Fri Jun 7 20:05:17 2013
New Revision: 1490805
URL: http://svn.apache.org/r1490805
Log:
svn merge -c 1490803 Merging from trunk to branch-2 to fix HDFS-4832.
Modified:
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1490805&r1=1490804&r2=1490805&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Fri Jun 7 20:05:17 2013
@@ -2940,6 +2940,9 @@ Release 0.23.9 - UNRELEASED
HDFS-4862. SafeModeInfo.isManual() returns true when resources are low even
if it wasn't entered into manually (Ravi Prakash via kihwal)
+ HDFS-4832. Namenode doesn't change the number of missing blocks in
+ safemode when DNs rejoin or leave (Ravi Prakash via kihwal)
+
Release 0.23.8 - 2013-06-05
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java?rev=1490805&r1=1490804&r2=1490805&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java Fri Jun 7 20:05:17 2013
@@ -2156,7 +2156,7 @@ assert storedBlock.findDatanode(dn) < 0
return storedBlock;
}
- // do not try to handle over/under-replicated blocks during safe mode
+ // do not try to handle over/under-replicated blocks during first safe mode
if (!namesystem.isPopulatingReplQueues()) {
return storedBlock;
}
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java?rev=1490805&r1=1490804&r2=1490805&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java Fri Jun 7 20:05:17 2013
@@ -1190,7 +1190,13 @@ public class DatanodeManager {
heartbeatManager.updateHeartbeat(nodeinfo, capacity, dfsUsed,
remaining, blockPoolUsed, xceiverCount, failedVolumes);
-
+
+ // If we are in safemode, do not send back any recovery / replication
+ // requests. Don't even drain the existing queue of work.
+ if(namesystem.isInSafeMode()) {
+ return new DatanodeCommand[0];
+ }
+
//check lease recovery
BlockInfoUnderConstruction[] blocks = nodeinfo
.getLeaseRecoveryCommand(Integer.MAX_VALUE);
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java?rev=1490805&r1=1490804&r2=1490805&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java Fri Jun 7 20:05:17 2013
@@ -223,7 +223,7 @@ class HeartbeatManager implements Datano
final DatanodeManager dm = blockManager.getDatanodeManager();
// It's OK to check safe mode w/o taking the lock here, we re-check
// for safe mode after taking the lock before removing a datanode.
- if (namesystem.isInSafeMode()) {
+ if (namesystem.isInStartupSafeMode()) {
return;
}
boolean allAlive = false;
@@ -252,7 +252,7 @@ class HeartbeatManager implements Datano
// acquire the fsnamesystem lock, and then remove the dead node.
namesystem.writeLock();
try {
- if (namesystem.isInSafeMode()) {
+ if (namesystem.isInStartupSafeMode()) {
return;
}
synchronized(this) {
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1490805&r1=1490804&r2=1490805&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Jun 7 20:05:17 2013
@@ -4076,7 +4076,7 @@ public class FSNamesystem implements Nam
*
* @see SafeModeInfo
*/
- private SafeModeInfo(boolean resourcesLow) {
+ private SafeModeInfo(boolean resourcesLow, boolean isReplQueuesInited) {
this.threshold = 1.5f; // this threshold can never be reached
this.datanodeThreshold = Integer.MAX_VALUE;
this.extension = Integer.MAX_VALUE;
@@ -4085,6 +4085,7 @@ public class FSNamesystem implements Nam
this.blockTotal = -1;
this.blockSafe = -1;
this.resourcesLow = resourcesLow;
+ this.initializedReplQueues = isReplQueuesInited;
enter();
reportStatus("STATE* Safe mode is ON.", true);
}
@@ -4510,6 +4511,10 @@ public class FSNamesystem implements Nam
&& safeMode.isOn();
}
+ /**
+ * Check if replication queues are to be populated
+ * @return true when node is HAState.Active and not in the very first safemode
+ */
@Override
public boolean isPopulatingReplQueues() {
if (!shouldPopulateReplQueues()) {
@@ -4640,7 +4645,7 @@ public class FSNamesystem implements Nam
getEditLog().logSyncAll();
}
if (!isInSafeMode()) {
- safeMode = new SafeModeInfo(resourcesLow);
+ safeMode = new SafeModeInfo(resourcesLow, isPopulatingReplQueues());
return;
}
if (resourcesLow) {
Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java?rev=1490805&r1=1490804&r2=1490805&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java Fri Jun 7 20:05:17 2013
@@ -34,9 +34,12 @@ import org.apache.hadoop.hdfs.DFSTestUti
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
+import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
+import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.junit.After;
import org.junit.Test;
import org.mockito.Mockito;
+import org.mockito.internal.util.reflection.Whitebox;
public class TestFSNamesystem {
@@ -104,4 +107,39 @@ public class TestFSNamesystem {
assertTrue("After entering safemode due to low resources FSNamesystem."
+ "isInSafeMode still returned false", fsn.isInSafeMode());
}
+
+ @Test
+ public void testReplQueuesActiveAfterStartupSafemode() throws IOException, InterruptedException{
+ Configuration conf = new Configuration();
+
+ FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
+ FSImage fsImage = Mockito.mock(FSImage.class);
+ Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
+
+ FSNamesystem fsNamesystem = new FSNamesystem(conf, fsImage);
+ FSNamesystem fsn = Mockito.spy(fsNamesystem);
+
+ //Make shouldPopulateReplQueues return true
+ HAContext haContext = Mockito.mock(HAContext.class);
+ HAState haState = Mockito.mock(HAState.class);
+ Mockito.when(haContext.getState()).thenReturn(haState);
+ Mockito.when(haState.shouldPopulateReplQueues()).thenReturn(true);
+ Whitebox.setInternalState(fsn, "haContext", haContext);
+
+ //Make NameNode.getNameNodeMetrics() not return null
+ NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
+
+ fsn.enterSafeMode(false);
+ assertTrue("FSNamesystem didn't enter safemode", fsn.isInSafeMode());
+ assertTrue("Replication queues were being populated during very first "
+ + "safemode", !fsn.isPopulatingReplQueues());
+ fsn.leaveSafeMode();
+ assertTrue("FSNamesystem didn't leave safemode", !fsn.isInSafeMode());
+ assertTrue("Replication queues weren't being populated even after leaving "
+ + "safemode", fsn.isPopulatingReplQueues());
+ fsn.enterSafeMode(false);
+ assertTrue("FSNamesystem didn't enter safemode", fsn.isInSafeMode());
+ assertTrue("Replication queues weren't being populated after entering "
+ + "safemode 2nd time", fsn.isPopulatingReplQueues());
+ }
}