You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2007/12/19 08:26:49 UTC

svn commit: r605453 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/dfs/DataNode.java src/java/org/apache/hadoop/dfs/FSConstants.java src/test/org/apache/hadoop/dfs/MiniDFSCluster.java

Author: dhruba
Date: Tue Dec 18 23:26:45 2007
New Revision: 605453

URL: http://svn.apache.org/viewvc?rev=605453&view=rev
Log:
HADOOP-2326. The initial block report at Datanode startup time has
a random backoff period.  (Sanjay Radia via dhruba)


Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=605453&r1=605452&r2=605453&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Dec 18 23:26:45 2007
@@ -123,6 +123,9 @@
     HADOOP-2248. Speeds up the framework w.r.t Counters. Also has API
     updates to the Counters part. (Owen O'Malley via ddas)
 
+    HADOOP-2326. The initial block report at Datanode startup time has
+    a random backoff period.  (Sanjay Radia via dhruba)
+
   OPTIMIZATIONS
 
     HADOOP-1898.  Release the lock protecting the last time of the last stack

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?rev=605453&r1=605452&r2=605453&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Tue Dec 18 23:26:45 2007
@@ -103,6 +103,8 @@
   Daemon dataXceiveServer = null;
   long blockReportInterval;
   long lastBlockReport = 0;
+  boolean resetBlockReportTime = true;
+  long initialBlockReportDelay = BLOCKREPORT_INITIAL_DELAY * 1000L;
   long lastHeartbeat = 0;
   long heartBeatInterval;
   private DataStorage storage = null;
@@ -285,6 +287,13 @@
       conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
     this.blockReportInterval =
       blockReportIntervalBasis - new Random().nextInt((int)(blockReportIntervalBasis/10));
+    this.initialBlockReportDelay = conf.getLong("dfs.blockreport.initialDelay",
+                                            BLOCKREPORT_INITIAL_DELAY)* 1000L; 
+    if (this.initialBlockReportDelay >= blockReportIntervalBasis) {
+      this.initialBlockReportDelay = 0;
+      LOG.info("dfs.blockreport.initialDelay is greater than " +
+      	"dfs.blockreport.intervalMsec." + " Setting initial delay to 0 msec:");
+    }
     this.heartBeatInterval = conf.getLong("dfs.heartbeat.interval", HEARTBEAT_INTERVAL) * 1000L;
     DataNode.nameNodeAddr = nameNodeAddr;
 
@@ -520,7 +529,8 @@
    */
   public void offerService() throws Exception {
      
-    LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec");
+    LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec" + 
+       " Initial delay: " + initialBlockReportDelay + "msec");
 
     //
     // Now loop for a long time....
@@ -599,8 +609,9 @@
           // If we have sent the first block report, then wait a random
           // time before we start the periodic block reports.
           //
-          if (lastBlockReport == 0) {
+          if (resetBlockReportTime) {
             lastBlockReport = now - new Random().nextInt((int)(blockReportInterval));
+            resetBlockReportTime = false;
           } else {
             lastBlockReport = now;
           }
@@ -672,9 +683,10 @@
       this.shutdown();
       return false;
     case DatanodeProtocol.DNA_REGISTER:
-      // namenode requested a registration
+      // namenode requested a registration - at start or if NN lost contact
       register();
-      scheduleBlockReport();
+      // random short delay - helps scatter the BR from all DNs
+      scheduleBlockReport(initialBlockReportDelay);
       break;
     case DatanodeProtocol.DNA_FINALIZE:
       storage.finalizeUpgrade();
@@ -1923,9 +1935,14 @@
   /**
    * This methods  arranges for the data node to send the block report at the next heartbeat.
    */
-  public void scheduleBlockReport() {
-    lastHeartbeat=0;
-    lastBlockReport=0;
+  public void scheduleBlockReport(long delay) {
+    if (delay > 0) { // send BR after random delay
+      lastBlockReport = System.currentTimeMillis()
+  							- ( blockReportInterval - new Random().nextInt((int)(delay)));
+    } else { // send at next heartbeat
+      lastBlockReport = lastHeartbeat - blockReportInterval;
+    }
+    resetBlockReportTime = true; // reset future BRs for randomness
   }
   
   

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java?rev=605453&r1=605452&r2=605453&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java Tue Dec 18 23:26:45 2007
@@ -128,6 +128,7 @@
   //
   public static long HEARTBEAT_INTERVAL = 3;
   public static long BLOCKREPORT_INTERVAL = 60 * 60 * 1000;
+  public static long BLOCKREPORT_INITIAL_DELAY = 60;
   public static final long LEASE_SOFTLIMIT_PERIOD = 60 * 1000;
   public static final long LEASE_HARDLIMIT_PERIOD = 60 * LEASE_SOFTLIMIT_PERIOD;
   public static int READ_TIMEOUT = 60 * 1000;

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java?rev=605453&r1=605452&r2=605453&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/MiniDFSCluster.java Tue Dec 18 23:26:45 2007
@@ -219,6 +219,10 @@
                              String[] racks,
                              long[] simulatedCapacities) throws IOException {
 
+    // for a minicluster, the default initialDelay for block reports (BRs) is 0
+    if (conf.get("dfs.blockreport.initialDelay") == null) {
+      conf.setLong("dfs.blockreport.initialDelay", 0);
+    }
     // If minicluster's name node is null assume that the conf has been
     // set with the right address:port of the name node.
     //
@@ -484,7 +488,7 @@
     }
     SimulatedFSDataset sdataset = (SimulatedFSDataset) dataSet;
     sdataset.injectBlocks(blocksToInject);
-    dataNodes.get(dataNodeIndex).scheduleBlockReport();
+    dataNodes.get(dataNodeIndex).scheduleBlockReport(0);
   }
   
   /**