You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sz...@apache.org on 2008/10/11 01:59:46 UTC

svn commit: r703610 - in /hadoop/core/branches/branch-0.19: ./ src/hdfs/org/apache/hadoop/hdfs/protocol/ src/hdfs/org/apache/hadoop/hdfs/server/datanode/ src/hdfs/org/apache/hadoop/hdfs/server/namenode/ src/test/org/apache/hadoop/hdfs/

Author: szetszwo
Date: Fri Oct 10 16:59:45 2008
New Revision: 703610

URL: http://svn.apache.org/viewvc?rev=703610&view=rev
Log:
HADOOP-3883. Limit namenode to assign at most one generation stamp for a particular block within a short period. (szetszwo)

Modified:
    hadoop/core/branches/branch-0.19/CHANGES.txt
    hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java
    hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java
    hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
    hadoop/core/branches/branch-0.19/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java

Modified: hadoop/core/branches/branch-0.19/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/CHANGES.txt?rev=703610&r1=703609&r2=703610&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.19/CHANGES.txt Fri Oct 10 16:59:45 2008
@@ -828,7 +828,10 @@
     pre-configured maximum value. (dhruba)
 
     HADOOP-4288. Fixes a NPE problem in CapacityScheduler. 
-   (Amar Kamat via ddas)
+    (Amar Kamat via ddas)
+
+    HADOOP-3883. Limit namenode to assign at most one generation stamp for
+    a particular block within a short period. (szetszwo)
 
 Release 0.18.2 - Unreleased
 

Modified: hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java?rev=703610&r1=703609&r2=703610&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java (original)
+++ hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java Fri Oct 10 16:59:45 2008
@@ -41,6 +41,7 @@
   public static long BLOCKREPORT_INITIAL_DELAY = 0;
   public static final long LEASE_SOFTLIMIT_PERIOD = 60 * 1000;
   public static final long LEASE_HARDLIMIT_PERIOD = 60 * LEASE_SOFTLIMIT_PERIOD;
+  public static final long LEASE_RECOVER_PERIOD = 10 * 1000; //in ms
   
   // We need to limit the length and depth of a path in the filesystem.  HADOOP-438
   // Currently we set the maximum length to 8k characters and the maximum depth to 1k.  

Modified: hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java?rev=703610&r1=703609&r2=703610&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java (original)
+++ hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java Fri Oct 10 16:59:45 2008
@@ -1322,9 +1322,10 @@
       public void run() {
         for(int i = 0; i < blocks.length; i++) {
           try {
+            logRecoverBlock("NameNode", blocks[i], targets[i]);
             recoverBlock(blocks[i], targets[i], true);
           } catch (IOException e) {
-            LOG.warn("recoverBlocks, i=" + i, e);
+            LOG.warn("recoverBlocks FAILED, blocks[" + i + "]=" + blocks[i], e);
           }
         }
       }
@@ -1397,10 +1398,6 @@
       ongoingRecovery.put(block, block);
     }
     try {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("block=" + block
-            + ", datanodeids=" + Arrays.asList(datanodeids));
-      }
       List<BlockRecord> syncList = new ArrayList<BlockRecord>();
       long minlength = Long.MAX_VALUE;
       int errorCount = 0;
@@ -1480,22 +1477,31 @@
       }
       return new LocatedBlock(newblock, info); // success
     }
-    return null; // failed
+
+    //failed
+    StringBuilder b = new StringBuilder();
+    for(BlockRecord r : syncList) {
+      b.append("\n  " + r.id);
+    }
+    throw new IOException("Cannot recover " + block + ", none of these "
+        + syncList.size() + " datanodes success {" + b + "\n}");
   }
   
   // ClientDataNodeProtocol implementation
   /** {@inheritDoc} */
   public LocatedBlock recoverBlock(Block block, DatanodeInfo[] targets
       ) throws IOException {
-    StringBuilder msg = new StringBuilder();
-    for (int i = 0; i < targets.length; i++) {
-      msg.append(targets[i].getName());
-      if (i < targets.length - 1) {
-        msg.append(",");
-      }
-    }
-    LOG.info("Client invoking recoverBlock for block " + block +
-             " on datanodes " + msg.toString());
+    logRecoverBlock("Client", block, targets);
     return recoverBlock(block, targets, false);
   }
+
+  private static void logRecoverBlock(String who,
+      Block block, DatanodeID[] targets) {
+    StringBuilder msg = new StringBuilder(targets[0].getName());
+    for (int i = 1; i < targets.length; i++) {
+      msg.append(", " + targets[i].getName());
+    }
+    LOG.info(who + " calls recoverBlock(block=" + block
+        + ", targets=[" + msg + "])");
+  }
 }

Modified: hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=703610&r1=703609&r2=703610&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Oct 10 16:59:45 2008
@@ -1904,10 +1904,7 @@
     if (!closeFile) {
       dir.persistBlocks(src, pendingFile);
       getEditLog().logSync();
-      LOG.info("commitBlockSynchronization(lastblock=" + lastblock
-          + ", newgenerationstamp=" + newgenerationstamp
-          + ", newlength=" + newlength
-          + ", newtargets=" + Arrays.asList(newtargets) + ") successful");
+      LOG.info("commitBlockSynchronization(" + lastblock + ") successful");
       return;
     }
     
@@ -4472,14 +4469,20 @@
    * Increments, logs and then returns the stamp
    */
   synchronized long nextGenerationStampForBlock(Block block) throws IOException {
-    String msg = "Block " + block + " is already commited.";
     BlockInfo storedBlock = blocksMap.getStoredBlock(block);
     if (storedBlock == null) {
+      String msg = block + " is already commited, storedBlock == null.";
       LOG.info(msg);
       throw new IOException(msg);
     }
-    INode fileINode = storedBlock.getINode();
+    INodeFile fileINode = storedBlock.getINode();
     if (!fileINode.isUnderConstruction()) {
+      String msg = block + " is already commited, !fileINode.isUnderConstruction().";
+      LOG.info(msg);
+      throw new IOException(msg);
+    }
+    if (!((INodeFileUnderConstruction)fileINode).setLastRecoveryTime(now())) {
+      String msg = block + " is beening recovered, ignoring this request.";
       LOG.info(msg);
       throw new IOException(msg);
     }

Modified: hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java?rev=703610&r1=703609&r2=703610&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java (original)
+++ hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java Fri Oct 10 16:59:45 2008
@@ -31,6 +31,7 @@
 
   private int primaryNodeIndex = -1; //the node working on lease recovery
   private DatanodeDescriptor[] targets = null;   //locations for last block
+  private long lastRecoveryTime = 0;
   
   INodeFileUnderConstruction() {}
 
@@ -133,7 +134,7 @@
     targets = null;
   }
 
-  void setLastBlock(BlockInfo newblock, DatanodeDescriptor[] newtargets
+  synchronized void setLastBlock(BlockInfo newblock, DatanodeDescriptor[] newtargets
       ) throws IOException {
     if (blocks == null) {
       throw new IOException("Trying to update non-existant block (newblock="
@@ -141,6 +142,7 @@
     }
     blocks[blocks.length - 1] = newblock;
     setTargets(newtargets);
+    lastRecoveryTime = 0;
   }
 
   /**
@@ -168,4 +170,16 @@
       }
     }
   }
+  
+  /**
+   * Update lastRecoveryTime if expired.
+   * @return true if lastRecoveryTime is updated.
+   */
+  synchronized boolean setLastRecoveryTime(long now) {
+    boolean expired = now - lastRecoveryTime > NameNode.LEASE_RECOVER_PERIOD;
+    if (expired) {
+      lastRecoveryTime = now;
+    }
+    return expired;
+  }
 }

Modified: hadoop/core/branches/branch-0.19/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java?rev=703610&r1=703609&r2=703610&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java (original)
+++ hadoop/core/branches/branch-0.19/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java Fri Oct 10 16:59:45 2008
@@ -19,16 +19,27 @@
 
 import java.io.IOException;
 
+import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
 import org.apache.hadoop.security.UnixUserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.log4j.Level;
 
 public class TestLeaseRecovery2 extends junit.framework.TestCase {
+  {
+    ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
+  }
+
   static final long BLOCK_SIZE = 1024;
   static final int FILE_SIZE = 1024*16;
   static final short REPLICATION_NUM = (short)3;