You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2008/10/03 03:04:55 UTC

svn commit: r701273 - in /hadoop/core/trunk: ./ src/hdfs/org/apache/hadoop/hdfs/ src/hdfs/org/apache/hadoop/hdfs/server/datanode/ src/hdfs/org/apache/hadoop/hdfs/server/namenode/ src/test/org/apache/hadoop/hdfs/

Author: dhruba
Date: Thu Oct  2 18:04:54 2008
New Revision: 701273

URL: http://svn.apache.org/viewvc?rev=701273&view=rev
Log:
HADOOP-4257. The DFS client should pick only one datanode as the candidate
to initiate lease recovery.  (Tsz Wo (Nicholas), SZE via cdouglas)


Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java
    hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
    hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java
    hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
    hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
    hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/AppendTestUtil.java
    hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=701273&r1=701272&r2=701273&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Thu Oct  2 18:04:54 2008
@@ -825,6 +825,9 @@
     HADOOP-3942. Update distcp documentation to include features introduced in
     HADOOP-3873, HADOOP-3939. (Tsz Wo (Nicholas), SZE via cdouglas)
 
+    HADOOP-4257. The DFS client should pick only one datanode as the candidate
+    to initiate lease recovery.  (Tsz Wo (Nicholas), SZE via cdouglas)
+
 Release 0.18.2 - Unreleased
 
   BUG FIXES

Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java?rev=701273&r1=701272&r2=701273&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/DFSClient.java Thu Oct  2 18:04:54 2008
@@ -71,8 +71,8 @@
   final UnixUserGroupInformation ugi;
   volatile boolean clientRunning = true;
   Random r = new Random();
-  String clientName;
-  private final LeaseChecker leasechecker = new LeaseChecker();
+  final String clientName;
+  final LeaseChecker leasechecker = new LeaseChecker();
   private Configuration conf;
   private long defaultBlockSize;
   private short defaultReplication;
@@ -904,7 +904,7 @@
   }
 
   /** Lease management*/
-  private class LeaseChecker implements Runnable {
+  class LeaseChecker implements Runnable {
     /** A map from src -> DFSOutputStream of files that are currently being
      * written by this client.
      */
@@ -926,6 +926,12 @@
     synchronized void remove(String src) {
       pendingCreates.remove(src);
     }
+    
+    synchronized void interrupt() {
+      if (daemon != null) {
+        daemon.interrupt();
+      }
+    }
 
     synchronized void close() {
       while (!pendingCreates.isEmpty()) {
@@ -941,9 +947,7 @@
         }
       }
       
-      if (daemon != null) {
-        daemon.interrupt();
-      }
+      interrupt();
     }
 
     private void renew() throws IOException {

Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockSender.java?rev=701273&r1=701272&r2=701273&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockSender.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockSender.java Thu Oct  2 18:04:54 2008
@@ -244,8 +244,10 @@
         IOUtils.closeStream(checksumIn);
         checksumIn = null;
         if (corruptChecksumOk) {
-          // Just fill the array with zeros.
-          Arrays.fill(buf, checksumOff, checksumLen, (byte) 0);
+          if (checksumOff < checksumLen) {
+            // Just fill the array with zeros.
+            Arrays.fill(buf, checksumOff, checksumLen, (byte) 0);
+          }
         } else {
           throw e;
         }

Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java?rev=701273&r1=701272&r2=701273&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java Thu Oct  2 18:04:54 2008
@@ -1335,7 +1335,8 @@
 
   /** {@inheritDoc} */
   public void updateBlock(Block oldblock, Block newblock, boolean finalize) throws IOException {
-    LOG.info("oldblock=" + oldblock + ", newblock=" + newblock);
+    LOG.info("oldblock=" + oldblock + ", newblock=" + newblock
+        + ", datanode=" + dnRegistration.getName());
     data.updateBlock(oldblock, newblock);
     if (finalize) {
       data.finalizeBlock(newblock);

Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java?rev=701273&r1=701272&r2=701273&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java Thu Oct  2 18:04:54 2008
@@ -33,7 +33,6 @@
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
 import org.apache.hadoop.conf.*;
-import org.apache.hadoop.hdfs.server.common.GenerationStamp;
 import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
 import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol;
 
@@ -839,7 +838,7 @@
     
     //rename meta file to a tmp file
     File tmpMetaFile = new File(oldMetaFile.getParent(),
-        oldMetaFile.getName()+"_tmp");
+        oldMetaFile.getName()+"_tmp" + newblock.getGenerationStamp());
     if (!oldMetaFile.renameTo(tmpMetaFile)){
       throw new IOException("Cannot rename block meta file to " + tmpMetaFile);
     }

Modified: hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java?rev=701273&r1=701272&r2=701273&view=diff
==============================================================================
--- hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java (original)
+++ hadoop/core/trunk/src/hdfs/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java Thu Oct  2 18:04:54 2008
@@ -163,7 +163,8 @@
         DatanodeDescriptor primary = targets[primaryNodeIndex = j]; 
         primary.addBlockToBeRecovered(blocks[blocks.length - 1], targets);
         NameNode.stateChangeLog.info("BLOCK* " + blocks[blocks.length - 1]
-          + " recovery started.");
+          + " recovery started, primary=" + primary);
+        return;
       }
     }
   }

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/AppendTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/AppendTestUtil.java?rev=701273&r1=701272&r2=701273&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/AppendTestUtil.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/AppendTestUtil.java Thu Oct  2 18:04:54 2008
@@ -22,14 +22,19 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
-
 /** Utilities for append-related tests */ 
 class AppendTestUtil {
+  /** To use a fixed random number generator seed, set the following value.
+   *  When it is null, a randomly generated seed is used instead.
+   */
+  static final Long RANDOM_NUMBER_GENERATOR_SEED = null;
+
   static final Log LOG = LogFactory.getLog(AppendTestUtil.class);
 
   private static final Random SEED = new Random();
   static {
-    final long seed = SEED.nextLong();
+    final long seed = RANDOM_NUMBER_GENERATOR_SEED == null?
+        SEED.nextLong(): RANDOM_NUMBER_GENERATOR_SEED;
     LOG.info("seed=" + seed);
     SEED.setSeed(seed);
   }
@@ -39,7 +44,7 @@
       final Random r =  new Random();
       synchronized(SEED) { 
         final long seed = SEED.nextLong();
-        SEED.setSeed(seed);
+        r.setSeed(seed);
         LOG.info(Thread.currentThread().getName() + ": seed=" + seed);
       }
       return r;

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java?rev=701273&r1=701272&r2=701273&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/TestLeaseRecovery2.java Thu Oct  2 18:04:54 2008
@@ -22,11 +22,15 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
+import org.apache.hadoop.security.UnixUserGroupInformation;
+import org.apache.hadoop.security.UserGroupInformation;
 
 public class TestLeaseRecovery2 extends junit.framework.TestCase {
-  static final int BLOCK_SIZE = 64;
-  static final int FILE_SIZE = 1024;
+  static final long BLOCK_SIZE = 1024;
+  static final int FILE_SIZE = 1024*16;
   static final short REPLICATION_NUM = (short)3;
   static byte[] buffer = new byte[FILE_SIZE];
 
@@ -34,9 +38,12 @@
     final long softLease = 1000;
     final long hardLease = 60 * 60 *1000;
     final short repl = 3;
-    Configuration conf = new Configuration();
+    final Configuration conf = new Configuration();
+    final int bufferSize = conf.getInt("io.file.buffer.size", 4096);
     conf.setLong("dfs.block.size", BLOCK_SIZE);
-    conf.setInt("io.bytes.per.checksum", 16);
+    conf.setInt("dfs.heartbeat.interval", 1);
+  //  conf.setInt("io.bytes.per.checksum", 16);
+
     MiniDFSCluster cluster = null;
     byte[] actual = new byte[FILE_SIZE];
 
@@ -48,18 +55,22 @@
       DistributedFileSystem dfs = (DistributedFileSystem)cluster.getFileSystem();
       // create a random file name
       String filestr = "/foo" + AppendTestUtil.nextInt();
+      System.out.println("filestr=" + filestr);
       Path filepath = new Path(filestr);
       FSDataOutputStream stm = dfs.create(filepath, true,
-                                 dfs.getConf().getInt("io.file.buffer.size", 4096),
-                                 (short)repl, (long)BLOCK_SIZE);
+          bufferSize, repl, BLOCK_SIZE);
       assertTrue(dfs.dfs.exists(filestr));
 
       // write random number of bytes into it.
       int size = AppendTestUtil.nextInt(FILE_SIZE);
+      System.out.println("size=" + size);
       stm.write(buffer, 0, size);
 
       // sync file
+      AppendTestUtil.LOG.info("sync");
       stm.sync();
+      AppendTestUtil.LOG.info("leasechecker.interrupt()");
+      dfs.dfs.leasechecker.interrupt();
 
       // set the soft limit to be 1 second so that the
       // namenode triggers lease recovery on next attempt to write-for-open.
@@ -67,30 +78,44 @@
 
       // try to re-open the file before closing the previous handle. This
       // should fail but will trigger lease recovery.
-      String oldClientName = dfs.dfs.clientName;
-      dfs.dfs.clientName += "_1";
-      while (true) {
-        try {
-          dfs.create(filepath, false,
-            dfs.getConf().getInt("io.file.buffer.size", 4096),
-            (short)repl, (long)BLOCK_SIZE);
-          assertTrue("Creation of an existing file should never succeed.", false);
-        } catch (IOException e) {
-          if (e.getMessage().contains("file exists")) {
-            break;
+      {
+        Configuration conf2 = new Configuration(conf);
+        String username = UserGroupInformation.getCurrentUGI().getUserName()+"_1";
+        UnixUserGroupInformation.saveToConf(conf2,
+            UnixUserGroupInformation.UGI_PROPERTY_NAME,
+            new UnixUserGroupInformation(username, new String[]{"supergroup"}));
+        FileSystem dfs2 = FileSystem.get(conf2);
+  
+        boolean done = false;
+        for(int i = 0; i < 10 && !done; i++) {
+          AppendTestUtil.LOG.info("i=" + i);
+          try {
+            dfs2.create(filepath, false, bufferSize, repl, BLOCK_SIZE);
+            fail("Creation of an existing file should never succeed.");
+          } catch (IOException ioe) {
+            final String message = ioe.getMessage();
+            if (message.contains("file exists")) {
+              AppendTestUtil.LOG.info("done", ioe);
+              done = true;
+            }
+            else if (message.contains(AlreadyBeingCreatedException.class.getSimpleName())) {
+              AppendTestUtil.LOG.info("GOOD! got " + message);
+            }
+            else {
+              AppendTestUtil.LOG.warn("UNEXPECTED IOException", ioe);
+            }
+          }
+
+          if (!done) {
+            AppendTestUtil.LOG.info("sleep " + 5000 + "ms");
+            try {Thread.sleep(5000);} catch (InterruptedException e) {}
           }
-          e.printStackTrace();
-        }
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException e) {
         }
+        assertTrue(done);
       }
-      System.out.println("Lease for file " +  filepath + " is recovered. " +
-                         "validating its contents now...");
 
-      // revert back  client identity
-      dfs.dfs.clientName = oldClientName;
+      AppendTestUtil.LOG.info("Lease for file " +  filepath + " is recovered. "
+          + "Validating its contents now...");
 
       // verify that file-size matches
       assertTrue("File should be " + size + " bytes, but is actually " +