Posted to common-commits@hadoop.apache.org by at...@apache.org on 2011/09/13 21:37:55 UTC

svn commit: r1170318 - in /hadoop/common/branches/branch-0.20-security: ./ src/core/org/apache/hadoop/io/ src/hdfs/org/apache/hadoop/hdfs/server/namenode/ src/test/org/apache/hadoop/hdfs/server/namenode/

Author: atm
Date: Tue Sep 13 19:37:55 2011
New Revision: 1170318

URL: http://svn.apache.org/viewvc?rev=1170318&view=rev
Log:
HDFS-2305. Running multiple 2NNs can result in corrupt file system. (atm)

Modified:
    hadoop/common/branches/branch-0.20-security/CHANGES.txt
    hadoop/common/branches/branch-0.20-security/src/core/org/apache/hadoop/io/MD5Hash.java
    hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java
    hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
    hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
    hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
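
In outline, the fix has the SecondaryNameNode (2NN) include an MD5 checksum of its newly
merged fsimage in the putimage request, and the NameNode recompute the digest while
downloading the image and reject the upload on a mismatch (the download-and-verify step in
GetImageServlet is also serialized by a lock). A minimal sketch of that handshake, stitched
together from the hunks below rather than taken verbatim from any one of them:

    // 2NN side (sketch): advertise the checksum of the merged image in the upload request
    MD5Hash newChecksum = getNewChecksum();          // digest of the freshly merged fsimage
    String fileid = "putimage=1&port=" + imagePort +
        "&machine=" + infoBindAddress +
        "&token=" + sig.toString() +
        "&newChecksum=" + newChecksum;
    TransferFsImage.getFileClient(fsName, fileid, (File[]) null, false);

    // NN side (sketch): fetch the image back, computing its digest on the fly, and verify
    MD5Hash actual = TransferFsImage.getFileClient(
        ff.getInfoServer(), "getimage=1", nnImage.getFsImageNameCheckpoint(), true);
    if (!actual.equals(ff.getNewChecksum())) {
      throw new IOException("Actual checksum " + actual + " does not match expected checksum");
    }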

Modified: hadoop/common/branches/branch-0.20-security/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/CHANGES.txt?rev=1170318&r1=1170317&r2=1170318&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.20-security/CHANGES.txt Tue Sep 13 19:37:55 2011
@@ -6,6 +6,8 @@ Release 0.20.206.0 - unreleased
 
   BUG FIXES
 
+    HDFS-2305. Running multiple 2NNs can result in corrupt file system. (atm)
+
   IMPROVEMENTS
 
 Release 0.20.205.0 - unreleased

Modified: hadoop/common/branches/branch-0.20-security/src/core/org/apache/hadoop/io/MD5Hash.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/core/org/apache/hadoop/io/MD5Hash.java?rev=1170318&r1=1170317&r2=1170318&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/core/org/apache/hadoop/io/MD5Hash.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/core/org/apache/hadoop/io/MD5Hash.java Tue Sep 13 19:37:55 2011
@@ -88,12 +88,19 @@ public class MD5Hash implements Writable
   public static MD5Hash digest(byte[] data) {
     return digest(data, 0, data.length);
   }
+  
+  /**
+   * Create a thread local MD5 digester
+   */
+  public static MessageDigest getDigester() {
+    return DIGESTER_FACTORY.get();
+  }
 
   /** Construct a hash value for the content from the InputStream. */
   public static MD5Hash digest(InputStream in) throws IOException {
     final byte[] buffer = new byte[4*1024]; 
 
-    final MessageDigest digester = DIGESTER_FACTORY.get();
+    final MessageDigest digester = getDigester();
     for(int n; (n = in.read(buffer)) != -1; ) {
       digester.update(buffer, 0, n);
     }
@@ -104,7 +111,7 @@ public class MD5Hash implements Writable
   /** Construct a hash value for a byte array. */
   public static MD5Hash digest(byte[] data, int start, int len) {
     byte[] digest;
-    MessageDigest digester = DIGESTER_FACTORY.get();
+    MessageDigest digester = getDigester();
     digester.update(data, start, len);
     digest = digester.digest();
     return new MD5Hash(digest);
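
Usage note (not part of this commit's diff): the new getDigester() is intended to be wrapped
in a java.security.DigestInputStream so a caller can checksum a file while reading it, which
is how SecondaryNameNode.getNewChecksum() below computes the fsimage digest. A rough sketch,
assuming a local File named imageFile:

    MessageDigest digester = MD5Hash.getDigester();
    InputStream in = new DigestInputStream(new FileInputStream(imageFile), digester);
    try {
      byte[] buf = new byte[4 * 1024];
      while (in.read(buf) != -1) {
        // the digest is updated as a side effect of reading
      }
    } finally {
      in.close();
    }
    MD5Hash checksum = new MD5Hash(digester.digest());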

Modified: hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java?rev=1170318&r1=1170317&r2=1170318&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java Tue Sep 13 19:37:55 2011
@@ -37,6 +37,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 
@@ -48,6 +49,13 @@ import org.apache.hadoop.util.StringUtil
 public class GetImageServlet extends HttpServlet {
   private static final long serialVersionUID = -7669068179452648952L;
   private static final Log LOG = LogFactory.getLog(GetImageServlet.class);
+
+  /**
+   * A lock object to prevent multiple 2NNs from simultaneously uploading
+   * fsimage snapshots.
+   */
+  private Object fsImageTransferLock = new Object();
+  
   @SuppressWarnings("unchecked")
   public void doGet(final HttpServletRequest request,
                     final HttpServletResponse response
@@ -80,18 +88,26 @@ public class GetImageServlet extends Htt
             TransferFsImage.getFileServer(response.getOutputStream(),
                                           nnImage.getFsEditName());
           } else if (ff.putImage()) {
-            // issue a HTTP get request to download the new fsimage 
-            nnImage.validateCheckpointUpload(ff.getToken());
-            reloginIfNecessary().doAs(new PrivilegedExceptionAction<Void>() {
-              @Override
-              public Void run() throws Exception {
-                TransferFsImage.getFileClient(ff.getInfoServer(), "getimage=1", 
-                    nnImage.getFsImageNameCheckpoint());
-                return null;
-              }
-            });
-
-            nnImage.checkpointUploadDone();
+            synchronized (fsImageTransferLock) {
+              final MD5Hash expectedChecksum = ff.getNewChecksum();
+              // issue a HTTP get request to download the new fsimage 
+              nnImage.validateCheckpointUpload(ff.getToken());
+              reloginIfNecessary().doAs(new PrivilegedExceptionAction<Void>() {
+                @Override
+                public Void run() throws Exception {
+                  MD5Hash actualChecksum = TransferFsImage.getFileClient(ff.getInfoServer(),
+                      "getimage=1", nnImage.getFsImageNameCheckpoint(), true);
+                  LOG.info("Downloaded new fsimage with checksum: " + actualChecksum);
+                  if (!actualChecksum.equals(expectedChecksum)) {
+                    throw new IOException("Actual checksum of transferred fsimage: "
+                        + actualChecksum + " does not match expected checksum: "
+                        + expectedChecksum);
+                  }
+                  return null;
+                }
+              });
+              nnImage.checkpointUploadDone();
+            }
           }
           return null;
         }

Modified: hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java?rev=1170318&r1=1170317&r2=1170318&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java Tue Sep 13 19:37:55 2011
@@ -18,10 +18,12 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
-import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.URI;
+import java.security.DigestInputStream;
+import java.security.MessageDigest;
 import java.security.PrivilegedAction;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
@@ -33,11 +35,12 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
-import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.server.common.HdfsConstants;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
+import static org.apache.hadoop.hdfs.protocol.FSConstants.BUFFER_SIZE;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.http.HttpServer;
+import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.metrics2.source.JvmMetricsSource;
@@ -344,7 +347,7 @@ public class SecondaryNameNode implement
           String fileid = "getimage=1";
           File[] srcNames = checkpointImage.getImageFiles();
           assert srcNames.length > 0 : "No checkpoint targets.";
-          TransferFsImage.getFileClient(fsName, fileid, srcNames);
+          TransferFsImage.getFileClient(fsName, fileid, srcNames, false);
           LOG.info("Downloaded file " + srcNames[0].getName() + " size " +
                    srcNames[0].length() + " bytes.");
 
@@ -352,7 +355,7 @@ public class SecondaryNameNode implement
           fileid = "getedit=1";
           srcNames = checkpointImage.getEditsFiles();
           assert srcNames.length > 0 : "No checkpoint targets.";
-          TransferFsImage.getFileClient(fsName, fileid, srcNames);
+          TransferFsImage.getFileClient(fsName, fileid, srcNames, false);
           LOG.info("Downloaded file " + srcNames[0].getName() + " size " +
               srcNames[0].length() + " bytes.");
 
@@ -372,9 +375,35 @@ public class SecondaryNameNode implement
   private void putFSImage(CheckpointSignature sig) throws IOException {
     String fileid = "putimage=1&port=" + imagePort +
       "&machine=" + infoBindAddress +
-      "&token=" + sig.toString();
+      "&token=" + sig.toString() +
+      "&newChecksum=" + getNewChecksum();
     LOG.info("Posted URL " + fsName + fileid);
-    TransferFsImage.getFileClient(fsName, fileid, (File[])null);
+    TransferFsImage.getFileClient(fsName, fileid, (File[])null, false);
+  }
+  
+  /**
+   * Calculate the MD5 hash of the newly-merged fsimage.
+   * @return the checksum of the newly-merged fsimage.
+   */
+  MD5Hash getNewChecksum() throws IOException {
+    DigestInputStream imageIn = null;
+    try {
+      MessageDigest digester = MD5Hash.getDigester();
+      imageIn = new DigestInputStream(
+          new FileInputStream(checkpointImage.getFsImageName()), digester);
+      byte[] in = new byte[BUFFER_SIZE];
+      int totalRead = 0;
+      int read = 0;
+      while ((read = imageIn.read(in)) > 0) {
+        totalRead += read;
+        LOG.debug("Computing fsimage checksum. Read " + totalRead + " bytes so far.");
+      }
+      return new MD5Hash(digester.digest());
+    } finally {
+      if (imageIn != null) {
+        imageIn.close();
+      }
+    }
   }
 
   /**
@@ -399,7 +428,7 @@ public class SecondaryNameNode implement
     startCheckpoint();
 
     // Tell the namenode to start logging transactions in a new edit file
-    // Retuns a token that would be used to upload the merged image.
+    // Returns a token that should be used to verify the downloaded image file.
     CheckpointSignature sig = (CheckpointSignature)namenode.rollEditLog();
 
     // error simulation code for junit test
@@ -408,13 +437,11 @@ public class SecondaryNameNode implement
                             "after creating edits.new");
     }
 
-    downloadCheckpointFiles(sig);   // Fetch fsimage and edits
-    doMerge(sig);                   // Do the merge
-  
-    //
-    // Upload the new image into the NameNode. Then tell the Namenode
-    // to make this new uploaded image as the most current image.
-    //
+    downloadCheckpointFiles(sig); // Fetch fsimage and edits
+    doMerge(sig);                 // Do the merge
+    
+    // Upload the new image into the NameNode, providing the new checksum for
+    // the image file.
     putFSImage(sig);
 
     // error simulation code for junit test
@@ -423,6 +450,8 @@ public class SecondaryNameNode implement
                             "after uploading new image to NameNode");
     }
 
+    // Then tell the Namenode to make this new uploaded image as the most
+    // current image.
     namenode.rollFsImage();
     checkpointImage.endCheckpoint();
 

Modified: hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java?rev=1170318&r1=1170317&r2=1170318&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/hdfs/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java Tue Sep 13 19:37:55 2011
@@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.na
 
 import java.io.*;
 import java.net.*;
+import java.security.DigestInputStream;
+import java.security.MessageDigest;
 import java.util.Iterator;
 import java.util.Map;
 import javax.servlet.http.HttpServletResponse;
@@ -29,6 +31,7 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hdfs.protocol.FSConstants;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode.ErrorSimulator;
+import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.security.UserGroupInformation;
 
 /**
@@ -42,6 +45,7 @@ class TransferFsImage implements FSConst
   private int remoteport;
   private String machineName;
   private CheckpointSignature token;
+  private MD5Hash newChecksum = null;
   
   /**
    * File downloader.
@@ -59,6 +63,7 @@ class TransferFsImage implements FSConst
     remoteport = 0;
     machineName = null;
     token = null;
+    newChecksum = null;
 
     for (Iterator<String> it = pmap.keySet().iterator(); it.hasNext();) {
       String key = it.next();
@@ -74,6 +79,8 @@ class TransferFsImage implements FSConst
         machineName = pmap.get("machine")[0];
       } else if (key.equals("token")) { 
         token = new CheckpointSignature(pmap.get("token")[0]);
+      } else if (key.equals("newChecksum")) { 
+        newChecksum = new MD5Hash(pmap.get("newChecksum")[0]);
       }
     }
 
@@ -98,7 +105,15 @@ class TransferFsImage implements FSConst
   CheckpointSignature getToken() {
     return token;
   }
-
+  
+  /**
+   * Get the MD5 digest of the new image
+   * @return the MD5 digest of the new image
+   */
+  MD5Hash getNewChecksum() {
+    return newChecksum;
+  }
+  
   String getInfoServer() throws IOException{
     if (machineName == null || remoteport == 0) {
       throw new IOException ("MachineName and port undefined");
@@ -140,9 +155,11 @@ class TransferFsImage implements FSConst
   /**
    * Client-side Method to fetch file from a server
    * Copies the response from the URL to a list of local files.
+   * 
+   * @return a digest of the received file if getChecksum is true
    */
-  static void getFileClient(String fsName, String id, File[] localPath)
-    throws IOException {
+  static MD5Hash getFileClient(String fsName, String id, File[] localPath,
+      boolean getChecksum) throws IOException {
     byte[] buf = new byte[BUFFER_SIZE];
     String proto = UserGroupInformation.isSecurityEnabled() ? "https://" : "http://";
     
@@ -158,6 +175,11 @@ class TransferFsImage implements FSConst
     SecurityUtil.fetchServiceTicket(url);
     URLConnection connection = url.openConnection();
     InputStream stream = connection.getInputStream();
+    MessageDigest digester = null;
+    if (getChecksum) {
+      digester = MD5Hash.getDigester();
+      stream = new DigestInputStream(stream, digester);
+    }
     FileOutputStream[] output = null;
 
     try {
@@ -186,5 +208,6 @@ class TransferFsImage implements FSConst
         }
       }
     }
+    return digester == null ? null : new MD5Hash(digester.digest());
   }
 }
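
Aside (not part of the diff): the newChecksum request parameter works because MD5Hash
round-trips through its hex string form, so the 2NN can embed the digest in the putimage URL
and the TransferFsImage constructor above can parse it back with new MD5Hash(String). A small
sketch of that round trip, using illustrative input bytes:

    MD5Hash sent = MD5Hash.digest("example fsimage bytes".getBytes());
    String param = "newChecksum=" + sent;   // MD5Hash.toString() yields a hex string
    MD5Hash received = new MD5Hash(param.substring("newChecksum=".length()));
    assert sent.equals(received);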

Modified: hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java?rev=1170318&r1=1170317&r2=1170318&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/test/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java Tue Sep 13 19:37:55 2011
@@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.server.com
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;
 import org.apache.hadoop.hdfs.tools.DFSAdmin;
+import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
@@ -286,14 +287,14 @@ public class TestCheckpoint extends Test
 
       try {
         secondary.doCheckpoint();  // this should fail
-        assertTrue(false);
+        fail();
       } catch (IOException e) {
       }
       ErrorSimulator.clearErrorSimulation(0);
       secondary.shutdown(); // secondary namenode crash!
 
       // start new instance of secondary and verify that 
-      // a new rollEditLog suceedes inspite of the fact that 
+      // a new rollEditLog succeeds in spite of the fact that 
       // edits.new already exists.
       //
       secondary = startSecondaryNameNode(conf);
@@ -711,4 +712,76 @@ public class TestCheckpoint extends Test
       if(cluster!= null) cluster.shutdown();
     }
   }
+
+  /**
+   * Test multiple 2NNs running, where the second 2NN reports the address of the
+   * first 2NN when doing the image upload to the NN. This case will happen when
+   * multiple 2NNs are started with the default configs, which has them report
+   * their address to the NN as being "127.0.0.1".
+   */
+  public void testMultipleSecondaryNameNodes() throws IOException {
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+    try {
+      Configuration conf = new Configuration();
+      cluster = new MiniDFSCluster(conf, 0, true, null);
+      cluster.waitActive();
+      fs = cluster.getFileSystem();
+      
+      Path testPath1 = new Path("/tmp/foo");
+      Path testPath2 = new Path("/tmp/bar");
+
+      assertTrue(fs.mkdirs(testPath1));
+      
+      // Start up a 2NN and do a checkpoint.
+      SecondaryNameNode snn1 = startSecondaryNameNode(conf);
+      snn1.doCheckpoint();
+      
+      assertTrue(testPath1 + " should still exist after good checkpoint",
+          fs.exists(testPath1));
+      assertTrue(fs.mkdirs(testPath2));
+      assertTrue(testPath2 + " should exist", fs.exists(testPath2));
+      
+      
+      // Simulate a checkpoint by a second 2NN, but which tells the NN to grab
+      // the new merged fsimage from the original 2NN.
+      NameNode namenode = cluster.getNameNode();
+      CheckpointSignature sig = (CheckpointSignature)namenode.rollEditLog();
+      
+      String fileid = "putimage=1&port=" +
+          SecondaryNameNode.getHttpAddress(conf).getPort() +
+          "&machine=" + SecondaryNameNode.getHttpAddress(conf).getHostName() +
+          "&token=" + sig.toString() +
+          "&newChecksum=" + MD5Hash.digest("this will be a bad checksum".getBytes());
+      
+      try {
+        TransferFsImage.getFileClient(NameNode.getInfoServer(conf), fileid,
+            (File[])null, false);
+        namenode.rollFsImage();
+        fail();
+      } catch (IOException e) {
+        // This is expected.
+        System.out.println("Got expected exception " + e);
+      }
+      
+      // The in-memory NN state should still be fine. We've only messed with the
+      // HDFS metadata on the local FS.
+      assertTrue(testPath1 + " should exist after bad checkpoint, before restart",
+          fs.exists(testPath1));
+      assertTrue(testPath2 + " should exist after bad checkpoint, before restart",
+          fs.exists(testPath2));
+      
+      cluster.restartNameNode();
+      
+      // After restarting the NN, it will read the HDFS metadata from disk.
+      // Things should still be good.
+      assertTrue(testPath1 + " should exist after bad checkpoint, after restart",
+          fs.exists(testPath1));
+      assertTrue(testPath2 + " should exist after bad checkpoint, after restart",
+          fs.exists(testPath2));
+    } finally {
+      if(fs != null) fs.close();
+      if(cluster!= null) cluster.shutdown();
+    }
+  }
 }