You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by bo...@apache.org on 2012/02/09 17:41:44 UTC

svn commit: r1242389 - in /hadoop/common/trunk/hadoop-common-project/hadoop-common: ./ src/main/java/org/apache/hadoop/fs/ src/main/java/org/apache/hadoop/fs/shell/ src/main/java/org/apache/hadoop/fs/viewfs/ src/test/java/org/apache/hadoop/fs/

Author: bobby
Date: Thu Feb  9 16:41:44 2012
New Revision: 1242389

URL: http://svn.apache.org/viewvc?rev=1242389&view=rev
Log:
HADOOP-8042.  When copying a file out of HDFS, modifying it, and uploading it back into HDFS, the put fails due to a CRC mismatch (Daryn Sharp via bobby)

Added:
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
Modified:
    hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt Thu Feb  9 16:41:44 2012
@@ -162,6 +162,10 @@ Release 0.23.2 - UNRELEASED 
 
   BUG FIXES
 
+    HADOOP-8042  When copying a file out of HDFS, modifying it, and uploading
+    it back into HDFS, the put fails due to a CRC mismatch
+    (Daryn Sharp via bobby)
+
 Release 0.23.1 - 2012-02-08 
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java Thu Feb  9 16:41:44 2012
@@ -43,6 +43,7 @@ public abstract class ChecksumFileSystem
   private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
   private int bytesPerChecksum = 512;
   private boolean verifyChecksum = true;
+  private boolean writeChecksum = true;
 
   public static double getApproxChkSumLength(long size) {
     return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size;
@@ -67,6 +68,11 @@ public abstract class ChecksumFileSystem
     this.verifyChecksum = verifyChecksum;
   }
 
+  @Override
+  public void setWriteChecksum(boolean writeChecksum) {
+    this.writeChecksum = writeChecksum;
+  }
+  
   /** get the raw file system */
   public FileSystem getRawFileSystem() {
     return fs;
@@ -428,9 +434,20 @@ public abstract class ChecksumFileSystem
         throw new IOException("Mkdirs failed to create " + parent);
       }
     }
-    final FSDataOutputStream out = new FSDataOutputStream(
-        new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
-            blockSize, progress), null);
+    final FSDataOutputStream out;
+    if (writeChecksum) {
+      out = new FSDataOutputStream(
+          new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
+              blockSize, progress), null);
+    } else {
+      out = fs.create(f, permission, overwrite, bufferSize, replication,
+          blockSize, progress);
+      // remove the checksum file since we aren't writing one
+      Path checkFile = getChecksumFile(f);
+      if (fs.exists(checkFile)) {
+        fs.delete(checkFile, true);
+      }
+    }
     if (permission != null) {
       setPermission(f, permission);
     }

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java Thu Feb  9 16:41:44 2012
@@ -1937,6 +1937,15 @@ public abstract class FileSystem extends
   }
 
   /**
+   * Set the write checksum flag. This is only applicable if the 
+   * corresponding FileSystem supports checksum. By default doesn't do anything.
+   * @param writeChecksum
+   */
+  public void setWriteChecksum(boolean writeChecksum) {
+    //doesn't do anything
+  }
+
+  /**
    * Return a list of file status objects that corresponds to the list of paths
    * excluding those non-existent paths.
    * 

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java Thu Feb  9 16:41:44 2012
@@ -360,6 +360,11 @@ public class FilterFileSystem extends Fi
   public void setVerifyChecksum(boolean verifyChecksum) {
     fs.setVerifyChecksum(verifyChecksum);
   }
+  
+  @Override
+  public void setWriteChecksum(boolean writeChecksum) {
+    fs.setVerifyChecksum(writeChecksum);
+  }
 
   @Override
   public Configuration getConf() {

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java Thu Feb  9 16:41:44 2012
@@ -41,7 +41,9 @@ import org.apache.hadoop.io.IOUtils;
  */
 abstract class CommandWithDestination extends FsCommand {  
   protected PathData dst;
-  protected boolean overwrite = false;
+  private boolean overwrite = false;
+  private boolean verifyChecksum = true;
+  private boolean writeChecksum = true;
   
   /**
    * 
@@ -53,6 +55,14 @@ abstract class CommandWithDestination ex
     overwrite = flag;
   }
   
+  protected void setVerifyChecksum(boolean flag) {
+    verifyChecksum = flag;
+  }
+  
+  protected void setWriteChecksum(boolean flag) {
+    writeChecksum = flag;
+  }
+  
   /**
    *  The last arg is expected to be a local path, if only one argument is
    *  given then the destination will be the current directory 
@@ -201,6 +211,7 @@ abstract class CommandWithDestination ex
    * @throws IOException if copy fails
    */ 
   protected void copyFileToTarget(PathData src, PathData target) throws IOException {
+    src.fs.setVerifyChecksum(verifyChecksum);
     copyStreamToTarget(src.fs.open(src.path), target);
   }
   
@@ -217,6 +228,7 @@ abstract class CommandWithDestination ex
     if (target.exists && (target.stat.isDirectory() || !overwrite)) {
       throw new PathExistsException(target.toString());
     }
+    target.fs.setWriteChecksum(writeChecksum);
     PathData tempFile = null;
     try {
       tempFile = target.createTempFile(target+"._COPYING_");

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java Thu Feb  9 16:41:44 2012
@@ -25,7 +25,6 @@ import java.util.List;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.ChecksumFileSystem;
 import org.apache.hadoop.fs.FileUtil;
 
 /** Various commands for copy files */
@@ -103,43 +102,17 @@ class CopyCommands {  
       "to the local name.  <src> is kept.  When copying multiple,\n" +
       "files, the destination must be a directory.";
 
-    /**
-     * The prefix for the tmp file used in copyToLocal.
-     * It must be at least three characters long, required by
-     * {@link java.io.File#createTempFile(String, String, File)}.
-     */
-    private boolean copyCrc;
-    private boolean verifyChecksum;
-
     @Override
     protected void processOptions(LinkedList<String> args)
     throws IOException {
       CommandFormat cf = new CommandFormat(
           1, Integer.MAX_VALUE, "crc", "ignoreCrc");
       cf.parse(args);
-      copyCrc = cf.getOpt("crc");
-      verifyChecksum = !cf.getOpt("ignoreCrc");
-      
+      setWriteChecksum(cf.getOpt("crc"));
+      setVerifyChecksum(!cf.getOpt("ignoreCrc"));
       setRecursive(true);
       getLocalDestination(args);
     }
-
-    @Override
-    protected void copyFileToTarget(PathData src, PathData target)
-    throws IOException {
-      src.fs.setVerifyChecksum(verifyChecksum);
-
-      if (copyCrc && !(src.fs instanceof ChecksumFileSystem)) {
-        displayWarning(src.fs + ": Does not support checksums");
-        copyCrc = false;
-      }      
-
-      super.copyFileToTarget(src, target);
-      if (copyCrc) {
-        // should we delete real file if crc copy fails?
-        super.copyFileToTarget(src.getChecksumFile(), target.getChecksumFile());
-      }
-    }
   }
 
   /**

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java Thu Feb  9 16:41:44 2012
@@ -27,7 +27,6 @@ import java.net.URISyntaxException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.ChecksumFileSystem;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
@@ -170,19 +169,6 @@ public class PathData {
   }
   
   /**
-   * Return the corresponding crc data for a file.  Avoids exposing the fs
-   * contortions to the caller.  
-   * @return PathData of the crc file
-   * @throws IOException is anything goes wrong
-   */
-  public PathData getChecksumFile() throws IOException {
-    checkIfExists(FileTypeRequirement.SHOULD_NOT_BE_DIRECTORY);
-    ChecksumFileSystem srcFs = (ChecksumFileSystem)fs;
-    Path srcPath = srcFs.getChecksumFile(path);
-    return new PathData(srcFs.getRawFileSystem(), srcPath.toString());
-  }
-
-  /**
    * Returns a temporary file for this PathData with the given extension.
    * The file will be deleted on exit.
    * @param extension for the temporary file

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java Thu Feb  9 16:41:44 2012
@@ -470,6 +470,15 @@ public class ViewFileSystem extends File
     }
   }
   
+  @Override
+  public void setWriteChecksum(final boolean writeChecksum) { 
+    List<InodeTree.MountPoint<FileSystem>> mountPoints = 
+        fsState.getMountPoints();
+    for (InodeTree.MountPoint<FileSystem> mount : mountPoints) {
+      mount.target.targetFileSystem.setWriteChecksum(writeChecksum);
+    }
+  }
+
   public MountPoint[] getMountPoints() {
     List<InodeTree.MountPoint<FileSystem>> mountPoints = 
                   fsState.getMountPoints();

Added: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java?rev=1242389&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java (added)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java Thu Feb  9 16:41:44 2012
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestFsShellCopy {  
+  static Configuration conf;
+  static FsShell shell; 
+  static LocalFileSystem lfs;
+  static Path testRootDir, srcPath, dstPath;
+  
+  @BeforeClass
+  public static void setup() throws Exception {
+    conf = new Configuration();
+    shell = new FsShell(conf);
+    lfs = FileSystem.getLocal(conf);
+    testRootDir = new Path(
+        System.getProperty("test.build.data","test/build/data"), "testShellCopy");
+    lfs.mkdirs(testRootDir);    
+    srcPath = new Path(testRootDir, "srcFile");
+    dstPath = new Path(testRootDir, "dstFile");
+  }
+  
+  @Before
+  public void prepFiles() throws Exception {
+    lfs.setVerifyChecksum(true);
+    lfs.setWriteChecksum(true);
+    
+    lfs.delete(srcPath, true);
+    lfs.delete(dstPath, true);
+    FSDataOutputStream out = lfs.create(srcPath);
+    out.writeChars("hi");
+    out.close();
+    assertTrue(lfs.exists(lfs.getChecksumFile(srcPath)));
+  }
+
+  @Test
+  public void testCopyNoCrc() throws Exception {
+    shellRun(0, "-get", srcPath.toString(), dstPath.toString());
+    checkPath(dstPath, false);
+  }
+
+  @Test
+  public void testCopyCrc() throws Exception {
+    shellRun(0, "-get", "-crc", srcPath.toString(), dstPath.toString());
+    checkPath(dstPath, true);
+  }
+
+  
+  @Test
+  public void testCorruptedCopyCrc() throws Exception {
+    FSDataOutputStream out = lfs.getRawFileSystem().create(srcPath);
+    out.writeChars("bang");
+    out.close();
+    shellRun(1, "-get", srcPath.toString(), dstPath.toString());
+  }
+
+  @Test
+  public void testCorruptedCopyIgnoreCrc() throws Exception {
+    shellRun(0, "-get", "-ignoreCrc", srcPath.toString(), dstPath.toString());
+    checkPath(dstPath, false);
+  }
+
+  private void checkPath(Path p, boolean expectChecksum) throws IOException {
+    assertTrue(lfs.exists(p));
+    boolean hasChecksum = lfs.exists(lfs.getChecksumFile(p));
+    assertEquals(expectChecksum, hasChecksum);
+  }
+
+  private void shellRun(int n, String ... args) throws Exception {
+    assertEquals(n, shell.run(args));
+  }
+}