You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by bo...@apache.org on 2012/02/09 17:41:44 UTC
svn commit: r1242389 - in
/hadoop/common/trunk/hadoop-common-project/hadoop-common: ./
src/main/java/org/apache/hadoop/fs/ src/main/java/org/apache/hadoop/fs/shell/
src/main/java/org/apache/hadoop/fs/viewfs/
src/test/java/org/apache/hadoop/fs/
Author: bobby
Date: Thu Feb 9 16:41:44 2012
New Revision: 1242389
URL: http://svn.apache.org/viewvc?rev=1242389&view=rev
Log:
HADOOP-8042. When copying a file out of HDFS, modifying it, and uploading it back into HDFS, the put fails due to a CRC mismatch (Daryn Sharp via bobby)
Added:
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
Modified:
hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt Thu Feb 9 16:41:44 2012
@@ -162,6 +162,10 @@ Release 0.23.2 - UNRELEASED
BUG FIXES
+ HADOOP-8042 When copying a file out of HDFS, modifying it, and uploading
+ it back into HDFS, the put fails due to a CRC mismatch
+ (Daryn Sharp via bobby)
+
Release 0.23.1 - 2012-02-08
INCOMPATIBLE CHANGES
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java Thu Feb 9 16:41:44 2012
@@ -43,6 +43,7 @@ public abstract class ChecksumFileSystem
private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
private int bytesPerChecksum = 512;
private boolean verifyChecksum = true;
+ private boolean writeChecksum = true;
public static double getApproxChkSumLength(long size) {
return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size;
@@ -67,6 +68,11 @@ public abstract class ChecksumFileSystem
this.verifyChecksum = verifyChecksum;
}
+ @Override
+ public void setWriteChecksum(boolean writeChecksum) {
+ this.writeChecksum = writeChecksum;
+ }
+
/** get the raw file system */
public FileSystem getRawFileSystem() {
return fs;
@@ -428,9 +434,20 @@ public abstract class ChecksumFileSystem
throw new IOException("Mkdirs failed to create " + parent);
}
}
- final FSDataOutputStream out = new FSDataOutputStream(
- new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
- blockSize, progress), null);
+ final FSDataOutputStream out;
+ if (writeChecksum) {
+ out = new FSDataOutputStream(
+ new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
+ blockSize, progress), null);
+ } else {
+ out = fs.create(f, permission, overwrite, bufferSize, replication,
+ blockSize, progress);
+ // remove the checksum file since we aren't writing one
+ Path checkFile = getChecksumFile(f);
+ if (fs.exists(checkFile)) {
+ fs.delete(checkFile, true);
+ }
+ }
if (permission != null) {
setPermission(f, permission);
}
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java Thu Feb 9 16:41:44 2012
@@ -1937,6 +1937,15 @@ public abstract class FileSystem extends
}
/**
+ * Set the write checksum flag. This is only applicable if the
+ * corresponding FileSystem supports checksum. By default doesn't do anything.
+ * @param writeChecksum
+ */
+ public void setWriteChecksum(boolean writeChecksum) {
+ //doesn't do anything
+ }
+
+ /**
* Return a list of file status objects that corresponds to the list of paths
* excluding those non-existent paths.
*
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java Thu Feb 9 16:41:44 2012
@@ -360,6 +360,11 @@ public class FilterFileSystem extends Fi
public void setVerifyChecksum(boolean verifyChecksum) {
fs.setVerifyChecksum(verifyChecksum);
}
+
+ @Override
+ public void setWriteChecksum(boolean writeChecksum) {
+ fs.setWriteChecksum(writeChecksum); // delegate the write-checksum flag to the wrapped file system
+ }
@Override
public Configuration getConf() {
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java Thu Feb 9 16:41:44 2012
@@ -41,7 +41,9 @@ import org.apache.hadoop.io.IOUtils;
*/
abstract class CommandWithDestination extends FsCommand {
protected PathData dst;
- protected boolean overwrite = false;
+ private boolean overwrite = false;
+ private boolean verifyChecksum = true;
+ private boolean writeChecksum = true;
/**
*
@@ -53,6 +55,14 @@ abstract class CommandWithDestination ex
overwrite = flag;
}
+ protected void setVerifyChecksum(boolean flag) {
+ verifyChecksum = flag;
+ }
+
+ protected void setWriteChecksum(boolean flag) {
+ writeChecksum = flag;
+ }
+
/**
* The last arg is expected to be a local path, if only one argument is
* given then the destination will be the current directory
@@ -201,6 +211,7 @@ abstract class CommandWithDestination ex
* @throws IOException if copy fails
*/
protected void copyFileToTarget(PathData src, PathData target) throws IOException {
+ src.fs.setVerifyChecksum(verifyChecksum);
copyStreamToTarget(src.fs.open(src.path), target);
}
@@ -217,6 +228,7 @@ abstract class CommandWithDestination ex
if (target.exists && (target.stat.isDirectory() || !overwrite)) {
throw new PathExistsException(target.toString());
}
+ target.fs.setWriteChecksum(writeChecksum);
PathData tempFile = null;
try {
tempFile = target.createTempFile(target+"._COPYING_");
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java Thu Feb 9 16:41:44 2012
@@ -25,7 +25,6 @@ import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.ChecksumFileSystem;
import org.apache.hadoop.fs.FileUtil;
/** Various commands for copy files */
@@ -103,43 +102,17 @@ class CopyCommands {
"to the local name. <src> is kept. When copying multiple,\n" +
"files, the destination must be a directory.";
- /**
- * The prefix for the tmp file used in copyToLocal.
- * It must be at least three characters long, required by
- * {@link java.io.File#createTempFile(String, String, File)}.
- */
- private boolean copyCrc;
- private boolean verifyChecksum;
-
@Override
protected void processOptions(LinkedList<String> args)
throws IOException {
CommandFormat cf = new CommandFormat(
1, Integer.MAX_VALUE, "crc", "ignoreCrc");
cf.parse(args);
- copyCrc = cf.getOpt("crc");
- verifyChecksum = !cf.getOpt("ignoreCrc");
-
+ setWriteChecksum(cf.getOpt("crc"));
+ setVerifyChecksum(!cf.getOpt("ignoreCrc"));
setRecursive(true);
getLocalDestination(args);
}
-
- @Override
- protected void copyFileToTarget(PathData src, PathData target)
- throws IOException {
- src.fs.setVerifyChecksum(verifyChecksum);
-
- if (copyCrc && !(src.fs instanceof ChecksumFileSystem)) {
- displayWarning(src.fs + ": Does not support checksums");
- copyCrc = false;
- }
-
- super.copyFileToTarget(src, target);
- if (copyCrc) {
- // should we delete real file if crc copy fails?
- super.copyFileToTarget(src.getChecksumFile(), target.getChecksumFile());
- }
- }
}
/**
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java Thu Feb 9 16:41:44 2012
@@ -27,7 +27,6 @@ import java.net.URISyntaxException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.ChecksumFileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
@@ -170,19 +169,6 @@ public class PathData {
}
/**
- * Return the corresponding crc data for a file. Avoids exposing the fs
- * contortions to the caller.
- * @return PathData of the crc file
- * @throws IOException is anything goes wrong
- */
- public PathData getChecksumFile() throws IOException {
- checkIfExists(FileTypeRequirement.SHOULD_NOT_BE_DIRECTORY);
- ChecksumFileSystem srcFs = (ChecksumFileSystem)fs;
- Path srcPath = srcFs.getChecksumFile(path);
- return new PathData(srcFs.getRawFileSystem(), srcPath.toString());
- }
-
- /**
* Returns a temporary file for this PathData with the given extension.
* The file will be deleted on exit.
* @param extension for the temporary file
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java?rev=1242389&r1=1242388&r2=1242389&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java Thu Feb 9 16:41:44 2012
@@ -470,6 +470,15 @@ public class ViewFileSystem extends File
}
}
+ @Override
+ public void setWriteChecksum(final boolean writeChecksum) {
+ List<InodeTree.MountPoint<FileSystem>> mountPoints =
+ fsState.getMountPoints();
+ for (InodeTree.MountPoint<FileSystem> mount : mountPoints) {
+ mount.target.targetFileSystem.setWriteChecksum(writeChecksum);
+ }
+ }
+
public MountPoint[] getMountPoints() {
List<InodeTree.MountPoint<FileSystem>> mountPoints =
fsState.getMountPoints();
Added: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java?rev=1242389&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java (added)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java Thu Feb 9 16:41:44 2012
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestFsShellCopy {
+ static Configuration conf;
+ static FsShell shell;
+ static LocalFileSystem lfs;
+ static Path testRootDir, srcPath, dstPath;
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ conf = new Configuration();
+ shell = new FsShell(conf);
+ lfs = FileSystem.getLocal(conf);
+ testRootDir = new Path(
+ System.getProperty("test.build.data","test/build/data"), "testShellCopy");
+ lfs.mkdirs(testRootDir);
+ srcPath = new Path(testRootDir, "srcFile");
+ dstPath = new Path(testRootDir, "dstFile");
+ }
+
+ @Before
+ public void prepFiles() throws Exception {
+ lfs.setVerifyChecksum(true);
+ lfs.setWriteChecksum(true);
+
+ lfs.delete(srcPath, true);
+ lfs.delete(dstPath, true);
+ FSDataOutputStream out = lfs.create(srcPath);
+ out.writeChars("hi");
+ out.close();
+ assertTrue(lfs.exists(lfs.getChecksumFile(srcPath)));
+ }
+
+ @Test
+ public void testCopyNoCrc() throws Exception {
+ shellRun(0, "-get", srcPath.toString(), dstPath.toString());
+ checkPath(dstPath, false);
+ }
+
+ @Test
+ public void testCopyCrc() throws Exception {
+ shellRun(0, "-get", "-crc", srcPath.toString(), dstPath.toString());
+ checkPath(dstPath, true);
+ }
+
+
+ @Test
+ public void testCorruptedCopyCrc() throws Exception {
+ FSDataOutputStream out = lfs.getRawFileSystem().create(srcPath);
+ out.writeChars("bang");
+ out.close();
+ shellRun(1, "-get", srcPath.toString(), dstPath.toString());
+ }
+
+ @Test
+ public void testCorruptedCopyIgnoreCrc() throws Exception {
+ shellRun(0, "-get", "-ignoreCrc", srcPath.toString(), dstPath.toString());
+ checkPath(dstPath, false);
+ }
+
+ private void checkPath(Path p, boolean expectChecksum) throws IOException {
+ assertTrue(lfs.exists(p));
+ boolean hasChecksum = lfs.exists(lfs.getChecksumFile(p));
+ assertEquals(expectChecksum, hasChecksum);
+ }
+
+ private void shellRun(int n, String ... args) throws Exception {
+ assertEquals(n, shell.run(args));
+ }
+}