Posted to common-commits@hadoop.apache.org by sz...@apache.org on 2009/04/04 00:55:03 UTC
svn commit: r761830 - in /hadoop/core/branches/branch-0.20: CHANGES.txt
src/test/org/apache/hadoop/fs/TestCopyFiles.java
src/tools/org/apache/hadoop/tools/DistCp.java
Author: szetszwo
Date: Fri Apr 3 22:55:03 2009
New Revision: 761830
URL: http://svn.apache.org/viewvc?rev=761830&view=rev
Log:
HADOOP-5227. Fix distcp so -update and -delete can be meaningfully combined. (Tsz Wo (Nicholas), SZE via cdouglas)
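Context for the fix: before this commit, combining the two flags was unreliable because the destination mapping (dst_writer) was only recorded for files actually copied, so entries skipped by -update could be wrongly removed by -delete, and hitting -filelimit/-sizelimit aborted the traversal entirely. A minimal invocation sketch mirroring how the test below drives the tool; the cluster URI and paths are placeholders, not part of this commit:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.tools.DistCp;
    import org.apache.hadoop.util.ToolRunner;

    public class DistCpUpdateDeleteExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // -update: skip files whose src and dst copies are already the same.
        // -delete: remove dst files that no longer exist under src.
        int exit = ToolRunner.run(new DistCp(conf), new String[] {
            "-update", "-delete", "-log", "/log",
            "hdfs://namenode:8020/src_root",    // placeholder source
            "hdfs://namenode:8020/dst_root"});  // placeholder destination
        System.exit(exit);
      }
    }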
Modified:
hadoop/core/branches/branch-0.20/CHANGES.txt
hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestCopyFiles.java
hadoop/core/branches/branch-0.20/src/tools/org/apache/hadoop/tools/DistCp.java
Modified: hadoop/core/branches/branch-0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/CHANGES.txt?rev=761830&r1=761829&r2=761830&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.20/CHANGES.txt Fri Apr 3 22:55:03 2009
@@ -840,6 +840,9 @@
join back before scheduling new tasks. This fixes race conditions associated
with greedy scheduling as was the case earlier. (Amar Kamat via ddas)
+ HADOOP-5227. Fix distcp so -update and -delete can be meaningfully
+ combined. (Tsz Wo (Nicholas), SZE via cdouglas)
+
Release 0.19.2 - Unreleased
BUG FIXES
Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestCopyFiles.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestCopyFiles.java?rev=761830&r1=761829&r2=761830&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestCopyFiles.java (original)
+++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/fs/TestCopyFiles.java Fri Apr 3 22:55:03 2009
@@ -32,19 +32,17 @@
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
-import org.apache.log4j.Level;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Level;
/**
@@ -62,7 +60,7 @@
static final URI LOCAL_FS = URI.create("file:///");
private static final Random RAN = new Random();
- private static final int NFILES = 20;
+ private static final int NFILES = 7;
private static String TEST_ROOT_DIR =
new Path(System.getProperty("test.build.data","/tmp"))
.toString().replace(' ', '+');
@@ -573,7 +571,7 @@
for (MyFile f : files) {
totsize += f.getSize();
}
- JobConf job = mr.createJobConf();
+ Configuration job = mr.createJobConf();
job.setLong("distcp.bytes.per.map", totsize / 3);
ToolRunner.run(new DistCp(job),
new String[] {"-m", "100",
@@ -611,6 +609,7 @@
final String nnUri = FileSystem.getDefaultUri(conf).toString();
final FileSystem fs = FileSystem.get(URI.create(nnUri), conf);
final DistCp distcp = new DistCp(conf);
+ final FsShell shell = new FsShell(conf);
final String srcrootdir = "/src_root";
final Path srcrootpath = new Path(srcrootdir);
@@ -624,7 +623,10 @@
ToolRunner.run(distcp,
new String[]{"-filelimit", ""+filelimit, nnUri+srcrootdir, nnUri+dstrootdir});
-
+ String results = execCmd(shell, "-lsr", dstrootdir);
+ results = removePrefix(results, dstrootdir);
+ System.out.println("results=" + results);
+
FileStatus[] dststat = getFileStatus(fs, dstrootdir, files, true);
assertEquals(filelimit, dststat.length);
deldir(fs, dstrootdir);
@@ -745,6 +747,7 @@
}
}
+ /** test -delete */
public void testDelete() throws Exception {
final Configuration conf = new Configuration();
MiniDFSCluster cluster = null;
@@ -760,31 +763,45 @@
final String srcrootdir = "/src_root";
final String dstrootdir = "/dst_root";
- {//test -delete
+ {
+ //create source files
createFiles(nnURI, srcrootdir);
- createFiles(nnURI, dstrootdir);
- create(fs, new Path(dstrootdir, "foo"));
- create(fs, new Path(dstrootdir, "foobar"));
-
- System.out.println("srcrootdir=" + srcrootdir);
- shell.run(new String[]{"-lsr", srcrootdir});
+ String srcresults = execCmd(shell, "-lsr", srcrootdir);
+ srcresults = removePrefix(srcresults, srcrootdir);
+ System.out.println("srcresults=" + srcresults);
+ //create some files in dst
+ createFiles(nnURI, dstrootdir);
System.out.println("dstrootdir=" + dstrootdir);
shell.run(new String[]{"-lsr", dstrootdir});
+ //run distcp
ToolRunner.run(distcp,
new String[]{"-delete", "-update", "-log", "/log",
nnUri+srcrootdir, nnUri+dstrootdir});
- String srcresults = execCmd(shell, "-lsr", srcrootdir);
- srcresults = removePrefix(srcresults, srcrootdir);
- System.out.println("srcresults=" + srcresults);
-
+ //make sure src and dst contain the same files
String dstresults = execCmd(shell, "-lsr", dstrootdir);
dstresults = removePrefix(dstresults, dstrootdir);
- System.out.println("dstresults=" + dstresults);
+ System.out.println("first dstresults=" + dstresults);
+ assertEquals(srcresults, dstresults);
+
+ //create additional files in dst
+ create(fs, new Path(dstrootdir, "foo"));
+ create(fs, new Path(dstrootdir, "foobar"));
+
+ //run distcp again
+ ToolRunner.run(distcp,
+ new String[]{"-delete", "-update", "-log", "/log2",
+ nnUri+srcrootdir, nnUri+dstrootdir});
+ //make sure src and dst contain the same files
+ dstresults = execCmd(shell, "-lsr", dstrootdir);
+ dstresults = removePrefix(dstresults, dstrootdir);
+ System.out.println("second dstresults=" + dstresults);
assertEquals(srcresults, dstresults);
+
+ //cleanup
deldir(fs, dstrootdir);
deldir(fs, srcrootdir);
}
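The assertions above lean on two existing TestCopyFiles helpers whose bodies are outside this diff: execCmd, which runs an FsShell command and returns its printed output, and removePrefix, which strips the root directory from the listing so the src and dst results can be compared directly. A hedged sketch of what such helpers can look like; the real implementations in the test file may differ:

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;
    import org.apache.hadoop.fs.FsShell;

    class ShellHelpers {
      /** Run an FsShell command and capture what it prints to stdout. */
      static String execCmd(FsShell shell, String... args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream old = System.out;
        System.setOut(new PrintStream(bytes, true));
        try {
          shell.run(args);
        } finally {
          System.setOut(old);
        }
        return bytes.toString();
      }

      /** Drop the root prefix so /src_root/a and /dst_root/a compare equal. */
      static String removePrefix(String output, String prefix) {
        return output.replace(prefix, "");
      }
    }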
Modified: hadoop/core/branches/branch-0.20/src/tools/org/apache/hadoop/tools/DistCp.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/tools/org/apache/hadoop/tools/DistCp.java?rev=761830&r1=761829&r2=761830&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/tools/org/apache/hadoop/tools/DistCp.java (original)
+++ hadoop/core/branches/branch-0.20/src/tools/org/apache/hadoop/tools/DistCp.java Fri Apr 3 22:55:03 2009
@@ -1039,10 +1039,8 @@
(args.srcs.size() == 1 && !dstExists) || update || overwrite;
int srcCount = 0, cnsyncf = 0, dirsyn = 0;
long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
- boolean exceededlimit = false;
try {
- for(Iterator<Path> srcItr = args.srcs.iterator();
- !exceededlimit && srcItr.hasNext(); ) {
+ for(Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext(); ) {
final Path src = srcItr.next();
FileSystem srcfs = src.getFileSystem(conf);
FileStatus srcfilestat = srcfs.getFileStatus(src);
@@ -1052,10 +1050,10 @@
}
Stack<FileStatus> pathstack = new Stack<FileStatus>();
- for(pathstack.push(srcfilestat); !exceededlimit && !pathstack.empty(); ) {
+ for(pathstack.push(srcfilestat); !pathstack.empty(); ) {
FileStatus cur = pathstack.pop();
FileStatus[] children = srcfs.listStatus(cur.getPath());
- for(int i = 0; !exceededlimit && i < children.length; i++) {
+ for(int i = 0; i < children.length; i++) {
boolean skipfile = false;
final FileStatus child = children[i];
final String dst = makeRelative(root, child.getPath());
@@ -1067,37 +1065,36 @@
else {
//skip file if the src and the dst files are the same.
skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
-
+ //skip file if it exceeds the file limit or the size limit
+ skipfile |= fileCount == args.filelimit
+ || byteCount + child.getLen() > args.sizelimit;
+
if (!skipfile) {
++fileCount;
byteCount += child.getLen();
-
- exceededlimit |= fileCount > args.filelimit
- || byteCount > args.sizelimit;
- if (!exceededlimit) {
- if (LOG.isTraceEnabled()) {
- LOG.trace("adding file " + child.getPath());
- }
-
- ++cnsyncf;
- cbsyncs += child.getLen();
- if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
- src_writer.sync();
- dst_writer.sync();
- cnsyncf = 0;
- cbsyncs = 0L;
- }
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("adding file " + child.getPath());
+ }
+
+ ++cnsyncf;
+ cbsyncs += child.getLen();
+ if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
+ src_writer.sync();
+ dst_writer.sync();
+ cnsyncf = 0;
+ cbsyncs = 0L;
}
}
}
- if (!skipfile && !exceededlimit) {
+ if (!skipfile) {
src_writer.append(new LongWritable(child.isDir()? 0: child.getLen()),
new FilePair(child, dst));
- dst_writer.append(new Text(dst),
- new Text(child.getPath().toString()));
}
+
+ dst_writer.append(new Text(dst),
+ new Text(child.getPath().toString()));
}
if (cur.isDir()) {
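The DistCp.java hunk above is the substance of the fix, and it changes two things in the setup loop. First, exceeding -filelimit or -sizelimit now marks the individual file as skipped instead of aborting the whole traversal through the removed exceededlimit flag. Second, the dst_writer entry is appended for every examined child, not only for copied ones, so the -delete pass sees the complete destination mapping and no longer removes files that -update merely skipped. A condensed restatement of the per-file decision, with names following the diff; this is an illustration of the logic in isolation, not the shipped method:

    // Sketch of the post-commit skip decision. fileCount and byteCount are
    // the running totals from the traversal; fileLimit and sizeLimit come
    // from the parsed arguments (args.filelimit, args.sizelimit).
    final class SkipLogic {
      static boolean skipFile(boolean update, boolean srcEqualsDst,
                              long fileCount, long fileLimit,
                              long byteCount, long childLen, long sizeLimit) {
        // -update: skip when the src and dst copies are already identical.
        boolean skip = update && srcEqualsDst;
        // -filelimit/-sizelimit: skip this file, but keep walking the tree.
        skip |= fileCount == fileLimit || byteCount + childLen > sizeLimit;
        return skip;
      }
    }

Whether or not a file is skipped, its (dst, src) pair is still written to dst_writer, and that complete mapping is what makes -update and -delete consistent with each other.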