You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by jl...@apache.org on 2014/07/21 23:24:15 UTC
svn commit: r1612400 - in /hadoop/common/trunk/hadoop-mapreduce-project: ./
hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/
hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/jav...
Author: jlowe
Date: Mon Jul 21 21:24:15 2014
New Revision: 1612400
URL: http://svn.apache.org/r1612400
Log:
MAPREDUCE-5756. CombineFileInputFormat.getSplits() including directories in its results. Contributed by Jason Dere
Modified:
hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1612400&r1=1612399&r2=1612400&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Mon Jul 21 21:24:15 2014
@@ -172,6 +172,9 @@ Release 2.6.0 - UNRELEASED
MAPREDUCE-5957. AM throws ClassNotFoundException with job classloader
enabled if custom output format/committer is used (Sangjin Lee via jlowe)
+ MAPREDUCE-5756. CombineFileInputFormat.getSplits() including directories
+ in its results (Jason Dere via jlowe)
+
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES
Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java?rev=1612400&r1=1612399&r2=1612400&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java Mon Jul 21 21:24:15 2014
@@ -579,7 +579,7 @@ public abstract class CombineFileInputFo
blocks = new OneBlockInfo[0];
} else {
- if(locations.length == 0) {
+ if(locations.length == 0 && !stat.isDirectory()) {
locations = new BlockLocation[] { new BlockLocation() };
}
Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java?rev=1612400&r1=1612399&r2=1612400&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java Mon Jul 21 21:24:15 2014
@@ -1275,6 +1275,60 @@ public class TestCombineFileInputFormat
}
/**
+ * Test that directories do not get included as part of getSplits()
+ */
+ @Test
+ public void testGetSplitsWithDirectory() throws Exception {
+ MiniDFSCluster dfs = null;
+ try {
+ Configuration conf = new Configuration();
+ // Start exactly one mini cluster: the finally block shuts down only the
+ // instance referenced by dfs, so building a second one would leak the first.
+ dfs = new MiniDFSCluster.Builder(conf).racks(rack1).hosts(hosts1)
+ .build();
+ dfs.waitActive();
+
+ FileSystem fileSys = dfs.getFileSystem();
+
+ // Set up the following directory structure:
+ // /dir1/: directory
+ // /dir1/file1: regular file
+ // /dir1/dir2/: directory
+ Path dir1 = new Path("/dir1");
+ Path file = new Path("/dir1/file1");
+ Path dir2 = new Path("/dir1/dir2");
+ if (!fileSys.mkdirs(dir1)) {
+ throw new IOException("Mkdirs failed to create " + dir1.toString());
+ }
+ // file1 is created empty (zero bytes written), hence the 0-length assertion below
+ FSDataOutputStream out = fileSys.create(file);
+ out.write(new byte[0]);
+ out.close();
+ if (!fileSys.mkdirs(dir2)) {
+ throw new IOException("Mkdirs failed to create " + dir2.toString());
+ }
+
+ // split it using a CombinedFile input format
+ DummyInputFormat inFormat = new DummyInputFormat();
+ Job job = Job.getInstance(conf);
+ FileInputFormat.setInputPaths(job, "/dir1");
+ List<InputSplit> splits = inFormat.getSplits(job);
+
+ // directories should be omitted from getSplits() - we should only see file1 and not dir2
+ assertEquals(1, splits.size());
+ CombineFileSplit fileSplit = (CombineFileSplit) splits.get(0);
+ assertEquals(1, fileSplit.getNumPaths());
+ assertEquals(file.getName(), fileSplit.getPath(0).getName());
+ assertEquals(0, fileSplit.getOffset(0));
+ assertEquals(0, fileSplit.getLength(0));
+ } finally {
+ if (dfs != null) {
+ dfs.shutdown();
+ }
+ }
+ }
+
+ /**
* Test when input files are from non-default file systems
*/
@Test