You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by tu...@apache.org on 2012/10/30 06:52:17 UTC

svn commit: r1403618 - in /hadoop/common/branches/branch-2/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/ hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/...

Author: tucu
Date: Tue Oct 30 05:52:17 2012
New Revision: 1403618

URL: http://svn.apache.org/viewvc?rev=1403618&view=rev
Log:
MAPREDUCE-1806. CombineFileInputFormat does not work with paths not on default FS. (Gera Shegalov via tucu)

Modified:
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1403618&r1=1403617&r2=1403618&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Tue Oct 30 05:52:17 2012
@@ -43,6 +43,8 @@ Release 2.0.3-alpha - Unreleased
     MAPREDUCE-4637. Handle TaskAttempt diagnostic updates while in the NEW and 
     UNASSIGNED states. (Mayank Bansal via sseth)
 
+    MAPREDUCE-1806. CombineFileInputFormat does not work with paths not on default FS. (Gera Shegalov via tucu)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java?rev=1403618&r1=1403617&r2=1403618&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java Tue Oct 30 05:52:17 2012
@@ -215,7 +215,8 @@ public abstract class CombineFileInputFo
     // times, one time each for each pool in the next loop.
     List<Path> newpaths = new LinkedList<Path>();
     for (int i = 0; i < paths.length; i++) {
-      Path p = new Path(paths[i].toUri().getPath());
+      FileSystem fs = paths[i].getFileSystem(conf);
+      Path p = fs.makeQualified(paths[i]);
       newpaths.add(p);
     }
     paths = null;

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java?rev=1403618&r1=1403617&r2=1403618&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java Tue Oct 30 05:52:17 2012
@@ -76,6 +76,8 @@ public class TestCombineFileInputFormat 
   static final int BLOCKSIZE = 1024;
   static final byte[] databuf = new byte[BLOCKSIZE];
 
+  private static final String DUMMY_FS_URI = "dummyfs:///";
+
   /** Dummy class to extend CombineFileInputFormat*/
   private class DummyInputFormat extends CombineFileInputFormat<Text, Text> {
     @Override
@@ -1145,6 +1147,38 @@ public class TestCombineFileInputFormat 
     fileSys.delete(file.getParent(), true);
   }
 
+  /**
+   * Test when input files are from non-default file systems
+   */
+  @Test
+  public void testForNonDefaultFileSystem() throws Throwable {
+    Configuration conf = new Configuration();
+
+    // use a fake file system scheme as default
+    conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, DUMMY_FS_URI);
+
+    // default fs path
+    assertEquals(DUMMY_FS_URI, FileSystem.getDefaultUri(conf).toString());
+    // add a local file
+    Path localPath = new Path("testFile1");
+    FileSystem lfs = FileSystem.getLocal(conf);
+    FSDataOutputStream dos = lfs.create(localPath);
+    dos.writeChars("Local file for CFIF");
+    dos.close();
+
+    Job job = Job.getInstance(conf);
+    FileInputFormat.setInputPaths(job, lfs.makeQualified(localPath));
+    DummyInputFormat inFormat = new DummyInputFormat();
+    List<InputSplit> splits = inFormat.getSplits(job);
+    assertTrue(splits.size() > 0);
+    for (InputSplit s : splits) {
+      CombineFileSplit cfs = (CombineFileSplit)s;
+      for (Path p : cfs.getPaths()) {
+        assertEquals(p.toUri().getScheme(), "file");
+      }
+    }
+  }
+
   static class TestFilter implements PathFilter {
     private Path p;
 
@@ -1156,7 +1190,7 @@ public class TestCombineFileInputFormat 
     // returns true if the specified path matches the prefix stored
     // in this TestFilter.
     public boolean accept(Path path) {
-      if (path.toString().indexOf(p.toString()) == 0) {
+      if (path.toUri().getPath().indexOf(p.toString()) == 0) {
         return true;
       }
       return false;