You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by jl...@apache.org on 2013/07/03 00:00:43 UTC
svn commit: r1499127 - in
/hadoop/common/branches/branch-2/hadoop-mapreduce-project: ./
hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/
hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/...
Author: jlowe
Date: Tue Jul 2 22:00:43 2013
New Revision: 1499127
URL: http://svn.apache.org/r1499127
Log:
svn merge -c 1499125 FIXES: MAPREDUCE-3193. FileInputFormat doesn't read files recursively in the input path dir. Contributed by Devaraj K
Added:
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/
- copied from r1499125, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFileInputFormat.java
- copied unchanged from r1499125, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFileInputFormat.java
Modified:
hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFileInputFormat.java
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1499127&r1=1499126&r2=1499127&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Tue Jul 2 22:00:43 2013
@@ -15,6 +15,9 @@ Release 2.3.0 - UNRELEASED
MAPREDUCE-5316. job -list-attempt-ids command does not handle illegal
task-state (Ashwin Shankar via jlowe)
+ MAPREDUCE-3193. FileInputFormat doesn't read files recursively in the
+ input path dir (Devaraj K via jlowe)
+
Release 2.2.0 - UNRELEASED
INCOMPATIBLE CHANGES
@@ -1062,6 +1065,9 @@ Release 0.23.10 - UNRELEASED
BUG FIXES
+ MAPREDUCE-3193. FileInputFormat doesn't read files recursively in the
+ input path dir (Devaraj K via jlowe)
+
Release 0.23.9 - 2013-07-08
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java?rev=1499127&r1=1499126&r2=1499127&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java Tue Jul 2 22:00:43 2013
@@ -69,6 +69,10 @@ public abstract class FileInputFormat<K,
public static final String NUM_INPUT_FILES =
org.apache.hadoop.mapreduce.lib.input.FileInputFormat.NUM_INPUT_FILES;
+
+ public static final String INPUT_DIR_RECURSIVE =
+ org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR_RECURSIVE;
+
private static final double SPLIT_SLOP = 1.1; // 10% slop
@@ -192,7 +196,7 @@ public abstract class FileInputFormat<K,
TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job);
// Whether we need to recursive look into the directory structure
- boolean recursive = job.getBoolean("mapred.input.dir.recursive", false);
+ boolean recursive = job.getBoolean(INPUT_DIR_RECURSIVE, false);
List<FileStatus> result = new ArrayList<FileStatus>();
List<IOException> errors = new ArrayList<IOException>();
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java?rev=1499127&r1=1499126&r2=1499127&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java Tue Jul 2 22:00:43 2013
@@ -64,6 +64,8 @@ public abstract class FileInputFormat<K,
"mapreduce.input.pathFilter.class";
public static final String NUM_INPUT_FILES =
"mapreduce.input.fileinputformat.numinputfiles";
+ public static final String INPUT_DIR_RECURSIVE =
+ "mapreduce.input.fileinputformat.input.dir.recursive";
private static final Log LOG = LogFactory.getLog(FileInputFormat.class);
@@ -102,6 +104,27 @@ public abstract class FileInputFormat<K,
return true;
}
}
+
+ /**
+ * Set whether input directories should be read recursively, by storing
+ * the flag in the job's configuration under {@link #INPUT_DIR_RECURSIVE}.
+ *
+ * @param job
+ * the job to modify
+ * @param inputDirRecursive
+ * the value to store for the flag; true requests that files in
+ * nested directories of the input paths be read as well
+ */
+ public static void setInputDirRecursive(Job job,
+ boolean inputDirRecursive) {
+ job.getConfiguration().setBoolean(INPUT_DIR_RECURSIVE,
+ inputDirRecursive);
+ }
+
+ /**
+ * Read the {@link #INPUT_DIR_RECURSIVE} flag from the job's configuration.
+ *
+ * @param job
+ * the job to look at.
+ * @return whether the input files should be read recursively;
+ * false when the flag has not been set.
+ */
+ public static boolean getInputDirRecursive(JobContext job) {
+ return job.getConfiguration().getBoolean(INPUT_DIR_RECURSIVE,
+ false);
+ }
/**
* Get the lower bound on split size imposed by the format.
@@ -210,6 +233,9 @@ public abstract class FileInputFormat<K,
TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs,
job.getConfiguration());
+ // Whether we need to look recursively into the directory structure
+ boolean recursive = getInputDirRecursive(job);
+
List<IOException> errors = new ArrayList<IOException>();
// creates a MultiPathFilter with the hiddenFileFilter and the
@@ -235,7 +261,11 @@ public abstract class FileInputFormat<K,
if (globStat.isDirectory()) {
for(FileStatus stat: fs.listStatus(globStat.getPath(),
inputFilter)) {
- result.add(stat);
+ if (recursive && stat.isDirectory()) {
+ addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
+ } else {
+ result.add(stat);
+ }
}
} else {
result.add(globStat);
@@ -252,6 +282,31 @@ public abstract class FileInputFormat<K,
}
/**
+ * Add files in the input path recursively into the results.
+ * Every entry listed under the path that passes the filter is visited:
+ * a directory is descended into (the directory itself is not added),
+ * anything else is appended to the results. The same filter is applied
+ * at every level, so a subdirectory rejected by it is never descended
+ * into.
+ * @param result
+ * The List to store all files.
+ * @param fs
+ * The FileSystem.
+ * @param path
+ * The input path.
+ * @param inputFilter
+ * The input filter that can be used to filter files/dirs.
+ * @throws IOException
+ * propagated from listing the path or any nested directory.
+ */
+ protected void addInputPathRecursively(List<FileStatus> result,
+ FileSystem fs, Path path, PathFilter inputFilter)
+ throws IOException {
+ for(FileStatus stat: fs.listStatus(path, inputFilter)) {
+ if (stat.isDirectory()) {
+ addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
+ } else {
+ result.add(stat);
+ }
+ }
+ }
+
+
+ /**
* A factory that makes the split for this class. It can be overridden
* by sub-classes to make sub-types
*/
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java?rev=1499127&r1=1499126&r2=1499127&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java Tue Jul 2 22:00:43 2013
@@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
@@ -528,6 +529,8 @@ public class ConfigUtil {
MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST);
Configuration.addDeprecation(JTConfig.JT_MAX_JOB_SPLIT_METAINFO_SIZE,
MRJobConfig.SPLIT_METAINFO_MAXSIZE);
+ Configuration.addDeprecation("mapred.input.dir.recursive",
+ FileInputFormat.INPUT_DIR_RECURSIVE);
}
public static void main(String[] args) {
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFileInputFormat.java?rev=1499127&r1=1499126&r2=1499127&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFileInputFormat.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFileInputFormat.java Tue Jul 2 22:00:43 2013
@@ -190,7 +190,7 @@ public class TestFileInputFormat extends
+ "directory with directories inside.", exceptionThrown);
// Enable multi-level/recursive inputs
- job.setBoolean("mapred.input.dir.recursive", true);
+ job.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, true);
InputSplit[] splits = inFormat.getSplits(job, 1);
assertEquals(splits.length, 2);
}