You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dd...@apache.org on 2008/03/28 13:45:17 UTC
svn commit: r642211 - in /hadoop/core/trunk: CHANGES.txt
src/java/org/apache/hadoop/mapred/FileInputFormat.java
Author: ddas
Date: Fri Mar 28 05:45:15 2008
New Revision: 642211
URL: http://svn.apache.org/viewvc?rev=642211&view=rev
Log:
HADOOP-2055. Allows users to set PathFilter on the FileInputFormat. Contributed by Alejandro Abdelnur.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=642211&r1=642210&r2=642211&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Fri Mar 28 05:45:15 2008
@@ -84,6 +84,9 @@
HADOOP-1622. Allow multiple jar files for map reduce.
(Mahadev Konar via dhruba)
+ HADOOP-2055. Allows users to set PathFilter on the FileInputFormat.
+ (Alejandro Abdelnur via ddas)
+
IMPROVEMENTS
HADOOP-2655. Copy on write for data and metadata files in the
Modified: hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java?rev=642211&r1=642210&r2=642211&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/mapred/FileInputFormat.java Fri Mar 28 05:45:15 2008
@@ -29,6 +29,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.util.ReflectionUtils;
/**
* A base class for file-based {@link InputFormat}.
@@ -59,6 +60,28 @@
}
/**
+ * Proxy PathFilter that accepts a path only if all filters given in the
+ * constructor do. Used by the listPaths() to apply the built-in
+ * hiddenFileFilter together with a user provided one (if any).
+ */
+ private static class MultiPathFilter implements PathFilter {
+ private List<PathFilter> filters;
+
+ public MultiPathFilter(List<PathFilter> filters) {
+ this.filters = filters;
+ }
+
+ public boolean accept(Path path) {
+ for (PathFilter filter : filters) {
+ if (!filter.accept(path)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ }
+
+ /**
* Is the given filename splitable? Usually, true, but if the file is
* stream compressed, it will not be.
*
@@ -79,6 +102,28 @@
Reporter reporter)
throws IOException;
+ /**
+ * Set a PathFilter to be applied to the input paths for the map-reduce job.
+ *
+ * @param conf the job configuration in which the filter class is recorded.
+ * @param filter the PathFilter class to use for filtering the input paths.
+ */
+ public static void setInputPathFilter(JobConf conf,
+     Class<? extends PathFilter> filter) {
+   // Configuration key under which the filter class is stored.
+   final String key = "mapred.input.pathFilter.class";
+   conf.setClass(key, filter, PathFilter.class);
+ }
+
+ /**
+ * Get a PathFilter instance of the filter set for the input paths.
+ *
+ * @param conf the job configuration from which the filter class is read.
+ * @return the PathFilter instance set for the job, <code>null</code> if none
+ *         has been set.
+ */
+ public static PathFilter getInputPathFilter(JobConf conf) {
+   // Bounded wildcard instead of a raw Class: keeps the looked-up class
+   // type-checked against PathFilter, matching setInputPathFilter's parameter.
+   Class<? extends PathFilter> filterClass =
+       conf.getClass("mapred.input.pathFilter.class", null, PathFilter.class);
+   if (filterClass == null) {
+     // No filter class configured for this job.
+     return null;
+   }
+   // Instantiate through ReflectionUtils, passing the job configuration along.
+   return (PathFilter) ReflectionUtils.newInstance(filterClass, conf);
+ }
+
/** List input directories.
* Subclasses may override to, e.g., select only files matching a regular
* expression.
@@ -93,11 +138,23 @@
if (dirs.length == 0) {
throw new IOException("No input paths specified in job");
}
- List<Path> result = new ArrayList<Path>();
+
+ List<Path> result = new ArrayList<Path>();
+
+ // creates a MultiPathFilter with the hiddenFileFilter and the
+ // user provided one (if any).
+ List<PathFilter> filters = new ArrayList<PathFilter>();
+ filters.add(hiddenFileFilter);
+ PathFilter jobFilter = getInputPathFilter(job);
+ if (jobFilter != null) {
+ filters.add(jobFilter);
+ }
+ PathFilter inputFilter = new MultiPathFilter(filters);
+
for (Path p: dirs) {
FileSystem fs = p.getFileSystem(job);
Path[] matches =
- fs.listPaths(fs.globPaths(p, hiddenFileFilter), hiddenFileFilter);
+ fs.listPaths(fs.globPaths(p, inputFilter), inputFilter);
for (Path match: matches) {
result.add(fs.makeQualified(match));
}