You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by nz...@apache.org on 2011/06/16 19:06:51 UTC
svn commit: r1136546 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql: exec/ExecDriver.java io/CombineHiveInputFormat.java
Author: nzhang
Date: Thu Jun 16 17:06:50 2011
New Revision: 1136546
URL: http://svn.apache.org/viewvc?rev=1136546&view=rev
Log:
HIVE-2218. speedup addInputPaths (Yongqiang He via Ning Zhang)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java?rev=1136546&r1=1136545&r2=1136546&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java Thu Jun 16 17:06:50 2011
@@ -31,7 +31,9 @@ import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
+import java.util.HashSet;
import java.util.LinkedHashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@@ -809,8 +811,8 @@ public class ExecDriver extends Task<Map
throws Exception {
int numEmptyPaths = 0;
- List<String> pathsProcessed = new ArrayList<String>();
-
+ Set<String> pathsProcessed = new HashSet<String>();
+ List<String> pathsToAdd = new LinkedList<String>();
// AliasToWork contains all the aliases
for (String oneAlias : work.getAliasToWork().keySet()) {
LOG.info("Processing alias " + oneAlias);
@@ -828,15 +830,14 @@ public class ExecDriver extends Task<Map
if (pathsProcessed.contains(path)) {
continue;
}
+
pathsProcessed.add(path);
LOG.info("Adding input file " + path);
-
- Path dirPath = new Path(path);
- if (!Utilities.isEmptyPath(job, path, ctx)) {
- FileInputFormat.addInputPath(job, dirPath);
- } else {
+ if (Utilities.isEmptyPath(job, path, ctx)) {
emptyPaths.add(path);
+ } else {
+ pathsToAdd.add(path);
}
}
}
@@ -860,6 +861,21 @@ public class ExecDriver extends Task<Map
oneAlias);
}
}
+ setInputPaths(job, pathsToAdd);
+ }
+
+ private static void setInputPaths(JobConf job, List<String> pathsToAdd) {
+ Path[] addedPaths = FileInputFormat.getInputPaths(job);
+ List<Path> toAddPathList = new ArrayList<Path>();
+ if(addedPaths != null) {
+ for(Path added: addedPaths) {
+ toAddPathList.add(added);
+ }
+ }
+ for(String toAdd: pathsToAdd) {
+ toAddPathList.add(new Path(toAdd));
+ }
+ FileInputFormat.setInputPaths(job, toAddPathList.toArray(new Path[0]));
}
@Override
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1136546&r1=1136545&r2=1136546&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Thu Jun 16 17:06:50 2011
@@ -510,7 +510,7 @@ public class CombineHiveInputFormat<K ex
}
static class CombineFilter implements PathFilter {
- private final List<String> pStrings = new ArrayList<String>();
+ private final Set<String> pStrings = new HashSet<String>();
// store a path prefix in this TestFilter
// PRECONDITION: p should always be a directory
@@ -522,19 +522,22 @@ public class CombineHiveInputFormat<K ex
}
public void addPath(Path p) {
- String pString = p.toUri().getPath().toString() + File.separator;;
+ String pString = p.toUri().getPath().toString();
pStrings.add(pString);
}
// returns true if the specified path matches the prefix stored
// in this TestFilter.
public boolean accept(Path path) {
- for (String pString : pStrings) {
- if (path.toString().indexOf(pString) == 0) {
- return true;
+ boolean find = false;
+ while (path != null && !find) {
+ if(pStrings.contains(path.toString())) {
+ find = true;
+ break;
}
+ path = path.getParent();
}
- return false;
+ return find;
}
@Override