You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by nz...@apache.org on 2011/06/16 19:06:51 UTC

svn commit: r1136546 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql: exec/ExecDriver.java io/CombineHiveInputFormat.java

Author: nzhang
Date: Thu Jun 16 17:06:50 2011
New Revision: 1136546

URL: http://svn.apache.org/viewvc?rev=1136546&view=rev
Log:
HIVE-2218. Speed up addInputPaths (Yongqiang He via Ning Zhang)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java?rev=1136546&r1=1136545&r2=1136546&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java Thu Jun 16 17:06:50 2011
@@ -31,7 +31,9 @@ import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Enumeration;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -809,8 +811,8 @@ public class ExecDriver extends Task<Map
       throws Exception {
     int numEmptyPaths = 0;
 
-    List<String> pathsProcessed = new ArrayList<String>();
-
+    Set<String> pathsProcessed = new HashSet<String>();
+    List<String> pathsToAdd = new LinkedList<String>();
     // AliasToWork contains all the aliases
     for (String oneAlias : work.getAliasToWork().keySet()) {
       LOG.info("Processing alias " + oneAlias);
@@ -828,15 +830,14 @@ public class ExecDriver extends Task<Map
           if (pathsProcessed.contains(path)) {
             continue;
           }
+
           pathsProcessed.add(path);
 
           LOG.info("Adding input file " + path);
-
-          Path dirPath = new Path(path);
-          if (!Utilities.isEmptyPath(job, path, ctx)) {
-            FileInputFormat.addInputPath(job, dirPath);
-          } else {
+          if (Utilities.isEmptyPath(job, path, ctx)) {
             emptyPaths.add(path);
+          } else {
+            pathsToAdd.add(path);
           }
         }
       }
@@ -860,6 +861,21 @@ public class ExecDriver extends Task<Map
             oneAlias);
       }
     }
+    setInputPaths(job, pathsToAdd);
+  }
+
+  private static void setInputPaths(JobConf job, List<String> pathsToAdd) {
+    Path[] addedPaths = FileInputFormat.getInputPaths(job);
+    List<Path> toAddPathList = new ArrayList<Path>();
+    if(addedPaths != null) {
+      for(Path added: addedPaths) {
+        toAddPathList.add(added);
+      }
+    }
+    for(String toAdd: pathsToAdd) {
+      toAddPathList.add(new Path(toAdd));
+    }
+    FileInputFormat.setInputPaths(job, toAddPathList.toArray(new Path[0]));
   }
 
   @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1136546&r1=1136545&r2=1136546&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Thu Jun 16 17:06:50 2011
@@ -510,7 +510,7 @@ public class CombineHiveInputFormat<K ex
   }
 
   static class CombineFilter implements PathFilter {
-    private final List<String> pStrings = new ArrayList<String>();
+    private final Set<String> pStrings = new HashSet<String>();
 
     // store a path prefix in this TestFilter
     // PRECONDITION: p should always be a directory
@@ -522,19 +522,22 @@ public class CombineHiveInputFormat<K ex
     }
 
     public void addPath(Path p) {
-      String pString = p.toUri().getPath().toString() + File.separator;;
+      String pString = p.toUri().getPath().toString();
       pStrings.add(pString);
     }
 
     // returns true if the specified path matches the prefix stored
     // in this TestFilter.
     public boolean accept(Path path) {
-      for (String pString : pStrings) {
-        if (path.toString().indexOf(pString) == 0) {
-          return true;
+      boolean find = false;
+      while (path != null && !find) {
+        if(pStrings.contains(path.toString())) {
+          find = true;
+          break;
         }
+        path = path.getParent();
       }
-      return false;
+      return find;
     }
 
     @Override