You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/09 17:29:29 UTC

svn commit: r1521158 - in /hive/trunk: metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java

Author: hashutosh
Date: Mon Sep  9 15:29:29 2013
New Revision: 1521158

URL: http://svn.apache.org/r1521158
Log:
HIVE-5234 : partition name filtering uses suboptimal datastructures (Sergey Shelukhin via Ashutosh Chauhan)

Modified:
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=1521158&r1=1521157&r2=1521158&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java Mon Sep  9 15:29:29 2013
@@ -23,6 +23,7 @@ import static org.apache.hadoop.hive.met
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.util.AbstractList;
 import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -365,6 +366,30 @@ public class Warehouse {
 
   static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
 
+  private static final Pattern slash = Pattern.compile("/");
+
+  /**
+   * Extracts values from partition name without the column names ('/'-separated "col=value" parts).
+   * @param name Partition name. Must not be null (guarded only by an assert).
+   * @param result The result, filled in place via set(). Must be pre-sized to the expected number of columns.
+   */
+  public static void makeValsFromName(
+      String name, AbstractList<String> result) throws MetaException {
+    assert name != null; // only enforced when the JVM runs with assertions enabled
+    String[] parts = slash.split(name, 0); // limit 0: trailing empty strings are dropped
+    if (parts.length != result.size()) { // component count must match the pre-sized result
+      throw new MetaException(
+          "Expected " + result.size() + " components, got " + parts.length + " (" + name + ")");
+    }
+    for (int i = 0; i < parts.length; ++i) {
+      int eq = parts[i].indexOf('='); // first '=' separates the column name from the value
+      if (eq <= 0) { // no '=' at all, or nothing in front of it
+        throw new MetaException("Unexpected component " + parts[i]);
+      }
+      result.set(i, unescapePathName(parts[i].substring(eq + 1))); // keep only the text after the first '='
+    }
+  }
+
   public static LinkedHashMap<String, String> makeSpecFromName(String name)
       throws MetaException {
     if (name == null || name.isEmpty()) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1521158&r1=1521157&r2=1521158&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Mon Sep  9 15:29:29 2013
@@ -18,10 +18,12 @@
 
 package org.apache.hadoop.hive.ql.optimizer.ppr;
 
+import java.util.AbstractSequentialList;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -348,15 +350,22 @@ public class PartitionPruner implements 
     ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> handle =
         PartExprEvalUtils.prepareExpr(prunerExpr, columnNames);
 
-    // Filter the name list.
-    List<String> values = new ArrayList<String>(columnNames.size());
+    // Filter the name list. Removing elements one by one can be slow on e.g. ArrayList,
+    // so let's create a new list and copy it if we don't have a linked list
+    boolean inPlace = partNames instanceof AbstractSequentialList<?>;
+    List<String> partNamesSeq = inPlace ? partNames : new LinkedList<String>(partNames);
+
+    // Array for the values to pass to evaluator.
+    ArrayList<String> values = new ArrayList<String>(columnNames.size());
+    for (int i = 0; i < columnNames.size(); ++i) {
+      values.add(null);
+    }
+
     boolean hasUnknownPartitions = false;
-    Iterator<String> partIter = partNames.iterator();
+    Iterator<String> partIter = partNamesSeq.iterator();
     while (partIter.hasNext()) {
       String partName = partIter.next();
-      LinkedHashMap<String, String> partSpec = Warehouse.makeSpecFromName(partName);
-      values.clear();
-      values.addAll(partSpec.values());
+      Warehouse.makeValsFromName(partName, values);
 
       // Evaluate the expression tree.
       Boolean isNeeded = (Boolean)PartExprEvalUtils.evaluateExprOnPart(handle, values);
@@ -375,6 +384,10 @@ public class PartitionPruner implements 
       hasUnknownPartitions |= isUnknown;
       LOG.debug("retained " + (isUnknown ? "unknown " : "") + "partition: " + partName);
     }
+    if (!inPlace) {
+      partNames.clear();
+      partNames.addAll(partNamesSeq);
+    }
     return hasUnknownPartitions;
   }