You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/09 17:29:29 UTC
svn commit: r1521158 - in /hive/trunk:
metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
Author: hashutosh
Date: Mon Sep 9 15:29:29 2013
New Revision: 1521158
URL: http://svn.apache.org/r1521158
Log:
HIVE-5234 : partition name filtering uses suboptimal data structures (Sergey Shelukhin via Ashutosh Chauhan)
Modified:
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=1521158&r1=1521157&r2=1521158&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java Mon Sep 9 15:29:29 2013
@@ -23,6 +23,7 @@ import static org.apache.hadoop.hive.met
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.util.AbstractList;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
@@ -365,6 +366,30 @@ public class Warehouse {
static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
+ private static final Pattern slash = Pattern.compile("/");
+
+ /**
+ * Extracts values from partition name without the column names.
+ * @param name Partition name.
+ * @param result The result. Must be pre-sized to the expected number of columns.
+ */
+ public static void makeValsFromName(
+ String name, AbstractList<String> result) throws MetaException {
+ assert name != null;
+ String[] parts = slash.split(name, 0);
+ if (parts.length != result.size()) {
+ throw new MetaException(
+ "Expected " + result.size() + " components, got " + parts.length + " (" + name + ")");
+ }
+ for (int i = 0; i < parts.length; ++i) {
+ int eq = parts[i].indexOf('=');
+ if (eq <= 0) {
+ throw new MetaException("Unexpected component " + parts[i]);
+ }
+ result.set(i, unescapePathName(parts[i].substring(eq + 1)));
+ }
+ }
+
public static LinkedHashMap<String, String> makeSpecFromName(String name)
throws MetaException {
if (name == null || name.isEmpty()) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1521158&r1=1521157&r2=1521158&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Mon Sep 9 15:29:29 2013
@@ -18,10 +18,12 @@
package org.apache.hadoop.hive.ql.optimizer.ppr;
+import java.util.AbstractSequentialList;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -348,15 +350,22 @@ public class PartitionPruner implements
ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> handle =
PartExprEvalUtils.prepareExpr(prunerExpr, columnNames);
- // Filter the name list.
- List<String> values = new ArrayList<String>(columnNames.size());
+ // Filter the name list. Removing elements one by one can be slow on e.g. ArrayList,
+ // so let's create a new list and copy it if we don't have a linked list
+ boolean inPlace = partNames instanceof AbstractSequentialList<?>;
+ List<String> partNamesSeq = inPlace ? partNames : new LinkedList<String>(partNames);
+
+ // Array for the values to pass to evaluator.
+ ArrayList<String> values = new ArrayList<String>(columnNames.size());
+ for (int i = 0; i < columnNames.size(); ++i) {
+ values.add(null);
+ }
+
boolean hasUnknownPartitions = false;
- Iterator<String> partIter = partNames.iterator();
+ Iterator<String> partIter = partNamesSeq.iterator();
while (partIter.hasNext()) {
String partName = partIter.next();
- LinkedHashMap<String, String> partSpec = Warehouse.makeSpecFromName(partName);
- values.clear();
- values.addAll(partSpec.values());
+ Warehouse.makeValsFromName(partName, values);
// Evaluate the expression tree.
Boolean isNeeded = (Boolean)PartExprEvalUtils.evaluateExprOnPart(handle, values);
@@ -375,6 +384,10 @@ public class PartitionPruner implements
hasUnknownPartitions |= isUnknown;
LOG.debug("retained " + (isUnknown ? "unknown " : "") + "partition: " + partName);
}
+ if (!inPlace) {
+ partNames.clear();
+ partNames.addAll(partNamesSeq);
+ }
return hasUnknownPartitions;
}