You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/11/11 16:47:11 UTC
svn commit: r1638191 - in /hive/trunk:
metastore/src/java/org/apache/hadoop/hive/metastore/
metastore/src/test/org/apache/hadoop/hive/metastore/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: hashutosh
Date: Tue Nov 11 15:47:10 2014
New Revision: 1638191
URL: http://svn.apache.org/r1638191
Log:
HIVE-8099 : IN operator for partition column fails when the partition column type is DATE (Venki Korukanti via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q
hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out
Modified:
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Tue Nov 11 15:47:10 2014
@@ -132,6 +132,8 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator;
import org.apache.hadoop.hive.metastore.parser.FilterLexer;
import org.apache.hadoop.hive.metastore.parser.FilterParser;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.hadoop.util.StringUtils;
@@ -2136,14 +2138,16 @@ public class ObjectStore implements RawS
result.addAll(getPartitionNamesNoTxn(
table.getDbName(), table.getTableName(), maxParts));
List<String> columnNames = new ArrayList<String>();
+ List<PrimitiveTypeInfo> typeInfos = new ArrayList<PrimitiveTypeInfo>();
for (FieldSchema fs : table.getPartitionKeys()) {
columnNames.add(fs.getName());
+ typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
}
if (defaultPartName == null || defaultPartName.isEmpty()) {
defaultPartName = HiveConf.getVar(getConf(), HiveConf.ConfVars.DEFAULTPARTITIONNAME);
}
return expressionProxy.filterPartitionsByExpr(
- columnNames, expr, defaultPartName, result);
+ columnNames, typeInfos, expr, defaultPartName, result);
}
/**
Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java Tue Nov 11 15:47:10 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.metastore
import java.util.List;
import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
/**
* The proxy interface that metastore uses to manipulate and apply
@@ -37,12 +38,14 @@ public interface PartitionExpressionProx
/**
* Filters the partition names via serialized Hive expression.
- * @param columnNames Partition column names in the underlying table.
+ * @param partColumnNames Partition column names in the underlying table.
+ * @param partColumnTypeInfos Partition column types in the underlying table
* @param expr Serialized expression.
* @param defaultPartitionName Default partition name from job or server configuration.
* @param partitionNames Partition names; the list is modified in place.
* @return Whether there were any unknown partitions preserved in the name list.
*/
- public boolean filterPartitionsByExpr(List<String> columnNames, byte[] expr,
+ public boolean filterPartitionsByExpr(List<String> partColumnNames,
+ List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr,
String defaultPartitionName, List<String> partitionNames) throws MetaException;
}
Modified: hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java (original)
+++ hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java Tue Nov 11 15:47:10 2014
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.metastore;
import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import java.util.List;
@@ -32,7 +33,9 @@ public class MockPartitionExpressionForM
}
@Override
- public boolean filterPartitionsByExpr(List<String> columnNames, byte[] expr, String defaultPartitionName, List<String> partitionNames) throws MetaException {
+ public boolean filterPartitionsByExpr(List<String> partColumnNames,
+ List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr, String defaultPartitionName,
+ List<String> partitionNames) throws MetaException {
return false;
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java Tue Nov 11 15:47:10 2014
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
public class PartExprEvalUtils {
@@ -103,11 +104,13 @@ public class PartExprEvalUtils {
}
static synchronized public ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> prepareExpr(
- ExprNodeGenericFuncDesc expr, List<String> partNames) throws HiveException {
+ ExprNodeGenericFuncDesc expr, List<String> partNames,
+ List<PrimitiveTypeInfo> partColumnTypeInfos) throws HiveException {
// Create the row object
List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
for (int i = 0; i < partNames.size(); i++) {
- partObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+ partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+ partColumnTypeInfos.get(i)));
}
StructObjectInspector objectInspector = ObjectInspectorFactory
.getStandardStructObjectInspector(partNames, partObjectInspectors);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java Tue Nov 11 15:47:10 2014
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
/**
* The basic implementation of PartitionExpressionProxy that uses ql package classes.
@@ -40,13 +41,14 @@ public class PartitionExpressionForMetas
}
@Override
- public boolean filterPartitionsByExpr(List<String> columnNames, byte[] exprBytes,
+ public boolean filterPartitionsByExpr(List<String> partColumnNames,
+ List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] exprBytes,
String defaultPartitionName, List<String> partitionNames) throws MetaException {
ExprNodeGenericFuncDesc expr = deserializeExpr(exprBytes);
try {
long startTime = System.nanoTime(), len = partitionNames.size();
boolean result = PartitionPruner.prunePartitionNames(
- columnNames, expr, defaultPartitionName, partitionNames);
+ partColumnNames, partColumnTypeInfos, expr, defaultPartitionName, partitionNames);
double timeMs = (System.nanoTime() - startTime) / 1000000.0;
LOG.debug("Pruning " + len + " partition names took " + timeMs + "ms");
return result;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Tue Nov 11 15:47:10 2014
@@ -57,7 +57,9 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -420,9 +422,10 @@ public class PartitionPruner implements
String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
List<String> partCols = extractPartColNames(tab);
+ List<PrimitiveTypeInfo> partColTypeInfos = extractPartColTypes(tab);
boolean hasUnknownPartitions = prunePartitionNames(
- partCols, prunerExpr, defaultPartitionName, partNames);
+ partCols, partColTypeInfos, prunerExpr, defaultPartitionName, partNames);
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
@@ -442,19 +445,30 @@ public class PartitionPruner implements
return partCols;
}
+ private static List<PrimitiveTypeInfo> extractPartColTypes(Table tab) {
+ List<FieldSchema> pCols = tab.getPartCols();
+ List<PrimitiveTypeInfo> partColTypeInfos = new ArrayList<PrimitiveTypeInfo>(pCols.size());
+ for (FieldSchema pCol : pCols) {
+ partColTypeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(pCol.getType()));
+ }
+ return partColTypeInfos;
+ }
+
/**
* Prunes partition names to see if they match the prune expression.
- * @param columnNames name of partition columns
+ * @param partColumnNames name of partition columns
+ * @param partColumnTypeInfos types of partition columns
* @param prunerExpr The expression to match.
* @param defaultPartitionName name of default partition
* @param partNames Partition names to filter. The list is modified in place.
* @return Whether the list has any partitions for which the expression may or may not match.
*/
- public static boolean prunePartitionNames(List<String> columnNames, ExprNodeGenericFuncDesc prunerExpr,
+ public static boolean prunePartitionNames(List<String> partColumnNames,
+ List<PrimitiveTypeInfo> partColumnTypeInfos, ExprNodeGenericFuncDesc prunerExpr,
String defaultPartitionName, List<String> partNames) throws HiveException, MetaException {
// Prepare the expression to filter on the columns.
ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> handle =
- PartExprEvalUtils.prepareExpr(prunerExpr, columnNames);
+ PartExprEvalUtils.prepareExpr(prunerExpr, partColumnNames, partColumnTypeInfos);
// Filter the name list. Removing elements one by one can be slow on e.g. ArrayList,
// so let's create a new list and copy it if we don't have a linked list
@@ -462,8 +476,8 @@ public class PartitionPruner implements
List<String> partNamesSeq = inPlace ? partNames : new LinkedList<String>(partNames);
// Array for the values to pass to evaluator.
- ArrayList<String> values = new ArrayList<String>(columnNames.size());
- for (int i = 0; i < columnNames.size(); ++i) {
+ ArrayList<String> values = new ArrayList<String>(partColumnNames.size());
+ for (int i = 0; i < partColumnNames.size(); ++i) {
values.add(null);
}
@@ -473,8 +487,17 @@ public class PartitionPruner implements
String partName = partIter.next();
Warehouse.makeValsFromName(partName, values);
+ ArrayList<Object> convertedValues = new ArrayList<Object>(values.size());
+ for(int i=0; i<values.size(); i++) {
+ Object o = ObjectInspectorConverters.getConverter(
+ PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+ PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(partColumnTypeInfos.get(i)))
+ .convert(values.get(i));
+ convertedValues.add(o);
+ }
+
// Evaluate the expression tree.
- Boolean isNeeded = (Boolean)PartExprEvalUtils.evaluateExprOnPart(handle, values);
+ Boolean isNeeded = (Boolean)PartExprEvalUtils.evaluateExprOnPart(handle, convertedValues);
boolean isUnknown = (isNeeded == null);
if (!isUnknown && !isNeeded) {
partIter.remove();
Added: hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q?rev=1638191&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q Tue Nov 11 15:47:10 2014
@@ -0,0 +1,15 @@
+-- Test partition column type is considered as the type given in table def
+-- and not as 'string'
+CREATE TABLE datePartTbl(col1 string) PARTITIONED BY (date_prt date);
+
+-- Add test partitions and some sample data
+INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-09')
+ SELECT 'col1-2014-08-09' FROM src LIMIT 1;
+
+INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-10')
+ SELECT 'col1-2014-08-10' FROM src LIMIT 1;
+
+-- Query where 'date_prt' value is restricted to given values in IN operator.
+SELECT * FROM datePartTbl WHERE date_prt IN (CAST('2014-08-09' AS DATE), CAST('2014-08-08' AS DATE));
+
+DROP TABLE datePartTbl;
Added: hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out?rev=1638191&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out Tue Nov 11 15:47:10 2014
@@ -0,0 +1,57 @@
+PREHOOK: query: -- Test partition column type is considered as the type given in table def
+-- and not as 'string'
+CREATE TABLE datePartTbl(col1 string) PARTITIONED BY (date_prt date)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@datePartTbl
+POSTHOOK: query: -- Test partition column type is considered as the type given in table def
+-- and not as 'string'
+CREATE TABLE datePartTbl(col1 string) PARTITIONED BY (date_prt date)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@datePartTbl
+PREHOOK: query: -- Add test partitions and some sample data
+INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-09')
+ SELECT 'col1-2014-08-09' FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dateparttbl@date_prt=2014-08-09
+POSTHOOK: query: -- Add test partitions and some sample data
+INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-09')
+ SELECT 'col1-2014-08-09' FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dateparttbl@date_prt=2014-08-09
+POSTHOOK: Lineage: dateparttbl PARTITION(date_prt=2014-08-09).col1 SIMPLE []
+PREHOOK: query: INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-10')
+ SELECT 'col1-2014-08-10' FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dateparttbl@date_prt=2014-08-10
+POSTHOOK: query: INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-10')
+ SELECT 'col1-2014-08-10' FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dateparttbl@date_prt=2014-08-10
+POSTHOOK: Lineage: dateparttbl PARTITION(date_prt=2014-08-10).col1 SIMPLE []
+PREHOOK: query: -- Query where 'date_prt' value is restricted to given values in IN operator.
+SELECT * FROM datePartTbl WHERE date_prt IN (CAST('2014-08-09' AS DATE), CAST('2014-08-08' AS DATE))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dateparttbl
+PREHOOK: Input: default@dateparttbl@date_prt=2014-08-09
+#### A masked pattern was here ####
+POSTHOOK: query: -- Query where 'date_prt' value is restricted to given values in IN operator.
+SELECT * FROM datePartTbl WHERE date_prt IN (CAST('2014-08-09' AS DATE), CAST('2014-08-08' AS DATE))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dateparttbl
+POSTHOOK: Input: default@dateparttbl@date_prt=2014-08-09
+#### A masked pattern was here ####
+col1-2014-08-09 2014-08-09
+PREHOOK: query: DROP TABLE datePartTbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dateparttbl
+PREHOOK: Output: default@dateparttbl
+POSTHOOK: query: DROP TABLE datePartTbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dateparttbl
+POSTHOOK: Output: default@dateparttbl