Posted to commits@hive.apache.org by ha...@apache.org on 2014/11/11 16:47:11 UTC

svn commit: r1638191 - in /hive/trunk: metastore/src/java/org/apache/hadoop/hive/metastore/ metastore/src/test/org/apache/hadoop/hive/metastore/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ ql/src/test/queries/clientpositive/ ql/src/test/result...

Author: hashutosh
Date: Tue Nov 11 15:47:10 2014
New Revision: 1638191

URL: http://svn.apache.org/r1638191
Log:
HIVE-8099 : IN operator for partition column fails when the partition column type is DATE (Venki Korukanti via Ashutosh Chauhan)

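The gist of the change: the declared partition column types are now carried from ObjectStore through PartitionExpressionProxy into PartitionPruner, and the string values parsed out of each partition name are converted to those declared types before the pruner expression is evaluated, so an IN list of DATE constants is compared against DATE values rather than against strings. A minimal sketch of that conversion step, not part of the patch and using only the serde2 classes already visible in the diffs below (class name and sample value are illustrative):

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class PartValueConversionSketch {
      public static void main(String[] args) {
        // A value parsed out of a partition name such as "date_prt=2014-08-09"
        // arrives as a plain string.
        String rawValue = "2014-08-09";

        // Declared type of the partition column, as recorded in the table definition.
        PrimitiveTypeInfo dateType = TypeInfoFactory.getPrimitiveTypeInfo("date");

        // Convert the string to the declared type before expression evaluation,
        // so a predicate like date_prt IN (CAST('2014-08-09' AS DATE), ...) sees
        // a date value instead of a string.
        Object typed = ObjectInspectorConverters.getConverter(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(dateType))
            .convert(rawValue);

        // Expected to print a date-typed object (java.sql.Date in this Hive version).
        System.out.println(typed.getClass().getName() + ": " + typed);
      }
    }
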
Added:
    hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q
    hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out
Modified:
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
    hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Tue Nov 11 15:47:10 2014
@@ -132,6 +132,8 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator;
 import org.apache.hadoop.hive.metastore.parser.FilterLexer;
 import org.apache.hadoop.hive.metastore.parser.FilterParser;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.util.StringUtils;
@@ -2136,14 +2138,16 @@ public class ObjectStore implements RawS
     result.addAll(getPartitionNamesNoTxn(
         table.getDbName(), table.getTableName(), maxParts));
     List<String> columnNames = new ArrayList<String>();
+    List<PrimitiveTypeInfo> typeInfos = new ArrayList<PrimitiveTypeInfo>();
     for (FieldSchema fs : table.getPartitionKeys()) {
       columnNames.add(fs.getName());
+      typeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()));
     }
     if (defaultPartName == null || defaultPartName.isEmpty()) {
       defaultPartName = HiveConf.getVar(getConf(), HiveConf.ConfVars.DEFAULTPARTITIONNAME);
     }
     return expressionProxy.filterPartitionsByExpr(
-        columnNames, expr, defaultPartName, result);
+        columnNames, typeInfos, expr, defaultPartName, result);
   }
 
   /**

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java Tue Nov 11 15:47:10 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.metastore
 import java.util.List;
 
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 
 /**
  * The proxy interface that metastore uses to manipulate and apply
@@ -37,12 +38,14 @@ public interface PartitionExpressionProx
 
   /**
    * Filters the partition names via serialized Hive expression.
-   * @param columnNames Partition column names in the underlying table.
+   * @param partColumnNames Partition column names in the underlying table.
+   * @param partColumnTypeInfos Partition column types in the underlying table
    * @param expr Serialized expression.
    * @param defaultPartitionName Default partition name from job or server configuration.
    * @param partitionNames Partition names; the list is modified in place.
    * @return Whether there were any unknown partitions preserved in the name list.
    */
-  public boolean filterPartitionsByExpr(List<String> columnNames, byte[] expr,
+  public boolean filterPartitionsByExpr(List<String> partColumnNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr,
       String defaultPartitionName, List<String> partitionNames) throws MetaException;
 }

Modified: hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java (original)
+++ hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java Tue Nov 11 15:47:10 2014
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.metastore;
 
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 
 import java.util.List;
 
@@ -32,7 +33,9 @@ public class MockPartitionExpressionForM
   }
 
   @Override
-  public boolean filterPartitionsByExpr(List<String> columnNames, byte[] expr, String defaultPartitionName, List<String> partitionNames) throws MetaException {
+  public boolean filterPartitionsByExpr(List<String> partColumnNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] expr, String defaultPartitionName,
+      List<String> partitionNames) throws MetaException {
     return false;
   }
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java Tue Nov 11 15:47:10 2014
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 public class PartExprEvalUtils {
@@ -103,11 +104,13 @@ public class PartExprEvalUtils {
   }
 
   static synchronized public ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> prepareExpr(
-      ExprNodeGenericFuncDesc expr, List<String> partNames) throws HiveException {
+      ExprNodeGenericFuncDesc expr, List<String> partNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos) throws HiveException {
     // Create the row object
     List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
     for (int i = 0; i < partNames.size(); i++) {
-      partObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+      partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+          partColumnTypeInfos.get(i)));
     }
     StructObjectInspector objectInspector = ObjectInspectorFactory
         .getStandardStructObjectInspector(partNames, partObjectInspectors);

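With the change to prepareExpr above, the row object inspector handed to the expression evaluator now reflects the declared partition column types instead of treating every column as a string. A small, hypothetical sketch (class name and column are illustrative) of the struct inspector that would be built for a table partitioned by (date_prt date):

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class RowInspectorSketch {
      public static void main(String[] args) {
        List<String> partNames = Arrays.asList("date_prt");
        // Previously every partition column was inspected as a Java string;
        // now the inspector matches the declared column type ("date" here).
        List<ObjectInspector> partOIs = Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                TypeInfoFactory.getPrimitiveTypeInfo("date")));
        StructObjectInspector rowOI = ObjectInspectorFactory
            .getStandardStructObjectInspector(partNames, partOIs);
        // Expected to print: struct<date_prt:date>
        System.out.println(rowOI.getTypeName());
      }
    }
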
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java Tue Nov 11 15:47:10 2014
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 
 /**
  * The basic implementation of PartitionExpressionProxy that uses ql package classes.
@@ -40,13 +41,14 @@ public class PartitionExpressionForMetas
   }
 
   @Override
-  public boolean filterPartitionsByExpr(List<String> columnNames, byte[] exprBytes,
+  public boolean filterPartitionsByExpr(List<String> partColumnNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos, byte[] exprBytes,
       String defaultPartitionName, List<String> partitionNames) throws MetaException {
     ExprNodeGenericFuncDesc expr = deserializeExpr(exprBytes);
     try {
       long startTime = System.nanoTime(), len = partitionNames.size();
       boolean result = PartitionPruner.prunePartitionNames(
-          columnNames, expr, defaultPartitionName, partitionNames);
+          partColumnNames, partColumnTypeInfos, expr, defaultPartitionName, partitionNames);
       double timeMs = (System.nanoTime() - startTime) / 1000000.0;
       LOG.debug("Pruning " + len + " partition names took " + timeMs + "ms");
       return result;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1638191&r1=1638190&r2=1638191&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Tue Nov 11 15:47:10 2014
@@ -57,7 +57,9 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
@@ -420,9 +422,10 @@ public class PartitionPruner implements 
 
     String defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
     List<String> partCols = extractPartColNames(tab);
+    List<PrimitiveTypeInfo> partColTypeInfos = extractPartColTypes(tab);
 
     boolean hasUnknownPartitions = prunePartitionNames(
-        partCols, prunerExpr, defaultPartitionName, partNames);
+        partCols, partColTypeInfos, prunerExpr, defaultPartitionName, partNames);
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRUNE_LISTING);
 
     perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
@@ -442,19 +445,30 @@ public class PartitionPruner implements 
     return partCols;
   }
 
+  private static List<PrimitiveTypeInfo> extractPartColTypes(Table tab) {
+    List<FieldSchema> pCols = tab.getPartCols();
+    List<PrimitiveTypeInfo> partColTypeInfos = new ArrayList<PrimitiveTypeInfo>(pCols.size());
+    for (FieldSchema pCol : pCols) {
+      partColTypeInfos.add(TypeInfoFactory.getPrimitiveTypeInfo(pCol.getType()));
+    }
+    return partColTypeInfos;
+  }
+
   /**
    * Prunes partition names to see if they match the prune expression.
-   * @param columnNames name of partition columns
+   * @param partColumnNames name of partition columns
+   * @param partColumnTypeInfos types of partition columns
    * @param prunerExpr The expression to match.
    * @param defaultPartitionName name of default partition
    * @param partNames Partition names to filter. The list is modified in place.
    * @return Whether the list has any partitions for which the expression may or may not match.
    */
-  public static boolean prunePartitionNames(List<String> columnNames, ExprNodeGenericFuncDesc prunerExpr,
+  public static boolean prunePartitionNames(List<String> partColumnNames,
+      List<PrimitiveTypeInfo> partColumnTypeInfos, ExprNodeGenericFuncDesc prunerExpr,
       String defaultPartitionName, List<String> partNames) throws HiveException, MetaException {
     // Prepare the expression to filter on the columns.
     ObjectPair<PrimitiveObjectInspector, ExprNodeEvaluator> handle =
-        PartExprEvalUtils.prepareExpr(prunerExpr, columnNames);
+        PartExprEvalUtils.prepareExpr(prunerExpr, partColumnNames, partColumnTypeInfos);
 
     // Filter the name list. Removing elements one by one can be slow on e.g. ArrayList,
     // so let's create a new list and copy it if we don't have a linked list
@@ -462,8 +476,8 @@ public class PartitionPruner implements 
     List<String> partNamesSeq = inPlace ? partNames : new LinkedList<String>(partNames);
 
     // Array for the values to pass to evaluator.
-    ArrayList<String> values = new ArrayList<String>(columnNames.size());
-    for (int i = 0; i < columnNames.size(); ++i) {
+    ArrayList<String> values = new ArrayList<String>(partColumnNames.size());
+    for (int i = 0; i < partColumnNames.size(); ++i) {
       values.add(null);
     }
 
@@ -473,8 +487,17 @@ public class PartitionPruner implements 
       String partName = partIter.next();
       Warehouse.makeValsFromName(partName, values);
 
+      ArrayList<Object> convertedValues = new ArrayList<Object>(values.size());
+      for(int i=0; i<values.size(); i++) {
+        Object o = ObjectInspectorConverters.getConverter(
+            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
+            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(partColumnTypeInfos.get(i)))
+            .convert(values.get(i));
+        convertedValues.add(o);
+      }
+
       // Evaluate the expression tree.
-      Boolean isNeeded = (Boolean)PartExprEvalUtils.evaluateExprOnPart(handle, values);
+      Boolean isNeeded = (Boolean)PartExprEvalUtils.evaluateExprOnPart(handle, convertedValues);
       boolean isUnknown = (isNeeded == null);
       if (!isUnknown && !isNeeded) {
         partIter.remove();

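Note that the loop added above calls ObjectInspectorConverters.getConverter() once per partition name and column. A hypothetical refactor, not part of this commit, that builds the converters once per prune call and reuses them across all partition names could look like the following (class and method names are illustrative):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class PartValueConverters {
      // Build one string-to-declared-type converter per partition column, up front.
      static List<Converter> buildConverters(List<PrimitiveTypeInfo> typeInfos) {
        List<Converter> converters = new ArrayList<Converter>(typeInfos.size());
        for (PrimitiveTypeInfo ti : typeInfos) {
          converters.add(ObjectInspectorConverters.getConverter(
              PrimitiveObjectInspectorFactory.javaStringObjectInspector,
              PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(ti)));
        }
        return converters;
      }

      // Convert the raw string values parsed from one partition name.
      static List<Object> convert(List<Converter> converters, List<String> rawValues) {
        List<Object> converted = new ArrayList<Object>(rawValues.size());
        for (int i = 0; i < rawValues.size(); i++) {
          converted.add(converters.get(i).convert(rawValues.get(i)));
        }
        return converted;
      }

      public static void main(String[] args) {
        List<PrimitiveTypeInfo> types = Arrays.asList(
            TypeInfoFactory.getPrimitiveTypeInfo("date"),
            TypeInfoFactory.getPrimitiveTypeInfo("int"));
        List<Converter> converters = buildConverters(types);
        // e.g. values parsed from a partition name like "date_prt=2014-08-09/hour=7"
        System.out.println(convert(converters, Arrays.asList("2014-08-09", "7")));
      }
    }
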
Added: hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q?rev=1638191&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_type_in_plan.q Tue Nov 11 15:47:10 2014
@@ -0,0 +1,15 @@
+-- Test partition column type is considered as the type given in table def
+-- and not as 'string'
+CREATE TABLE datePartTbl(col1 string) PARTITIONED BY (date_prt date);
+
+-- Add test partitions and some sample data
+INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-09')
+  SELECT 'col1-2014-08-09' FROM src LIMIT 1;
+
+INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-10')
+  SELECT 'col1-2014-08-10' FROM src LIMIT 1;
+
+-- Query where 'date_prt' value is restricted to given values in IN operator.
+SELECT * FROM datePartTbl WHERE date_prt IN (CAST('2014-08-09' AS DATE), CAST('2014-08-08' AS DATE));
+
+DROP TABLE datePartTbl;

Added: hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out?rev=1638191&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_type_in_plan.q.out Tue Nov 11 15:47:10 2014
@@ -0,0 +1,57 @@
+PREHOOK: query: -- Test partition column type is considered as the type given in table def
+-- and not as 'string'
+CREATE TABLE datePartTbl(col1 string) PARTITIONED BY (date_prt date)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@datePartTbl
+POSTHOOK: query: -- Test partition column type is considered as the type given in table def
+-- and not as 'string'
+CREATE TABLE datePartTbl(col1 string) PARTITIONED BY (date_prt date)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@datePartTbl
+PREHOOK: query: -- Add test partitions and some sample data
+INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-09')
+  SELECT 'col1-2014-08-09' FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dateparttbl@date_prt=2014-08-09
+POSTHOOK: query: -- Add test partitions and some sample data
+INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-09')
+  SELECT 'col1-2014-08-09' FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dateparttbl@date_prt=2014-08-09
+POSTHOOK: Lineage: dateparttbl PARTITION(date_prt=2014-08-09).col1 SIMPLE []
+PREHOOK: query: INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-10')
+  SELECT 'col1-2014-08-10' FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dateparttbl@date_prt=2014-08-10
+POSTHOOK: query: INSERT OVERWRITE TABLE datePartTbl PARTITION(date_prt='2014-08-10')
+  SELECT 'col1-2014-08-10' FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dateparttbl@date_prt=2014-08-10
+POSTHOOK: Lineage: dateparttbl PARTITION(date_prt=2014-08-10).col1 SIMPLE []
+PREHOOK: query: -- Query where 'date_prt' value is restricted to given values in IN operator.
+SELECT * FROM datePartTbl WHERE date_prt IN (CAST('2014-08-09' AS DATE), CAST('2014-08-08' AS DATE))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dateparttbl
+PREHOOK: Input: default@dateparttbl@date_prt=2014-08-09
+#### A masked pattern was here ####
+POSTHOOK: query: -- Query where 'date_prt' value is restricted to given values in IN operator.
+SELECT * FROM datePartTbl WHERE date_prt IN (CAST('2014-08-09' AS DATE), CAST('2014-08-08' AS DATE))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dateparttbl
+POSTHOOK: Input: default@dateparttbl@date_prt=2014-08-09
+#### A masked pattern was here ####
+col1-2014-08-09	2014-08-09
+PREHOOK: query: DROP TABLE datePartTbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dateparttbl
+PREHOOK: Output: default@dateparttbl
+POSTHOOK: query: DROP TABLE datePartTbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dateparttbl
+POSTHOOK: Output: default@dateparttbl