You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ss...@apache.org on 2016/04/05 00:38:06 UTC

[22/24] hive git commit: HIVE-13381 : Timestamp & date should have precedence in type hierarchy than string group (Ashutosh Chauhan via Jason Dere)

HIVE-13381 : Timestamp & date should have precedence in type hierarchy than string group (Ashutosh Chauhan via Jason Dere)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b4465023
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b4465023
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b4465023

Branch: refs/heads/llap
Commit: b44650231ad2708fa73346164ae9c329ad36d6cb
Parents: 9830363
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Tue Mar 29 19:01:24 2016 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Mon Apr 4 13:11:07 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |   9 +-
 .../ql/exec/vector/VectorizationContext.java    |  12 +-
 .../hive/ql/exec/TestFunctionRegistry.java      |  18 ++-
 .../exec/vector/TestVectorizationContext.java   |  17 +-
 .../queries/clientpositive/cast_on_constant.q   |   7 +
 .../clientpositive/cast_on_constant.q.out       | 160 +++++++++++++++++++
 6 files changed, 198 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 56b96b4..1343b39 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -737,7 +737,14 @@ public final class FunctionRegistry {
       return getTypeInfoForPrimitiveCategory(
           (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING);
     }
-
+    // timestamp/date is higher precedence than String_GROUP
+    if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.DATE_GROUP) {
+      return b;
+    }
+    // date/timestamp is higher precedence than String_GROUP
+    if (pgB == PrimitiveGrouping.STRING_GROUP && pgA == PrimitiveGrouping.DATE_GROUP) {
+      return a;
+    }
     // Another special case, because timestamp is not implicitly convertible to numeric types.
     if ((pgA == PrimitiveGrouping.NUMERIC_GROUP || pgB == PrimitiveGrouping.NUMERIC_GROUP)
         && (pcA == PrimitiveCategory.TIMESTAMP || pcB == PrimitiveCategory.TIMESTAMP)) {

http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 1eb960d..30a0f5a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -155,7 +155,7 @@ public class VectorizationContext {
 
   VectorExpressionDescriptor vMap;
 
-  private List<String> initialColumnNames;
+  private final List<String> initialColumnNames;
 
   private List<Integer> projectedColumns;
   private List<String> projectionColumnNames;
@@ -712,7 +712,7 @@ public class VectorizationContext {
         genericUdf = new GenericUDFToDate();
         break;
       case TIMESTAMP:
-        genericUdf = new GenericUDFToUnixTimeStamp();
+        genericUdf = new GenericUDFTimestamp();
         break;
       case INTERVAL_YEAR_MONTH:
         genericUdf = new GenericUDFToIntervalYearMonth();
@@ -1329,7 +1329,7 @@ public class VectorizationContext {
     case INT:
     case LONG:
       return InConstantType.INT_FAMILY;
-  
+
     case DATE:
       return InConstantType.TIMESTAMP;
 
@@ -1339,16 +1339,16 @@ public class VectorizationContext {
     case FLOAT:
     case DOUBLE:
       return InConstantType.FLOAT_FAMILY;
-  
+
     case STRING:
     case CHAR:
     case VARCHAR:
     case BINARY:
       return InConstantType.STRING_FAMILY;
-  
+
     case DECIMAL:
       return InConstantType.DECIMAL;
-  
+
 
     case INTERVAL_YEAR_MONTH:
     case INTERVAL_DAY_TIME:

http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
index 6a83c32..8488c21 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
@@ -253,9 +253,13 @@ public class TestFunctionRegistry extends TestCase {
                TypeInfoFactory.doubleTypeInfo);
 
     comparison(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo,
-        TypeInfoFactory.stringTypeInfo);
+        TypeInfoFactory.dateTypeInfo);
     comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.dateTypeInfo,
-        TypeInfoFactory.stringTypeInfo);
+        TypeInfoFactory.dateTypeInfo);
+    comparison(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.stringTypeInfo,
+        TypeInfoFactory.timestampTypeInfo);
+    comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo,
+        TypeInfoFactory.timestampTypeInfo);
 
     comparison(TypeInfoFactory.intTypeInfo, TypeInfoFactory.timestampTypeInfo,
         TypeInfoFactory.doubleTypeInfo);
@@ -364,15 +368,15 @@ public class TestFunctionRegistry extends TestCase {
 
     // non-qualified types should simply return the TypeInfo associated with that type
     assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory(
-        (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
+        (PrimitiveTypeInfo) varchar10, TypeInfoFactory.stringTypeInfo,
         PrimitiveCategory.STRING));
     assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory(
-        (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
-        (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
+        TypeInfoFactory.stringTypeInfo,
+        TypeInfoFactory.stringTypeInfo,
         PrimitiveCategory.STRING));
     assertEquals(TypeInfoFactory.doubleTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory(
-        (PrimitiveTypeInfo) TypeInfoFactory.doubleTypeInfo,
-        (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
+        TypeInfoFactory.doubleTypeInfo,
+        TypeInfoFactory.stringTypeInfo,
         PrimitiveCategory.DOUBLE));
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index e4c7529..bb37a04 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -23,14 +23,9 @@ import static org.junit.Assert.assertTrue;
 
 import java.sql.Timestamp;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
-
-import junit.framework.Assert;
 
 import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol;
@@ -73,11 +68,12 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUpper;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampDate;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampTimestamp;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearDate;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnBetween;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnNotBetween;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn;
@@ -144,13 +140,12 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFPower;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFRound;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.junit.Assert;
 import org.junit.Test;
 
 public class TestVectorizationContext {
@@ -1215,12 +1210,12 @@ public class TestVectorizationContext {
     children1.set(2, new ExprNodeConstantDesc("2013-11-05 00:00:00.000"));
     children1.set(3, new ExprNodeConstantDesc("2013-11-06 00:00:00.000"));
     ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
-    assertEquals(FilterStringColumnBetween.class, ve.getClass());
+    assertEquals(FilterTimestampColumnBetween.class, ve.getClass());
 
     // timestamp NOT BETWEEN
     children1.set(0, new ExprNodeConstantDesc(new Boolean(true)));
     ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
-    assertEquals(FilterStringColumnNotBetween.class, ve.getClass());
+    assertEquals(FilterTimestampColumnNotBetween.class, ve.getClass());
   }
 
   // Test translation of both IN filters and boolean-valued IN expressions (non-filters).
@@ -1468,7 +1463,7 @@ public class TestVectorizationContext {
     children1.set(2,  col3Expr);
     ve = vc.getVectorExpression(exprDesc);
     assertTrue(ve instanceof IfExprCharScalarStringGroupColumn);
- 
+
     // test for VARCHAR type
     VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10);
     constDesc2 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Alpha", 10));

http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/test/queries/clientpositive/cast_on_constant.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cast_on_constant.q b/ql/src/test/queries/clientpositive/cast_on_constant.q
new file mode 100644
index 0000000..aabb9c6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cast_on_constant.q
@@ -0,0 +1,7 @@
+create table t1(ts_field timestamp, date_field date);
+explain select * from t1 where ts_field = "2016-01-23 00:00:00";
+explain select * from t1 where date_field = "2016-01-23";
+explain select * from t1 where ts_field = timestamp '2016-01-23 00:00:00';
+explain select * from t1 where date_field = date '2016-01-23';
+
+drop table t1;

http://git-wip-us.apache.org/repos/asf/hive/blob/b4465023/ql/src/test/results/clientpositive/cast_on_constant.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cast_on_constant.q.out b/ql/src/test/results/clientpositive/cast_on_constant.q.out
new file mode 100644
index 0000000..f8d6a0d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cast_on_constant.q.out
@@ -0,0 +1,160 @@
+PREHOOK: query: create table t1(ts_field timestamp, date_field date)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1(ts_field timestamp, date_field date)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain select * from t1 where ts_field = "2016-01-23 00:00:00"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t1 where ts_field = "2016-01-23 00:00:00"
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (ts_field = 2016-01-23 00:00:00.0) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: 2016-01-23 00:00:00.0 (type: timestamp), date_field (type: date)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select * from t1 where date_field = "2016-01-23"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t1 where date_field = "2016-01-23"
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (date_field = 2016-01-23) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: ts_field (type: timestamp), 2016-01-23 (type: date)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select * from t1 where ts_field = timestamp '2016-01-23 00:00:00'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t1 where ts_field = timestamp '2016-01-23 00:00:00'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (ts_field = 2016-01-23 00:00:00.0) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: 2016-01-23 00:00:00.0 (type: timestamp), date_field (type: date)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select * from t1 where date_field = date '2016-01-23'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t1 where date_field = date '2016-01-23'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (date_field = 2016-01-23) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: ts_field (type: timestamp), 2016-01-23 (type: date)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: drop table t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1