You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/04/13 21:01:06 UTC

hive git commit: HIVE-13340 : Vectorization: from_unixtime UDF shim (Gopal V via Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master e7f69f078 -> 7049f49d9


HIVE-13340 : Vectorization: from_unixtime UDF shim (Gopal V via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7049f49d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7049f49d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7049f49d

Branch: refs/heads/master
Commit: 7049f49d9574587b2eb5896bab8415d7cd7c1ef1
Parents: e7f69f0
Author: Gopal V <go...@apache.org>
Authored: Wed Mar 23 02:07:00 2016 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Apr 13 12:00:07 2016 -0700

----------------------------------------------------------------------
 .../ql/exec/vector/VectorizationContext.java    |   2 +
 .../optimizer/ConstantPropagateProcFactory.java |  22 ++-
 .../hive/ql/optimizer/physical/Vectorizer.java  |   2 +
 ql/src/test/queries/clientpositive/foldts.q     |  20 +++
 ql/src/test/results/clientpositive/foldts.q.out | 154 +++++++++++++++++++
 .../clientpositive/udf_to_unix_timestamp.q.out  |   2 +-
 6 files changed, 197 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 329c1d5..86025ef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -102,6 +102,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.udf.SettableUDF;
 import org.apache.hadoop.hive.ql.udf.UDFConv;
+import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime;
 import org.apache.hadoop.hive.ql.udf.UDFHex;
 import org.apache.hadoop.hive.ql.udf.UDFRegExpExtract;
 import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace;
@@ -761,6 +762,7 @@ public class VectorizationContext {
           || udfClass.equals(UDFRegExpExtract.class)
           || udfClass.equals(UDFRegExpReplace.class)
           || udfClass.equals(UDFConv.class)
+          || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr))
           || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr))
           || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr))
           || udfClass.equals(UDFToString.class) &&

http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index bdc7448..8c1f34d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -77,6 +77,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
@@ -229,7 +231,7 @@ public final class ConstantPropagateProcFactory {
   public static ExprNodeDesc foldExpr(ExprNodeGenericFuncDesc funcDesc) {
 
     GenericUDF udf = funcDesc.getGenericUDF();
-    if (!isDeterministicUdf(udf)) {
+    if (!isDeterministicUdf(udf, funcDesc.getChildren())) {
       return funcDesc;
     }
     return evaluateFunction(funcDesc.getGenericUDF(),funcDesc.getChildren(), funcDesc.getChildren());
@@ -347,7 +349,7 @@ public final class ConstantPropagateProcFactory {
       }
 
       // Don't evaluate nondeterministic function since the value can only calculate during runtime.
-      if (!isDeterministicUdf(udf)) {
+      if (!isDeterministicUdf(udf, newExprs)) {
         if (LOG.isDebugEnabled()) {
          LOG.debug("Function " + udf.getClass() + " is nondeterministic. Don't evaluate immediately.");
         }
@@ -406,7 +408,7 @@ public final class ConstantPropagateProcFactory {
       }
 
       // Don't evaluate nondeterministic function since the value can only calculate during runtime.
-      if (!isDeterministicUdf(udf)) {
+      if (!isDeterministicUdf(udf, newExprs)) {
         if (LOG.isDebugEnabled()) {
          LOG.debug("Function " + udf.getClass() + " is nondeterministic. Don't evaluate immediately.");
         }
@@ -457,12 +459,17 @@ public final class ConstantPropagateProcFactory {
     return desc;
   }
 
-  private static boolean isDeterministicUdf(GenericUDF udf) {
+  private static boolean isDeterministicUdf(GenericUDF udf,  List<ExprNodeDesc> children) {
     UDFType udfType = udf.getClass().getAnnotation(UDFType.class);
     if (udf instanceof GenericUDFBridge) {
       udfType = ((GenericUDFBridge) udf).getUdfClass().getAnnotation(UDFType.class);
     }
     if (udfType.deterministic() == false) {
+      if (udf.getClass().equals(GenericUDFUnixTimeStamp.class) 
+          && children != null && children.size() > 0) {
+        // unix_timestamp is polymorphic (ignore class annotations)
+        return true;
+      }
       return false;
     }
 
@@ -817,6 +824,13 @@ public final class ConstantPropagateProcFactory {
       }
     }
 
+    if (udf instanceof GenericUDFUnixTimeStamp) {
+      if (newExprs.size() >= 1) {
+        // unix_timestamp(args) -> to_unix_timestamp(args)
+        return ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), newExprs);
+      }
+    }
+
     return null;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index d806b97..1ddd9be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -126,6 +126,7 @@ import org.apache.hadoop.hive.ql.udf.UDFCos;
 import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
 import org.apache.hadoop.hive.ql.udf.UDFDegrees;
 import org.apache.hadoop.hive.ql.udf.UDFExp;
+import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime;
 import org.apache.hadoop.hive.ql.udf.UDFHex;
 import org.apache.hadoop.hive.ql.udf.UDFHour;
 import org.apache.hadoop.hive.ql.udf.UDFLength;
@@ -247,6 +248,7 @@ public class Vectorizer implements PhysicalPlanResolver {
     supportedGenericUDFs.add(UDFSecond.class);
     supportedGenericUDFs.add(UDFWeekOfYear.class);
     supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class);
+    supportedGenericUDFs.add(UDFFromUnixTime.class);
 
     supportedGenericUDFs.add(GenericUDFDateAdd.class);
     supportedGenericUDFs.add(GenericUDFDateSub.class);

http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/test/queries/clientpositive/foldts.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/foldts.q b/ql/src/test/queries/clientpositive/foldts.q
new file mode 100644
index 0000000..362cac2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/foldts.q
@@ -0,0 +1,20 @@
+
+set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+explain
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1;
+
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1;
+
+create temporary table src1orc stored as orc as select * from src1;
+
+explain
+select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1; 
+
+select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1; 
+
+explain
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1; 
+
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1; 

http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/test/results/clientpositive/foldts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/foldts.q.out b/ql/src/test/results/clientpositive/foldts.q.out
new file mode 100644
index 0000000..4c78495
--- /dev/null
+++ b/ql/src/test/results/clientpositive/foldts.q.out
@@ -0,0 +1,154 @@
+PREHOOK: query: explain
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctimestamp1 (type: timestamp), to_unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 1
+                Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+1969-12-31 15:59:46.674	-13	-13
+PREHOOK: query: create temporary table src1orc stored as orc as select * from src1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src1orc
+POSTHOOK: query: create temporary table src1orc stored as orc as select * from src1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src1orc
+PREHOOK: query: explain
+select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 1
+                Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+Wednesday
+PREHOOK: query: explain
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 1
+                Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+Wednesday

http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out b/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out
index ce82461..3d31664 100644
--- a/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out
@@ -103,7 +103,7 @@ STAGE PLANS:
           alias: src
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           Filter Operator
-            predicate: (unix_timestamp(key) > 10) (type: boolean)
+            predicate: (to_unix_timestamp(key) > 10) (type: boolean)
             Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: string), value (type: string)