You are viewing a plain text version of this content; the canonical (hyperlinked) version is available on the original mailing-list archive page.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/04/13 21:01:06 UTC
hive git commit: HIVE-13340 : Vectorization: from_unixtime UDF shim
(Gopal V via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master e7f69f078 -> 7049f49d9
HIVE-13340 : Vectorization: from_unixtime UDF shim (Gopal V via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7049f49d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7049f49d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7049f49d
Branch: refs/heads/master
Commit: 7049f49d9574587b2eb5896bab8415d7cd7c1ef1
Parents: e7f69f0
Author: Gopal V <go...@apache.org>
Authored: Wed Mar 23 02:07:00 2016 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Apr 13 12:00:07 2016 -0700
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 2 +
.../optimizer/ConstantPropagateProcFactory.java | 22 ++-
.../hive/ql/optimizer/physical/Vectorizer.java | 2 +
ql/src/test/queries/clientpositive/foldts.q | 20 +++
ql/src/test/results/clientpositive/foldts.q.out | 154 +++++++++++++++++++
.../clientpositive/udf_to_unix_timestamp.q.out | 2 +-
6 files changed, 197 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 329c1d5..86025ef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -102,6 +102,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.udf.SettableUDF;
import org.apache.hadoop.hive.ql.udf.UDFConv;
+import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime;
import org.apache.hadoop.hive.ql.udf.UDFHex;
import org.apache.hadoop.hive.ql.udf.UDFRegExpExtract;
import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace;
@@ -761,6 +762,7 @@ public class VectorizationContext {
|| udfClass.equals(UDFRegExpExtract.class)
|| udfClass.equals(UDFRegExpReplace.class)
|| udfClass.equals(UDFConv.class)
+ || udfClass.equals(UDFFromUnixTime.class) && isIntFamily(arg0Type(expr))
|| isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr))
|| isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr))
|| udfClass.equals(UDFToString.class) &&
http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index bdc7448..8c1f34d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -77,6 +77,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
@@ -229,7 +231,7 @@ public final class ConstantPropagateProcFactory {
public static ExprNodeDesc foldExpr(ExprNodeGenericFuncDesc funcDesc) {
GenericUDF udf = funcDesc.getGenericUDF();
- if (!isDeterministicUdf(udf)) {
+ if (!isDeterministicUdf(udf, funcDesc.getChildren())) {
return funcDesc;
}
return evaluateFunction(funcDesc.getGenericUDF(),funcDesc.getChildren(), funcDesc.getChildren());
@@ -347,7 +349,7 @@ public final class ConstantPropagateProcFactory {
}
// Don't evaluate nondeterministic function since the value can only calculate during runtime.
- if (!isDeterministicUdf(udf)) {
+ if (!isDeterministicUdf(udf, newExprs)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Function " + udf.getClass() + " is undeterministic. Don't evalulate immediately.");
}
@@ -406,7 +408,7 @@ public final class ConstantPropagateProcFactory {
}
// Don't evaluate nondeterministic function since the value can only calculate during runtime.
- if (!isDeterministicUdf(udf)) {
+ if (!isDeterministicUdf(udf, newExprs)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Function " + udf.getClass() + " is undeterministic. Don't evaluate immediately.");
}
@@ -457,12 +459,17 @@ public final class ConstantPropagateProcFactory {
return desc;
}
- private static boolean isDeterministicUdf(GenericUDF udf) {
+ private static boolean isDeterministicUdf(GenericUDF udf, List<ExprNodeDesc> children) {
UDFType udfType = udf.getClass().getAnnotation(UDFType.class);
if (udf instanceof GenericUDFBridge) {
udfType = ((GenericUDFBridge) udf).getUdfClass().getAnnotation(UDFType.class);
}
if (udfType.deterministic() == false) {
+ if (udf.getClass().equals(GenericUDFUnixTimeStamp.class)
+ && children != null && children.size() > 0) {
+ // unix_timestamp is polymorphic (ignore class annotations)
+ return true;
+ }
return false;
}
@@ -817,6 +824,13 @@ public final class ConstantPropagateProcFactory {
}
}
+ if (udf instanceof GenericUDFUnixTimeStamp) {
+ if (newExprs.size() >= 1) {
+ // unix_timestamp(args) -> to_unix_timestamp(args)
+ return ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), newExprs);
+ }
+ }
+
return null;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index d806b97..1ddd9be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -126,6 +126,7 @@ import org.apache.hadoop.hive.ql.udf.UDFCos;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
import org.apache.hadoop.hive.ql.udf.UDFDegrees;
import org.apache.hadoop.hive.ql.udf.UDFExp;
+import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime;
import org.apache.hadoop.hive.ql.udf.UDFHex;
import org.apache.hadoop.hive.ql.udf.UDFHour;
import org.apache.hadoop.hive.ql.udf.UDFLength;
@@ -247,6 +248,7 @@ public class Vectorizer implements PhysicalPlanResolver {
supportedGenericUDFs.add(UDFSecond.class);
supportedGenericUDFs.add(UDFWeekOfYear.class);
supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class);
+ supportedGenericUDFs.add(UDFFromUnixTime.class);
supportedGenericUDFs.add(GenericUDFDateAdd.class);
supportedGenericUDFs.add(GenericUDFDateSub.class);
http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/test/queries/clientpositive/foldts.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/foldts.q b/ql/src/test/queries/clientpositive/foldts.q
new file mode 100644
index 0000000..362cac2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/foldts.q
@@ -0,0 +1,20 @@
+
+set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+explain
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1;
+
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1;
+
+create temporary table src1orc stored as orc as select * from src1;
+
+explain
+select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1;
+
+select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1;
+
+explain
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1;
+
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/test/results/clientpositive/foldts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/foldts.q.out b/ql/src/test/results/clientpositive/foldts.q.out
new file mode 100644
index 0000000..4c78495
--- /dev/null
+++ b/ql/src/test/results/clientpositive/foldts.q.out
@@ -0,0 +1,154 @@
+PREHOOK: query: explain
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctimestamp1 (type: timestamp), to_unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctimestamp1, unix_timestamp(ctimestamp1), to_unix_timestamp(ctimestamp1) from alltypesorc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+1969-12-31 15:59:46.674 -13 -13
+PREHOOK: query: create temporary table src1orc stored as orc as select * from src1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src1orc
+POSTHOOK: query: create temporary table src1orc stored as orc as select * from src1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src1orc
+PREHOOK: query: explain
+select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+Wednesday
+PREHOOK: query: explain
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select from_unixtime(unix_timestamp(ctimestamp1), 'EEEE') from alltypesorc limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+Wednesday
http://git-wip-us.apache.org/repos/asf/hive/blob/7049f49d/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out b/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out
index ce82461..3d31664 100644
--- a/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/udf_to_unix_timestamp.q.out
@@ -103,7 +103,7 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (unix_timestamp(key) > 10) (type: boolean)
+ predicate: (to_unix_timestamp(key) > 10) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)