You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/16 15:43:04 UTC
[2/2] hive git commit: HIVE-19695 : Year Month Day extraction
functions need to add an implicit cast for columns that are String types (Slim
Bouguerra via Ashutosh Chauhan)
HIVE-19695 : Year Month Day extraction functions need to add an implicit cast for columns that are String types (Slim Bouguerra via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/040c0783
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/040c0783
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/040c0783
Branch: refs/heads/master
Commit: 040c0783e01fb3089d7925def7c349d7ac98e4d6
Parents: 5ba634a
Author: Slim Bouguerra <sl...@gmail.com>
Authored: Thu May 24 11:17:00 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sat Jun 16 08:41:49 2018 -0700
----------------------------------------------------------------------
.../calcite/translator/RexNodeConverter.java | 61 ++++--
.../clientpositive/druidmini_extractTime.q | 23 ++
.../druid/druidmini_extractTime.q.out | 103 +++++++++
.../llap/dynamic_partition_pruning.q.out | 55 +++--
.../llap/vectorized_timestamp_funcs.q.out | 26 +--
.../spark/spark_dynamic_partition_pruning.q.out | 112 +++++-----
...k_vectorized_dynamic_partition_pruning.q.out | 216 +++++++++----------
.../spark/vectorized_timestamp_funcs.q.out | 26 +--
.../test/results/clientpositive/udf_hour.q.out | 4 +-
.../results/clientpositive/udf_minute.q.out | 4 +-
.../results/clientpositive/udf_second.q.out | 4 +-
.../vectorized_timestamp_funcs.q.out | 26 +--
12 files changed, 400 insertions(+), 260 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 7a482d9..7cedab6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -17,18 +17,10 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.sql.Timestamp;
-import java.time.Instant;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.plan.RelOptCluster;
@@ -49,6 +41,7 @@ import org.apache.calcite.sql.fun.SqlCastFunction;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.calcite.util.ConversionUtil;
import org.apache.calcite.util.DateString;
import org.apache.calcite.util.NlsString;
@@ -104,9 +97,17 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableList.Builder;
-import com.google.common.collect.ImmutableMap;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
public class RexNodeConverter {
@@ -455,26 +456,50 @@ public class RexNodeConverter {
private List<RexNode> rewriteExtractDateChildren(SqlOperator op, List<RexNode> childRexNodeLst)
throws SemanticException {
- List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>();
+ List<RexNode> newChildRexNodeLst = new ArrayList<>(2);
+ final boolean isTimestampLevel;
if (op == HiveExtractDate.YEAR) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.YEAR));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.QUARTER) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.QUARTER));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.MONTH) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MONTH));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.WEEK) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.WEEK));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.DAY) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.DAY));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.HOUR) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.HOUR));
+ isTimestampLevel = true;
} else if (op == HiveExtractDate.MINUTE) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MINUTE));
+ isTimestampLevel = true;
} else if (op == HiveExtractDate.SECOND) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.SECOND));
+ isTimestampLevel = true;
+ } else {
+ isTimestampLevel = false;
}
- assert childRexNodeLst.size() == 1;
- newChildRexNodeLst.add(childRexNodeLst.get(0));
+
+ final RexNode child = Iterables.getOnlyElement(childRexNodeLst);
+ if (SqlTypeUtil.isDatetime(child.getType()) || SqlTypeUtil.isInterval(child.getType())) {
+ newChildRexNodeLst.add(child);
+ } else {
+ // We need to add a cast to DATETIME Family
+ if (isTimestampLevel) {
+ newChildRexNodeLst.add(
+ cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP), child));
+ } else {
+ newChildRexNodeLst.add(
+ cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.DATE), child));
+ }
+ }
+
return newChildRexNodeLst;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/queries/clientpositive/druidmini_extractTime.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/druidmini_extractTime.q b/ql/src/test/queries/clientpositive/druidmini_extractTime.q
index 429f796..9541361 100644
--- a/ql/src/test/queries/clientpositive/druidmini_extractTime.q
+++ b/ql/src/test/queries/clientpositive/druidmini_extractTime.q
@@ -176,4 +176,27 @@ SELECT CAST(`__time` AS DATE) AS `x_date`, SUM(cfloat) FROM druid_table GROUP B
SELECT CAST(`__time` AS DATE) AS `x_date` FROM druid_table ORDER BY `x_date` LIMIT 5;
+-- Test Extract from non datetime column
+
+create table test_extract_from_string_base_table(`timecolumn` timestamp, `date_c` string, `timestamp_c` string, `metric_c` double);
+insert into test_extract_from_string_base_table values ('2015-03-08 00:00:00', '2015-03-10', '2015-03-08 05:30:20', 5.0);
+
+CREATE TABLE druid_test_extract_from_string_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "DAY")
+AS select
+cast(`timecolumn` as timestamp with local time zone) as `__time`, `date_c`, `timestamp_c`, `metric_c`
+FROM test_extract_from_string_base_table;
+
+explain select
+year(date_c), month(date_c),day(date_c),
+year(timestamp_c), month(timestamp_c),day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table;
+
+select year(date_c), month(date_c), day(date_c),
+year(timestamp_c), month(timestamp_c), day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table;
+
+DROP TABLE druid_test_extract_from_string_table;
+DROP TABLE test_extract_from_string_base_table;
DROP TABLE druid_table;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
index f21847b..30e273b 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
@@ -756,6 +756,109 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
1969-12-31
1969-12-31
1969-12-31
+PREHOOK: query: create table test_extract_from_string_base_table(`timecolumn` timestamp, `date_c` string, `timestamp_c` string, `metric_c` double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: query: create table test_extract_from_string_base_table(`timecolumn` timestamp, `date_c` string, `timestamp_c` string, `metric_c` double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_extract_from_string_base_table
+PREHOOK: query: insert into test_extract_from_string_base_table values ('2015-03-08 00:00:00', '2015-03-10', '2015-03-08 05:30:20', 5.0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: query: insert into test_extract_from_string_base_table values ('2015-03-08 00:00:00', '2015-03-10', '2015-03-08 05:30:20', 5.0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: Lineage: test_extract_from_string_base_table.date_c SCRIPT []
+POSTHOOK: Lineage: test_extract_from_string_base_table.metric_c SCRIPT []
+POSTHOOK: Lineage: test_extract_from_string_base_table.timecolumn SCRIPT []
+POSTHOOK: Lineage: test_extract_from_string_base_table.timestamp_c SCRIPT []
+PREHOOK: query: CREATE TABLE druid_test_extract_from_string_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "DAY")
+AS select
+cast(`timecolumn` as timestamp with local time zone) as `__time`, `date_c`, `timestamp_c`, `metric_c`
+FROM test_extract_from_string_base_table
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@test_extract_from_string_base_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_test_extract_from_string_table
+POSTHOOK: query: CREATE TABLE druid_test_extract_from_string_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "DAY")
+AS select
+cast(`timecolumn` as timestamp with local time zone) as `__time`, `date_c`, `timestamp_c`, `metric_c`
+FROM test_extract_from_string_base_table
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@test_extract_from_string_base_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@druid_test_extract_from_string_table
+POSTHOOK: Lineage: druid_test_extract_from_string_table.__time EXPRESSION [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:timecolumn, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: druid_test_extract_from_string_table.date_c SIMPLE [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:date_c, type:string, comment:null), ]
+POSTHOOK: Lineage: druid_test_extract_from_string_table.metric_c SIMPLE [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:metric_c, type:double, comment:null), ]
+POSTHOOK: Lineage: druid_test_extract_from_string_table.timestamp_c SIMPLE [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:timestamp_c, type:string, comment:null), ]
+PREHOOK: query: explain select
+year(date_c), month(date_c),day(date_c),
+year(timestamp_c), month(timestamp_c),day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select
+year(date_c), month(date_c),day(date_c),
+year(timestamp_c), month(timestamp_c),day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_test_extract_from_string_table
+ properties:
+ druid.fieldNames vc,vc0,vc1,vc2,vc3,vc4,vc5,vc6,vc7
+ druid.fieldTypes int,int,int,int,int,int,int,int,int
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_test_extract_from_string_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc2","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc3
","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc4","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc5","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'HOUR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc6","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'MINUTE','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc7","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'SECOND','US/Pacific')","outputType":"LONG"}],"columns":["vc","vc0","vc1","vc2","vc3","vc4","vc5","vc6","vc7"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Select Operator
+ expressions: vc (type: int), vc0 (type: int), vc1 (type: int), vc2 (type: int), vc3 (type: int), vc4 (type: int), vc5 (type: int), vc6 (type: int), vc7 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ ListSink
+
+PREHOOK: query: select year(date_c), month(date_c), day(date_c),
+year(timestamp_c), month(timestamp_c), day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_test_extract_from_string_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select year(date_c), month(date_c), day(date_c),
+year(timestamp_c), month(timestamp_c), day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_test_extract_from_string_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2015 3 10 2015 3 8 5 30 20
+PREHOOK: query: DROP TABLE druid_test_extract_from_string_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@druid_test_extract_from_string_table
+PREHOOK: Output: default@druid_test_extract_from_string_table
+POSTHOOK: query: DROP TABLE druid_test_extract_from_string_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@druid_test_extract_from_string_table
+POSTHOOK: Output: default@druid_test_extract_from_string_table
+PREHOOK: query: DROP TABLE test_extract_from_string_base_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_extract_from_string_base_table
+PREHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: query: DROP TABLE test_extract_from_string_base_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_extract_from_string_base_table
+POSTHOOK: Output: default@test_extract_from_string_base_table
PREHOOK: query: DROP TABLE druid_table
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@druid_table
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index 70d423b..1133b54 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -476,16 +476,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
@@ -493,22 +492,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n2
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -519,7 +518,7 @@ STAGE PLANS:
Dynamic Partitioning Event Operator
Target column: ds (string)
Target Input: srcpart
- Partition key expr: day(ds)
+ Partition key expr: day(CAST( ds AS DATE))
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Target Vertex: Map 1
Execution mode: llap
@@ -531,8 +530,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -605,16 +604,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
@@ -622,19 +620,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n2
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
@@ -645,8 +643,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -4111,7 +4109,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
@@ -4121,8 +4118,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
input vertices:
1 Map 3
Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE
@@ -4141,22 +4138,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n2
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -4167,7 +4164,7 @@ STAGE PLANS:
Dynamic Partitioning Event Operator
Target column: ds (string)
Target Input: srcpart
- Partition key expr: day(ds)
+ Partition key expr: day(CAST( ds AS DATE))
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Target Vertex: Map 1
Execution mode: llap
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 79ba4c6..7ce15ae 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -473,13 +473,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+ expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13]
- selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int
+ projectedOutputColumnNums: [5, 7, 8, 9, 6, 11, 10, 13, 14]
+ selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 7:int, VectorUDFMonthDate(col 6, field MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int, VectorUDFDayOfMonthDate(col 6, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 9:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 11:int, VectorUDFHourTimestamp(col 12:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 10:int, VectorUDFMinuteTimestamp(col 12:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 13:int, VectorUDFSecondTimestamp(col 12:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimest
amp1 AS TIMESTAMP)) -> 12:timestamp) -> 14:int
Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint)
@@ -499,7 +499,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized, llap
@@ -667,13 +667,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean)
+ expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(CAST( stimestamp1 AS DATE))) (type: boolean), (month(ctimestamp1) = month(CAST( stimestamp1 AS DATE))) (type: boolean), (day(ctimestamp1) = day(CAST( stimestamp1 AS DATE))) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(CAST( stimestamp1 AS DATE))) (type: boolean), (hour(ctimestamp1) = hour(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (minute(ctimestamp1) = minute(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (second(ctimestamp1) = second(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15]
- selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5
:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean
+ projectedOutputColumnNums: [7, 6, 8, 9, 11, 10, 14, 15, 16]
+ selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 8:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int) -> 6:boolean, LongColEqualLongColumn(col 5:int, col 9:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthDate(col 8, field MONTH)(children: CastStringToDate(col 2:string) -> 8:date) -> 9:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 10:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 9, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 9:date) -> 10:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col
10:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 10:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 12:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourTimestamp(col 13:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteTimestamp(col 13:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp)
-> 12:int) -> 15:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondTimestamp(col 13:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 16:boolean
Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean)
@@ -693,7 +693,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized, llap
@@ -861,13 +861,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+ expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [2, 3, 4, 5, 6, 7, 8, 9, 10]
- selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearString(col 0:string, fieldStart 0, fieldLength 4) -> 3:int, VectorUDFMonthString(col 0:string, fieldStart 5, fieldLength 2) -> 4:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearString(col 0:string) -> 7:int, VectorUDFHourString(col 0:string, fieldStart 11, fieldLength 2) -> 8:int, VectorUDFMinuteString(col 0:string, fieldStart 14, fieldLength 2) -> 9:int, VectorUDFSecondString(col 0:string, fieldStart 17, fieldLength 2) -> 10:int
+ projectedOutputColumnNums: [2, 4, 5, 6, 3, 8, 7, 10, 11]
+ selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearDate(col 3, field YEAR)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int, VectorUDFMonthDate(col 3, field MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 5:int, VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 6:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int, VectorUDFWeekOfYearDate(col 7, field WEEK_OF_YEAR)(children: CastStringToDate(col 0:string) -> 7:date) -> 8:int, VectorUDFHourTimestamp(col 9:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 7:int, VectorUDFMinuteTimestamp(col 9:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 10:int, VectorUDFSecondTimestamp(col 9:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS T
IMESTAMP)) -> 9:timestamp) -> 11:int
Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint)
@@ -887,7 +887,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized, llap
@@ -953,7 +953,7 @@ ORDER BY c1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc_wrong
#### A masked pattern was here ####
-NULL NULL NULL NULL NULL NULL NULL NULL NULL
+NULL 2 11 30 NULL 48 NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
index 22bfdbb..cecee57 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
@@ -463,17 +463,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -482,7 +482,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (day(ds))]]
+ Target Columns: [Map 1 -> [ds:string (day(CAST( ds AS DATE)))]]
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-1
@@ -496,34 +496,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -531,8 +530,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -603,34 +602,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -638,8 +636,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -708,17 +706,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ expressions: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -727,7 +725,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)))]]
+ Target Columns: [Map 1 -> [ds:string (abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)))]]
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-1
@@ -741,37 +739,37 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null (type: boolean)
+ filterExpr: abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null (type: boolean)
+ predicate: abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ key expressions: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
sort order: +
- Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ key expressions: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
sort order: +
- Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -779,8 +777,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
- 1 abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ 0 abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
+ 1 abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -849,17 +847,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ expressions: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -868,7 +866,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (CAST( UDFToShort(day(ds)) AS decimal(10,0)))]]
+ Target Columns: [Map 1 -> [ds:string (CAST( UDFToShort(day(CAST( ds AS DATE))) AS decimal(10,0)))]]
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-1
@@ -882,34 +880,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
+ key expressions: CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0))
sort order: +
- Map-reduce partition columns: CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
+ Map-reduce partition columns: CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0))
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ key expressions: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
sort order: +
- Map-reduce partition columns: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ Map-reduce partition columns: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -917,8 +914,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
- 1 CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ 0 CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0))
+ 1 CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -4518,10 +4515,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -4529,10 +4526,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -4541,7 +4538,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (day(ds))]]
+ Target Columns: [Map 1 -> [ds:string (day(CAST( ds AS DATE)))]]
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Local Work:
Map Reduce Local Work
@@ -4556,7 +4553,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -4566,8 +4562,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
input vertices:
1 Map 3
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE