You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/16 15:43:03 UTC
[1/2] hive git commit: HIVE-19695 : Year Month Day extraction
functions need to add an implicit cast for column that are String types (Slim
Bouguerra via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 5ba634aa6 -> 040c0783e
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
index 634de07..6bd0a3f 100644
--- a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
@@ -766,7 +766,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n0
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -775,8 +775,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 0:string))
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicateExpression: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -787,19 +787,19 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [3]
- selectExpressions: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int
+ projectedOutputColumnNums: [4]
+ selectExpressions: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 3:int
+ keyExpressions: col 4:int
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: []
@@ -808,7 +808,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (day(ds))]]
+ Target Columns: [Map 1 -> [ds:string (day(CAST( ds AS DATE)))]]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map Vectorization:
@@ -825,7 +825,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: ds:string, date:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Stage: Stage-1
Spark
@@ -838,7 +838,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -852,13 +851,13 @@ STAGE PLANS:
projectedOutputColumnNums: [2]
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
- keyColumnNums: [5]
- keyExpressions: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int
+ keyColumnNums: [6]
+ keyExpressions: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: []
@@ -879,12 +878,12 @@ STAGE PLANS:
dataColumns: key:string, value:string
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n0
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -893,8 +892,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 0:string))
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicateExpression: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -905,13 +904,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
- keyColumnNums: [3]
- keyExpressions: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int
+ keyColumnNums: [4]
+ keyExpressions: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: []
@@ -931,7 +930,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: ds:string, date:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Reducer 2
Reduce Vectorization:
enabled: true
@@ -943,8 +942,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -1043,7 +1042,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1057,13 +1055,13 @@ STAGE PLANS:
projectedOutputColumnNums: [2]
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
- keyColumnNums: [5]
- keyExpressions: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int
+ keyColumnNums: [6]
+ keyExpressions: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: []
@@ -1084,12 +1082,12 @@ STAGE PLANS:
dataColumns: key:string, value:string
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n0
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1098,8 +1096,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 0:string))
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicateExpression: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1110,13 +1108,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
- keyColumnNums: [3]
- keyExpressions: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int
+ keyColumnNums: [4]
+ keyExpressions: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: []
@@ -1136,7 +1134,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: ds:string, date:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Reducer 2
Reduce Vectorization:
enabled: true
@@ -1148,8 +1146,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -1246,7 +1244,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n0
- filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1255,8 +1253,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 6:bigint)(children: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 6:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 5:string)(children: StringGroupColConcatStringScalar(col 4:string, val 0)(children: CastLongToString(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:string) -> 5:string) -> 3:bigint) -> 6:bigint) -> 3:bigint) -> 6:bigint))
- predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 4:bigint)(children: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 4:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 6:string)(children: StringGroupColConcatStringScalar(col 5:string, val 0)(children: CastLongToString(col 4:int)(children: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int) -> 5:string) -> 6:string) -> 3:bigint) -> 4:bigint) -> 3:bigint) -> 4:bigint))
+ predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1267,19 +1265,19 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ expressions: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [6]
- selectExpressions: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 6:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 5:string)(children: StringGroupColConcatStringScalar(col 4:string, val 0)(children: CastLongToString(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:string) -> 5:string) -> 3:bigint) -> 6:bigint) -> 3:bigint) -> 6:bigint
+ projectedOutputColumnNums: [4]
+ selectExpressions: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 4:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 6:string)(children: StringGroupColConcatStringScalar(col 5:string, val 0)(children: CastLongToString(col 4:int)(children: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int) -> 5:string) -> 6:string) -> 3:bigint) -> 4:bigint) -> 3:bigint) -> 4:bigint
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 6:bigint
+ keyExpressions: col 4:bigint
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: []
@@ -1288,7 +1286,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)))]]
+ Target Columns: [Map 1 -> [ds:string (abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)))]]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map Vectorization:
@@ -1305,7 +1303,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: ds:string, date:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, string, string, bigint]
+ scratchColumnTypeNames: [bigint, bigint, string, string]
Stage: Stage-1
Spark
@@ -1318,7 +1316,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null (type: boolean)
+ filterExpr: abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1327,8 +1325,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: SelectColumnIsNotNull(col 8:bigint)(children: FuncAbsLongToLong(col 5:bigint)(children: LongColAddLongScalar(col 8:bigint, val 10)(children: LongColUnaryMinus(col 5:bigint)(children: CastStringToLong(col 7:string)(children: StringGroupColConcatStringScalar(col 6:string, val 0)(children: CastLongToString(col 5:int)(children: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int) -> 6:string) -> 7:string) -> 5:bigint) -> 8:bigint) -> 5:bigint) -> 8:bigint)
- predicate: abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null (type: boolean)
+ predicateExpression: SelectColumnIsNotNull(col 6:bigint)(children: FuncAbsLongToLong(col 5:bigint)(children: LongColAddLongScalar(col 6:bigint, val 10)(children: LongColUnaryMinus(col 5:bigint)(children: CastStringToLong(col 8:string)(children: StringGroupColConcatStringScalar(col 7:string, val 0)(children: CastLongToString(col 6:int)(children: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int) -> 7:string) -> 8:string) -> 5:bigint) -> 6:bigint) -> 5:bigint) -> 6:bigint)
+ predicate: abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1339,13 +1337,13 @@ STAGE PLANS:
projectedOutputColumnNums: [2]
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ key expressions: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
sort order: +
- Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
- keyColumnNums: [8]
- keyExpressions: FuncAbsLongToLong(col 5:bigint)(children: LongColAddLongScalar(col 8:bigint, val 10)(children: LongColUnaryMinus(col 5:bigint)(children: CastStringToLong(col 7:string)(children: StringGroupColConcatStringScalar(col 6:string, val 0)(children: CastLongToString(col 5:int)(children: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int) -> 6:string) -> 7:string) -> 5:bigint) -> 8:bigint) -> 5:bigint) -> 8:bigint
+ keyColumnNums: [6]
+ keyExpressions: FuncAbsLongToLong(col 5:bigint)(children: LongColAddLongScalar(col 6:bigint, val 10)(children: LongColUnaryMinus(col 5:bigint)(children: CastStringToLong(col 8:string)(children: StringGroupColConcatStringScalar(col 7:string, val 0)(children: CastLongToString(col 6:int)(children: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int) -> 7:string) -> 8:string) -> 5:bigint) -> 6:bigint) -> 5:bigint) -> 6:bigint
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: []
@@ -1366,12 +1364,12 @@ STAGE PLANS:
dataColumns: key:string, value:string
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
- scratchColumnTypeNames: [bigint, string, string, bigint]
+ scratchColumnTypeNames: [bigint, bigint, string, string]
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n0
- filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1380,8 +1378,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 6:bigint)(children: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 6:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 5:string)(children: StringGroupColConcatStringScalar(col 4:string, val 0)(children: CastLongToString(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:string) -> 5:string) -> 3:bigint) -> 6:bigint) -> 3:bigint) -> 6:bigint))
- predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 4:bigint)(children: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 4:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 6:string)(children: StringGroupColConcatStringScalar(col 5:string, val 0)(children: CastLongToString(col 4:int)(children: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int) -> 5:string) -> 6:string) -> 3:bigint) -> 4:bigint) -> 3:bigint) -> 4:bigint))
+ predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1392,13 +1390,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ key expressions: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
sort order: +
- Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
Reduce Sink Vectorization:
className: VectorReduceSinkLongOperator
- keyColumnNums: [6]
- keyExpressions: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 6:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 5:string)(children: StringGroupColConcatStringScalar(col 4:string, val 0)(children: CastLongToString(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:string) -> 5:string) -> 3:bigint) -> 6:bigint) -> 3:bigint) -> 6:bigint
+ keyColumnNums: [4]
+ keyExpressions: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 4:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 6:string)(children: StringGroupColConcatStringScalar(col 5:string, val 0)(children: CastLongToString(col 4:int)(children: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int) -> 5:string) -> 6:string) -> 3:bigint) -> 4:bigint) -> 3:bigint) -> 4:bigint
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: []
@@ -1418,7 +1416,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: ds:string, date:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, string, string, bigint]
+ scratchColumnTypeNames: [bigint, bigint, string, string]
Reducer 2
Reduce Vectorization:
enabled: true
@@ -1430,8 +1428,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
- 1 abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ 0 abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
+ 1 abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -1528,7 +1526,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n0
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1537,8 +1535,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 0:string))
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicateExpression: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1549,19 +1547,19 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ expressions: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [4]
- selectExpressions: CastLongToDecimal(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:decimal(10,0)
+ projectedOutputColumnNums: [5]
+ selectExpressions: CastLongToDecimal(col 4:int)(children: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int) -> 5:decimal(10,0)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 4:decimal(10,0)
+ keyExpressions: col 5:decimal(10,0)
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: []
@@ -1570,7 +1568,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (CAST( UDFToShort(day(ds)) AS decimal(10,0)))]]
+ Target Columns: [Map 1 -> [ds:string (CAST( UDFToShort(day(CAST( ds AS DATE))) AS decimal(10,0)))]]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map Vectorization:
@@ -1587,7 +1585,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: ds:string, date:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(10,0)]
+ scratchColumnTypeNames: [bigint, bigint, decimal(10,0)]
Stage: Stage-1
Spark
@@ -1600,7 +1598,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1614,13 +1611,13 @@ STAGE PLANS:
projectedOutputColumnNums: [2]
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
+ key expressions: CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0))
sort order: +
- Map-reduce partition columns: CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
+ Map-reduce partition columns: CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0))
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [6]
- keyExpressions: CastLongToDecimal(col 5:smallint)(children: col 5:int) -> 6:decimal(10,0)
+ keyColumnNums: [7]
+ keyExpressions: CastLongToDecimal(col 6:smallint)(children: col 6:int) -> 7:decimal(10,0)
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: []
@@ -1641,12 +1638,12 @@ STAGE PLANS:
dataColumns: key:string, value:string
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
- scratchColumnTypeNames: [bigint, decimal(10,0)]
+ scratchColumnTypeNames: [bigint, bigint, decimal(10,0)]
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n0
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -1655,8 +1652,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 0:string))
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicateExpression: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1667,13 +1664,13 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ key expressions: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
sort order: +
- Map-reduce partition columns: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ Map-reduce partition columns: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [4]
- keyExpressions: CastLongToDecimal(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:decimal(10,0)
+ keyColumnNums: [5]
+ keyExpressions: CastLongToDecimal(col 4:int)(children: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int) -> 5:decimal(10,0)
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: []
@@ -1693,7 +1690,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: ds:string, date:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, decimal(10,0)]
+ scratchColumnTypeNames: [bigint, bigint, decimal(10,0)]
Reducer 2
Reduce Vectorization:
enabled: true
@@ -1705,8 +1702,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
- 1 CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ 0 CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0))
+ 1 CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -9108,7 +9105,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n0
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -9117,8 +9114,8 @@ STAGE PLANS:
Filter Vectorization:
className: VectorFilterOperator
native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08), SelectColumnIsNotNull(col 0:string))
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicateExpression: FilterStringGroupColEqualStringScalar(col 1:string, val 2008-04-08)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -9133,22 +9130,22 @@ STAGE PLANS:
className: VectorSparkHashTableSinkOperator
native: true
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [3]
- selectExpressions: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int
+ projectedOutputColumnNums: [4]
+ selectExpressions: VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 3:int
+ keyExpressions: col 4:int
native: false
vectorProcessingMode: HASH
projectedOutputColumnNums: []
@@ -9157,7 +9154,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (day(ds))]]
+ Target Columns: [Map 1 -> [ds:string (day(CAST( ds AS DATE)))]]
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map Vectorization:
@@ -9174,7 +9171,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: ds:string, date:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Local Work:
Map Reduce Local Work
@@ -9188,7 +9185,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -9205,11 +9201,11 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Map Join Vectorization:
- bigTableKeyColumnNums: [5]
- bigTableKeyExpressions: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int
+ bigTableKeyColumnNums: [6]
+ bigTableKeyExpressions: VectorUDFDayOfMonthDate(col 5, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 5:date) -> 6:int
className: VectorMapJoinInnerBigOnlyLongOperator
native: true
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
@@ -9254,7 +9250,7 @@ STAGE PLANS:
dataColumns: key:string, value:string
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Local Work:
Map Reduce Local Work
Reducer 2
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
index 18926cb..95ebf46 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
@@ -470,13 +470,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+ expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13]
- selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int
+ projectedOutputColumnNums: [5, 7, 8, 9, 6, 11, 10, 13, 14]
+ selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 7:int, VectorUDFMonthDate(col 6, field MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int, VectorUDFDayOfMonthDate(col 6, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 9:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 11:int, VectorUDFHourTimestamp(col 12:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 10:int, VectorUDFMinuteTimestamp(col 12:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 13:int, VectorUDFSecondTimestamp(col 12:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimest
amp1 AS TIMESTAMP)) -> 12:timestamp) -> 14:int
Statistics: Num rows: 52 Data size: 3219 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
@@ -495,7 +495,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized
@@ -662,13 +662,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean)
+ expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(CAST( stimestamp1 AS DATE))) (type: boolean), (month(ctimestamp1) = month(CAST( stimestamp1 AS DATE))) (type: boolean), (day(ctimestamp1) = day(CAST( stimestamp1 AS DATE))) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(CAST( stimestamp1 AS DATE))) (type: boolean), (hour(ctimestamp1) = hour(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (minute(ctimestamp1) = minute(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (second(ctimestamp1) = second(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15]
- selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5
:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean
+ projectedOutputColumnNums: [7, 6, 8, 9, 11, 10, 14, 15, 16]
+ selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 8:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int) -> 6:boolean, LongColEqualLongColumn(col 5:int, col 9:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthDate(col 8, field MONTH)(children: CastStringToDate(col 2:string) -> 8:date) -> 9:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 10:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 9, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 9:date) -> 10:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col
10:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 10:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 12:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourTimestamp(col 13:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteTimestamp(col 13:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp)
-> 12:int) -> 15:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondTimestamp(col 13:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 16:boolean
Statistics: Num rows: 52 Data size: 3219 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
@@ -687,7 +687,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized
@@ -854,13 +854,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+ expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [2, 3, 4, 5, 6, 7, 8, 9, 10]
- selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearString(col 0:string, fieldStart 0, fieldLength 4) -> 3:int, VectorUDFMonthString(col 0:string, fieldStart 5, fieldLength 2) -> 4:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearString(col 0:string) -> 7:int, VectorUDFHourString(col 0:string, fieldStart 11, fieldLength 2) -> 8:int, VectorUDFMinuteString(col 0:string, fieldStart 14, fieldLength 2) -> 9:int, VectorUDFSecondString(col 0:string, fieldStart 17, fieldLength 2) -> 10:int
+ projectedOutputColumnNums: [2, 4, 5, 6, 3, 8, 7, 10, 11]
+ selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearDate(col 3, field YEAR)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int, VectorUDFMonthDate(col 3, field MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 5:int, VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 6:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int, VectorUDFWeekOfYearDate(col 7, field WEEK_OF_YEAR)(children: CastStringToDate(col 0:string) -> 7:date) -> 8:int, VectorUDFHourTimestamp(col 9:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 7:int, VectorUDFMinuteTimestamp(col 9:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 10:int, VectorUDFSecondTimestamp(col 9:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS T
IMESTAMP)) -> 9:timestamp) -> 11:int
Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
@@ -879,7 +879,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized
@@ -945,7 +945,7 @@ ORDER BY c1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc_wrong
#### A masked pattern was here ####
-NULL NULL NULL NULL NULL NULL NULL NULL NULL
+NULL 2 11 30 NULL 48 NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/udf_hour.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_hour.q.out b/ql/src/test/results/clientpositive/udf_hour.q.out
index d26d71f..318a98a 100644
--- a/ql/src/test/results/clientpositive/udf_hour.q.out
+++ b/ql/src/test/results/clientpositive/udf_hour.q.out
@@ -41,7 +41,7 @@ STAGE PLANS:
predicate: (UDFToDouble(key) = 86.0D) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 13 (type: int), 13 (type: int), null (type: int)
+ expressions: 13 (type: int), null (type: int), 0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
ListSink
@@ -56,7 +56,7 @@ FROM src WHERE key = 86
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-13 13 NULL
+13 NULL 0
PREHOOK: query: SELECT hour(cast('2009-08-07 13:14:15' as timestamp))
FROM src WHERE key=86
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/udf_minute.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_minute.q.out b/ql/src/test/results/clientpositive/udf_minute.q.out
index ababcca..02a5c8d 100644
--- a/ql/src/test/results/clientpositive/udf_minute.q.out
+++ b/ql/src/test/results/clientpositive/udf_minute.q.out
@@ -41,7 +41,7 @@ STAGE PLANS:
predicate: (UDFToDouble(key) = 86.0D) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 14 (type: int), 14 (type: int), null (type: int)
+ expressions: 14 (type: int), null (type: int), 0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -69,4 +69,4 @@ FROM src WHERE key = 86
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-14 14 NULL
+14 NULL 0
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/udf_second.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_second.q.out b/ql/src/test/results/clientpositive/udf_second.q.out
index 8042a30..6cc7775 100644
--- a/ql/src/test/results/clientpositive/udf_second.q.out
+++ b/ql/src/test/results/clientpositive/udf_second.q.out
@@ -41,7 +41,7 @@ STAGE PLANS:
predicate: (UDFToDouble(key) = 86.0D) (type: boolean)
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 15 (type: int), 15 (type: int), null (type: int)
+ expressions: 15 (type: int), null (type: int), 0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
ListSink
@@ -56,4 +56,4 @@ FROM src WHERE key = 86
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-15 15 NULL
+15 NULL 0
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 9f90e82..fb49a9b 100644
--- a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -450,13 +450,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+ expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13]
- selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int
+ projectedOutputColumnNums: [5, 7, 8, 9, 6, 11, 10, 13, 14]
+ selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 7:int, VectorUDFMonthDate(col 6, field MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int, VectorUDFDayOfMonthDate(col 6, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 9:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 11:int, VectorUDFHourTimestamp(col 12:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 10:int, VectorUDFMinuteTimestamp(col 12:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 13:int, VectorUDFSecondTimestamp(col 12:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 A
S TIMESTAMP)) -> 12:timestamp) -> 14:int
Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
@@ -476,7 +476,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reduce Vectorization:
enabled: false
@@ -627,13 +627,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean)
+ expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(CAST( stimestamp1 AS DATE))) (type: boolean), (month(ctimestamp1) = month(CAST( stimestamp1 AS DATE))) (type: boolean), (day(ctimestamp1) = day(CAST( stimestamp1 AS DATE))) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(CAST( stimestamp1 AS DATE))) (type: boolean), (hour(ctimestamp1) = hour(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (minute(ctimestamp1) = minute(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (second(ctimestamp1) = second(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15]
- selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int,
VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean
+ projectedOutputColumnNums: [7, 6, 8, 9, 11, 10, 14, 15, 16]
+ selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 8:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int) -> 6:boolean, LongColEqualLongColumn(col 5:int, col 9:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthDate(col 8, field MONTH)(children: CastStringToDate(col 2:string) -> 8:date) -> 9:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 10:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 9, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 9:date) -> 10:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 10:in
t)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 10:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 12:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourTimestamp(col 13:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteTimestamp(col 13:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:
int) -> 15:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondTimestamp(col 13:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 16:boolean
Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
@@ -653,7 +653,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reduce Vectorization:
enabled: false
@@ -804,13 +804,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+ expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [2, 3, 4, 5, 6, 7, 8, 9, 10]
- selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearString(col 0:string, fieldStart 0, fieldLength 4) -> 3:int, VectorUDFMonthString(col 0:string, fieldStart 5, fieldLength 2) -> 4:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearString(col 0:string) -> 7:int, VectorUDFHourString(col 0:string, fieldStart 11, fieldLength 2) -> 8:int, VectorUDFMinuteString(col 0:string, fieldStart 14, fieldLength 2) -> 9:int, VectorUDFSecondString(col 0:string, fieldStart 17, fieldLength 2) -> 10:int
+ projectedOutputColumnNums: [2, 4, 5, 6, 3, 8, 7, 10, 11]
+ selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearDate(col 3, field YEAR)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int, VectorUDFMonthDate(col 3, field MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 5:int, VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 6:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int, VectorUDFWeekOfYearDate(col 7, field WEEK_OF_YEAR)(children: CastStringToDate(col 0:string) -> 7:date) -> 8:int, VectorUDFHourTimestamp(col 9:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 7:int, VectorUDFMinuteTimestamp(col 9:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 10:int, VectorUDFSecondTimestamp(col 9:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 11:int
Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
@@ -830,7 +830,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: false
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reduce Vectorization:
enabled: false
@@ -885,7 +885,7 @@ ORDER BY c1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc_wrong
#### A masked pattern was here ####
-NULL NULL NULL NULL NULL NULL NULL NULL NULL
+NULL 2 11 30 NULL 48 NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
[2/2] hive git commit: HIVE-19695 : Year Month Day extraction
functions need to add an implicit cast for column that are String types (Slim
Bouguerra via Ashutosh Chauhan)
Posted by ha...@apache.org.
HIVE-19695 : Year Month Day extraction functions need to add an implicit cast for column that are String types (Slim Bouguerra via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/040c0783
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/040c0783
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/040c0783
Branch: refs/heads/master
Commit: 040c0783e01fb3089d7925def7c349d7ac98e4d6
Parents: 5ba634a
Author: Slim Bouguerra <sl...@gmail.com>
Authored: Thu May 24 11:17:00 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sat Jun 16 08:41:49 2018 -0700
----------------------------------------------------------------------
.../calcite/translator/RexNodeConverter.java | 61 ++++--
.../clientpositive/druidmini_extractTime.q | 23 ++
.../druid/druidmini_extractTime.q.out | 103 +++++++++
.../llap/dynamic_partition_pruning.q.out | 55 +++--
.../llap/vectorized_timestamp_funcs.q.out | 26 +--
.../spark/spark_dynamic_partition_pruning.q.out | 112 +++++-----
...k_vectorized_dynamic_partition_pruning.q.out | 216 +++++++++----------
.../spark/vectorized_timestamp_funcs.q.out | 26 +--
.../test/results/clientpositive/udf_hour.q.out | 4 +-
.../results/clientpositive/udf_minute.q.out | 4 +-
.../results/clientpositive/udf_second.q.out | 4 +-
.../vectorized_timestamp_funcs.q.out | 26 +--
12 files changed, 400 insertions(+), 260 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 7a482d9..7cedab6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -17,18 +17,10 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.sql.Timestamp;
-import java.time.Instant;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.plan.RelOptCluster;
@@ -49,6 +41,7 @@ import org.apache.calcite.sql.fun.SqlCastFunction;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.calcite.util.ConversionUtil;
import org.apache.calcite.util.DateString;
import org.apache.calcite.util.NlsString;
@@ -104,9 +97,17 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableList.Builder;
-import com.google.common.collect.ImmutableMap;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
public class RexNodeConverter {
@@ -455,26 +456,50 @@ public class RexNodeConverter {
private List<RexNode> rewriteExtractDateChildren(SqlOperator op, List<RexNode> childRexNodeLst)
throws SemanticException {
- List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>();
+ List<RexNode> newChildRexNodeLst = new ArrayList<>(2);
+ final boolean isTimestampLevel;
if (op == HiveExtractDate.YEAR) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.YEAR));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.QUARTER) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.QUARTER));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.MONTH) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MONTH));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.WEEK) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.WEEK));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.DAY) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.DAY));
+ isTimestampLevel = false;
} else if (op == HiveExtractDate.HOUR) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.HOUR));
+ isTimestampLevel = true;
} else if (op == HiveExtractDate.MINUTE) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MINUTE));
+ isTimestampLevel = true;
} else if (op == HiveExtractDate.SECOND) {
newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.SECOND));
+ isTimestampLevel = true;
+ } else {
+ isTimestampLevel = false;
}
- assert childRexNodeLst.size() == 1;
- newChildRexNodeLst.add(childRexNodeLst.get(0));
+
+ final RexNode child = Iterables.getOnlyElement(childRexNodeLst);
+ if (SqlTypeUtil.isDatetime(child.getType()) || SqlTypeUtil.isInterval(child.getType())) {
+ newChildRexNodeLst.add(child);
+ } else {
+ // We need to add a cast to DATETIME Family
+ if (isTimestampLevel) {
+ newChildRexNodeLst.add(
+ cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP), child));
+ } else {
+ newChildRexNodeLst.add(
+ cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.DATE), child));
+ }
+ }
+
return newChildRexNodeLst;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/queries/clientpositive/druidmini_extractTime.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/druidmini_extractTime.q b/ql/src/test/queries/clientpositive/druidmini_extractTime.q
index 429f796..9541361 100644
--- a/ql/src/test/queries/clientpositive/druidmini_extractTime.q
+++ b/ql/src/test/queries/clientpositive/druidmini_extractTime.q
@@ -176,4 +176,27 @@ SELECT CAST(`__time` AS DATE) AS `x_date`, SUM(cfloat) FROM druid_table GROUP B
SELECT CAST(`__time` AS DATE) AS `x_date` FROM druid_table ORDER BY `x_date` LIMIT 5;
+-- Test Extract from non datetime column
+
+create table test_extract_from_string_base_table(`timecolumn` timestamp, `date_c` string, `timestamp_c` string, `metric_c` double);
+insert into test_extract_from_string_base_table values ('2015-03-08 00:00:00', '2015-03-10', '2015-03-08 05:30:20', 5.0);
+
+CREATE TABLE druid_test_extract_from_string_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "DAY")
+AS select
+cast(`timecolumn` as timestamp with local time zone) as `__time`, `date_c`, `timestamp_c`, `metric_c`
+FROM test_extract_from_string_base_table;
+
+explain select
+year(date_c), month(date_c),day(date_c),
+year(timestamp_c), month(timestamp_c),day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table;
+
+select year(date_c), month(date_c), day(date_c),
+year(timestamp_c), month(timestamp_c), day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table;
+
+DROP TABLE druid_test_extract_from_string_table;
+DROP TABLE test_extract_from_string_base_table;
DROP TABLE druid_table;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
index f21847b..30e273b 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
@@ -756,6 +756,109 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
1969-12-31
1969-12-31
1969-12-31
+PREHOOK: query: create table test_extract_from_string_base_table(`timecolumn` timestamp, `date_c` string, `timestamp_c` string, `metric_c` double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: query: create table test_extract_from_string_base_table(`timecolumn` timestamp, `date_c` string, `timestamp_c` string, `metric_c` double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_extract_from_string_base_table
+PREHOOK: query: insert into test_extract_from_string_base_table values ('2015-03-08 00:00:00', '2015-03-10', '2015-03-08 05:30:20', 5.0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: query: insert into test_extract_from_string_base_table values ('2015-03-08 00:00:00', '2015-03-10', '2015-03-08 05:30:20', 5.0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: Lineage: test_extract_from_string_base_table.date_c SCRIPT []
+POSTHOOK: Lineage: test_extract_from_string_base_table.metric_c SCRIPT []
+POSTHOOK: Lineage: test_extract_from_string_base_table.timecolumn SCRIPT []
+POSTHOOK: Lineage: test_extract_from_string_base_table.timestamp_c SCRIPT []
+PREHOOK: query: CREATE TABLE druid_test_extract_from_string_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "DAY")
+AS select
+cast(`timecolumn` as timestamp with local time zone) as `__time`, `date_c`, `timestamp_c`, `metric_c`
+FROM test_extract_from_string_base_table
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@test_extract_from_string_base_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_test_extract_from_string_table
+POSTHOOK: query: CREATE TABLE druid_test_extract_from_string_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "DAY")
+AS select
+cast(`timecolumn` as timestamp with local time zone) as `__time`, `date_c`, `timestamp_c`, `metric_c`
+FROM test_extract_from_string_base_table
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@test_extract_from_string_base_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@druid_test_extract_from_string_table
+POSTHOOK: Lineage: druid_test_extract_from_string_table.__time EXPRESSION [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:timecolumn, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: druid_test_extract_from_string_table.date_c SIMPLE [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:date_c, type:string, comment:null), ]
+POSTHOOK: Lineage: druid_test_extract_from_string_table.metric_c SIMPLE [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:metric_c, type:double, comment:null), ]
+POSTHOOK: Lineage: druid_test_extract_from_string_table.timestamp_c SIMPLE [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:timestamp_c, type:string, comment:null), ]
+PREHOOK: query: explain select
+year(date_c), month(date_c),day(date_c),
+year(timestamp_c), month(timestamp_c),day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select
+year(date_c), month(date_c),day(date_c),
+year(timestamp_c), month(timestamp_c),day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: druid_test_extract_from_string_table
+ properties:
+ druid.fieldNames vc,vc0,vc1,vc2,vc3,vc4,vc5,vc6,vc7
+ druid.fieldTypes int,int,int,int,int,int,int,int,int
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_test_extract_from_string_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc2","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc3","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc4","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc5","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'HOUR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc6","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'MINUTE','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc7","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'SECOND','US/Pacific')","outputType":"LONG"}],"columns":["vc","vc0","vc1","vc2","vc3","vc4","vc5","vc6","vc7"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Select Operator
+ expressions: vc (type: int), vc0 (type: int), vc1 (type: int), vc2 (type: int), vc3 (type: int), vc4 (type: int), vc5 (type: int), vc6 (type: int), vc7 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ ListSink
+
+PREHOOK: query: select year(date_c), month(date_c), day(date_c),
+year(timestamp_c), month(timestamp_c), day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@druid_test_extract_from_string_table
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select year(date_c), month(date_c), day(date_c),
+year(timestamp_c), month(timestamp_c), day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@druid_test_extract_from_string_table
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2015 3 10 2015 3 8 5 30 20
+PREHOOK: query: DROP TABLE druid_test_extract_from_string_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@druid_test_extract_from_string_table
+PREHOOK: Output: default@druid_test_extract_from_string_table
+POSTHOOK: query: DROP TABLE druid_test_extract_from_string_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@druid_test_extract_from_string_table
+POSTHOOK: Output: default@druid_test_extract_from_string_table
+PREHOOK: query: DROP TABLE test_extract_from_string_base_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_extract_from_string_base_table
+PREHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: query: DROP TABLE test_extract_from_string_base_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_extract_from_string_base_table
+POSTHOOK: Output: default@test_extract_from_string_base_table
PREHOOK: query: DROP TABLE druid_table
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@druid_table
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index 70d423b..1133b54 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -476,16 +476,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
@@ -493,22 +492,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n2
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -519,7 +518,7 @@ STAGE PLANS:
Dynamic Partitioning Event Operator
Target column: ds (string)
Target Input: srcpart
- Partition key expr: day(ds)
+ Partition key expr: day(CAST( ds AS DATE))
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Target Vertex: Map 1
Execution mode: llap
@@ -531,8 +530,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -605,16 +604,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
@@ -622,19 +620,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n2
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
@@ -645,8 +643,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -4111,7 +4109,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ds (type: string)
@@ -4121,8 +4118,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
input vertices:
1 Map 3
Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE
@@ -4141,22 +4138,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n2
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -4167,7 +4164,7 @@ STAGE PLANS:
Dynamic Partitioning Event Operator
Target column: ds (string)
Target Input: srcpart
- Partition key expr: day(ds)
+ Partition key expr: day(CAST( ds AS DATE))
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Target Vertex: Map 1
Execution mode: llap
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 79ba4c6..7ce15ae 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -473,13 +473,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+ expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13]
- selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int
+ projectedOutputColumnNums: [5, 7, 8, 9, 6, 11, 10, 13, 14]
+ selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 7:int, VectorUDFMonthDate(col 6, field MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int, VectorUDFDayOfMonthDate(col 6, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 9:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 11:int, VectorUDFHourTimestamp(col 12:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 10:int, VectorUDFMinuteTimestamp(col 12:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 13:int, VectorUDFSecondTimestamp(col 12:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 14:int
Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint)
@@ -499,7 +499,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized, llap
@@ -667,13 +667,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean)
+ expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(CAST( stimestamp1 AS DATE))) (type: boolean), (month(ctimestamp1) = month(CAST( stimestamp1 AS DATE))) (type: boolean), (day(ctimestamp1) = day(CAST( stimestamp1 AS DATE))) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(CAST( stimestamp1 AS DATE))) (type: boolean), (hour(ctimestamp1) = hour(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (minute(ctimestamp1) = minute(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (second(ctimestamp1) = second(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15]
- selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5
:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean
+ projectedOutputColumnNums: [7, 6, 8, 9, 11, 10, 14, 15, 16]
+ selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 8:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int) -> 6:boolean, LongColEqualLongColumn(col 5:int, col 9:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthDate(col 8, field MONTH)(children: CastStringToDate(col 2:string) -> 8:date) -> 9:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 10:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 9, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 9:date) -> 10:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col
10:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 10:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 12:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourTimestamp(col 13:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteTimestamp(col 13:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp)
-> 12:int) -> 15:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondTimestamp(col 13:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 16:boolean
Statistics: Num rows: 52 Data size: 1872 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean)
@@ -693,7 +693,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized, llap
@@ -861,13 +861,13 @@ STAGE PLANS:
TableScan Vectorization:
native: true
Select Operator
- expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+ expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [2, 3, 4, 5, 6, 7, 8, 9, 10]
- selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearString(col 0:string, fieldStart 0, fieldLength 4) -> 3:int, VectorUDFMonthString(col 0:string, fieldStart 5, fieldLength 2) -> 4:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearString(col 0:string) -> 7:int, VectorUDFHourString(col 0:string, fieldStart 11, fieldLength 2) -> 8:int, VectorUDFMinuteString(col 0:string, fieldStart 14, fieldLength 2) -> 9:int, VectorUDFSecondString(col 0:string, fieldStart 17, fieldLength 2) -> 10:int
+ projectedOutputColumnNums: [2, 4, 5, 6, 3, 8, 7, 10, 11]
+ selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearDate(col 3, field YEAR)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int, VectorUDFMonthDate(col 3, field MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 5:int, VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 6:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int, VectorUDFWeekOfYearDate(col 7, field WEEK_OF_YEAR)(children: CastStringToDate(col 0:string) -> 7:date) -> 8:int, VectorUDFHourTimestamp(col 9:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 7:int, VectorUDFMinuteTimestamp(col 9:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 10:int, VectorUDFSecondTimestamp(col 9:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS T
IMESTAMP)) -> 9:timestamp) -> 11:int
Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint)
@@ -887,7 +887,7 @@ STAGE PLANS:
featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
allNative: true
- usesVectorUDFAdaptor: false
+ usesVectorUDFAdaptor: true
vectorized: true
Reducer 2
Execution mode: vectorized, llap
@@ -953,7 +953,7 @@ ORDER BY c1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc_wrong
#### A masked pattern was here ####
-NULL NULL NULL NULL NULL NULL NULL NULL NULL
+NULL 2 11 30 NULL 48 NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
http://git-wip-us.apache.org/repos/asf/hive/blob/040c0783/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
index 22bfdbb..cecee57 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
@@ -463,17 +463,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -482,7 +482,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (day(ds))]]
+ Target Columns: [Map 1 -> [ds:string (day(CAST( ds AS DATE)))]]
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-1
@@ -496,34 +496,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -531,8 +530,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -603,34 +602,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: day(_col0) (type: int)
+ key expressions: day(CAST( _col0 AS DATE)) (type: int)
sort order: +
- Map-reduce partition columns: day(_col0) (type: int)
+ Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -638,8 +636,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -708,17 +706,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ expressions: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -727,7 +725,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)))]]
+ Target Columns: [Map 1 -> [ds:string (abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)))]]
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-1
@@ -741,37 +739,37 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null (type: boolean)
+ filterExpr: abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null (type: boolean)
+ predicate: abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ key expressions: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
sort order: +
- Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(ds)), '0'))) + 10)) is not null) (type: boolean)
+ predicate: ((date = '2008-04-08') and abs(((- UDFToLong(concat(UDFToString(day(CAST( ds AS DATE))), '0'))) + 10)) is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ key expressions: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
sort order: +
- Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -779,8 +777,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
- 1 abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
+ 0 abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
+ 1 abs(((- UDFToLong(concat(UDFToString(day(CAST( _col0 AS DATE))), '0'))) + 10)) (type: bigint)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -849,17 +847,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ expressions: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -868,7 +866,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (CAST( UDFToShort(day(ds)) AS decimal(10,0)))]]
+ Target Columns: [Map 1 -> [ds:string (CAST( UDFToShort(day(CAST( ds AS DATE))) AS decimal(10,0)))]]
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-1
@@ -882,34 +880,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
+ key expressions: CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0))
sort order: +
- Map-reduce partition columns: CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
+ Map-reduce partition columns: CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0))
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ key expressions: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
sort order: +
- Map-reduce partition columns: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ Map-reduce partition columns: CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -917,8 +914,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
- 1 CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
+ 0 CAST( UDFToShort(day(CAST( _col0 AS DATE))) AS decimal(10,0)) (type: decimal(10,0))
+ 1 CAST( day(CAST( _col0 AS DATE)) AS decimal(10,0)) (type: decimal(10,0))
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
@@ -4518,10 +4515,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_n4
- filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ filterExpr: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
+ predicate: (date = '2008-04-08') (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -4529,10 +4526,10 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
Select Operator
- expressions: day(_col0) (type: int)
+ expressions: day(CAST( _col0 AS DATE)) (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -4541,7 +4538,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 1 -> [ds:string (day(ds))]]
+ Target Columns: [Map 1 -> [ds:string (day(CAST( ds AS DATE)))]]
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Local Work:
Map Reduce Local Work
@@ -4556,7 +4553,6 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -4566,8 +4562,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 day(_col0) (type: int)
- 1 day(_col0) (type: int)
+ 0 day(CAST( _col0 AS DATE)) (type: int)
+ 1 day(CAST( _col0 AS DATE)) (type: int)
input vertices:
1 Map 3
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE