You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by li...@apache.org on 2016/12/23 03:28:01 UTC
[1/4] hive git commit: HIVE-15357: Fix and re-enable the spark-only
tests (Rui reviewed by Chao)
Repository: hive
Updated Branches:
refs/heads/master ee35ccb19 -> 858ce8c22
http://git-wip-us.apache.org/repos/asf/hive/blob/858ce8c2/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
index c8f6cd7..699fcc6 100644
--- a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
@@ -127,6 +127,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Lineage: srcpart_date.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
PREHOOK: query: create table srcpart_hour stored as orc as select hr as hr, hr as hour from srcpart group by hr
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@srcpart
@@ -145,6 +147,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcpart_hour
+POSTHOOK: Lineage: srcpart_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
PREHOOK: query: create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@srcpart
@@ -163,6 +167,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcpart_date_hour
+POSTHOOK: Lineage: srcpart_date_hour.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
PREHOOK: query: create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as hour from srcpart group by hr
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@srcpart
@@ -181,6 +189,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcpart_double_hour
+POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
PREHOOK: query: -- single column, single key
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
PREHOOK: type: QUERY
@@ -201,10 +211,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -252,10 +262,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -297,8 +307,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -361,10 +371,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -406,8 +416,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -438,14 +448,10 @@ POSTHOOK: Input: default@srcpart_date
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
1000
PREHOOK: query: -- multiple sources, single key
@@ -470,10 +476,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -498,10 +504,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: string)
@@ -550,10 +556,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -569,10 +575,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: string)
@@ -629,8 +635,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -701,10 +707,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -720,10 +726,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: string)
@@ -780,8 +786,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -816,12 +822,10 @@ POSTHOOK: Input: default@srcpart_hour
PREHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
500
PREHOOK: query: -- multiple columns single source
@@ -844,10 +848,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour
- filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
@@ -872,10 +876,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour
- filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
@@ -922,10 +926,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour
- filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
@@ -967,8 +971,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1031,10 +1035,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour
- filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
@@ -1076,8 +1080,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1108,12 +1112,10 @@ POSTHOOK: Input: default@srcpart_date_hour
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
500
PREHOOK: query: -- empty set
@@ -1136,10 +1138,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1187,10 +1189,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1232,8 +1234,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1296,10 +1298,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1341,8 +1343,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1399,10 +1401,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1450,10 +1452,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1495,8 +1497,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1542,10 +1544,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1585,18 +1587,18 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ key expressions: (UDFToDouble(_col0) * 2.0) (type: double)
sort order: +
- Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1614,7 +1616,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ 0 (UDFToDouble(_col0) * 2.0) (type: double)
1 _col0 (type: double)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -1638,8 +1640,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1702,10 +1704,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1747,8 +1749,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1803,18 +1805,18 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ key expressions: (UDFToDouble(_col0) * 2.0) (type: double)
sort order: +
- Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1832,7 +1834,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ 0 (UDFToDouble(_col0) * 2.0) (type: double)
1 _col0 (type: double)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -1856,8 +1858,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1888,14 +1890,10 @@ POSTHOOK: Input: default@srcpart_double_hour
PREHOOK: query: select count(*) from srcpart where hr = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where hr = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
1000
PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11
@@ -1916,10 +1914,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1959,18 +1957,18 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+ key expressions: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
sort order: +
- Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+ Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1988,7 +1986,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+ 0 UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
1 UDFToString(_col0) (type: string)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -2012,8 +2010,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2044,16 +2042,13 @@ POSTHOOK: Input: default@srcpart_double_hour
PREHOOK: query: select count(*) from srcpart where cast(hr as string) = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where cast(hr as string) = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
1000
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: -- parent is reduce tasks
EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
PREHOOK: type: QUERY
@@ -2061,83 +2056,29 @@ POSTHOOK: query: -- parent is reduce tasks
EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
- Edges:
- Reducer 7 <- Map 6 (GROUP, 2)
-#### A masked pattern was here ####
- Vertices:
- Map 6
- Map Operator Tree:
- TableScan
- alias: srcpart
- filterExpr: (ds = '2008-04-08') (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '2008-04-08' (type: string)
- outputColumnNames: ds
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: ds (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Reducer 7
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: ds
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- target column name: ds
- target work: Map 1
-
Stage: Stage-1
Spark
Edges:
- Reducer 5 <- Map 4 (GROUP, 2)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1)
Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 5 <- Map 4 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ filterExpr: (ds = '2008-04-08') (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ sort order:
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
@@ -2145,18 +2086,16 @@ STAGE PLANS:
filterExpr: (ds = '2008-04-08') (type: boolean)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: '2008-04-08' (type: string)
- outputColumnNames: ds
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: ds (type: string)
+ keys: '2008-04-08' (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: '2008-04-08' (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: '2008-04-08' (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -2164,9 +2103,9 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ 0
+ 1
+ Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -2188,22 +2127,22 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: string)
+ keys: '2008-04-08' (type: string)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Select Operator
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -2211,37 +2150,30 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
1000
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
1000
-Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: -- non-equi join
EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
PREHOOK: type: QUERY
@@ -2300,12 +2232,12 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 743248 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((_col0 = _col2) or (_col1 = _col4)) (type: boolean)
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 743248 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 743248 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -2327,8 +2259,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2337,7 +2269,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
@@ -2500,8 +2432,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2645,8 +2577,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2768,8 +2700,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2893,8 +2825,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2925,10 +2857,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -2964,22 +2896,23 @@ STAGE PLANS:
alias: srcpart
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
+ expressions: ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 5
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -3001,11 +2934,13 @@ STAGE PLANS:
predicate: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean)
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
Select Operator
+ expressions: hr (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: '11' (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: '11' (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
@@ -3016,11 +2951,12 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
+ outputColumnNames: _col1
Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: '11' (type: string)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: '11' (type: string)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
@@ -3052,8 +2988,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -3104,29 +3040,30 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean)
+ predicate: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
+ expressions: ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: string)
Execution mode: vectorized
Map 5
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -3148,11 +3085,13 @@ STAGE PLANS:
predicate: (UDFToDouble(hr) = 13.0) (type: boolean)
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
Select Operator
+ expressions: hr (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: '13' (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: '13' (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
@@ -3163,11 +3102,12 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
+ outputColumnNames: _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
- key expressions: '13' (type: string)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: '13' (type: string)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reducer 3
Reduce Operator Tree:
@@ -3199,8 +3139,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -3239,11 +3179,12 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 11 <- Map 10 (GROUP, 1)
- Reducer 9 <- Map 8 (GROUP, 1)
+ Reducer 10 <- Map 9 (GROUP, 1)
+ Reducer 11 <- Reducer 10 (GROUP, 2), Reducer 13 (GROUP, 2)
+ Reducer 13 <- Map 12 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 10
+ Map 12
Map Operator Tree:
TableScan
alias: srcpart
@@ -3256,12 +3197,12 @@ STAGE PLANS:
aggregations: min(ds)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
- Map 8
+ Map 9
Map Operator Tree:
TableScan
alias: srcpart
@@ -3274,86 +3215,84 @@ STAGE PLANS:
aggregations: max(ds)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
+ Reducer 10
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Reducer 11
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: ds
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- target column name: ds
- target work: Map 1
- Reducer 9
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Spark Partition Pruning Sink Operator
+ partition key expr: ds
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ target column name: ds
+ target work: Map 1
+ Reducer 13
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0)
+ aggregations: min(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: ds
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- target column name: ds
- target work: Map 1
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-1
Spark
Edges:
- Reducer 5 <- Map 4 (GROUP, 1)
- Reducer 7 <- Map 6 (GROUP, 1)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 5 <- Map 4 (GROUP, 1)
+ Reducer 6 <- Reducer 5 (GROUP, 2), Reducer 8 (GROUP, 2)
+ Reducer 8 <- Map 7 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -3377,12 +3316,12 @@ STAGE PLANS:
aggregations: max(ds)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
- Map 6
+ Map 7
Map Operator Tree:
TableScan
alias: srcpart
@@ -3395,16 +3334,16 @@ STAGE PLANS:
aggregations: min(ds)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
Reducer 2
Reduce Operator Tree:
Join Operator
condition map:
- Left Semi Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -3430,8 +3369,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
Execution mode: vectorized
@@ -3440,41 +3379,48 @@ STAGE PLANS:
aggregations: max(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Reducer 7
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reducer 6
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reducer 8
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -3512,11 +3458,12 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 11 <- Map 10 (GROUP, 1)
- Reducer 9 <- Map 8 (GROUP, 1)
+ Reducer 10 <- Map 9 (GROUP, 1)
+ Reducer 11 <- Reducer 10 (GROUP, 2), Reducer 13 (GROUP, 2)
+ Reducer 13 <- Map 12 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 10
+ Map 12
Map Operator Tree:
TableScan
alias: srcpart
@@ -3529,12 +3476,12 @@ STAGE PLANS:
aggregations: min(ds)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
- Map 8
+ Map 9
Map Operator Tree:
TableScan
alias: srcpart
@@ -3547,86 +3494,84 @@ STAGE PLANS:
aggregations: max(ds)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
+ Reducer 10
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Reducer 11
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: ds
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- target column name: ds
- target work: Map 1
- Reducer 9
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Spark Partition Pruning Sink Operator
+ partition key expr: ds
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ target column name: ds
+ target work: Map 1
+ Reducer 13
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0)
+ aggregations: min(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: ds
- Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
- target column name: ds
- target work: Map 1
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-1
Spark
Edges:
- Reducer 5 <- Map 4 (GROUP, 1)
- Reducer 7 <- Map 6 (GROUP, 1)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
Reducer 3 <- Reducer 2 (GROUP, 2)
+ Reducer 5 <- Map 4 (GROUP, 1)
+ Reducer 6 <- Reducer 5 (GROUP, 2), Reducer 8 (GROUP, 2)
+ Reducer 8 <- Map 7 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -3650,12 +3595,12 @@ STAGE PLANS:
aggregations: max(ds)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics
<TRUNCATED>
[2/4] hive git commit: HIVE-15357: Fix and re-enable the spark-only
tests (Rui reviewed by Chao)
Posted by li...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/858ce8c2/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
index 4e62a3b..b18fc3c 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
@@ -165,29 +165,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: d1
- filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
+ filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
+ predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: id (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
+ expressions: id (type: int), label (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: dim_shops_id
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- target column name: dim_shops_id
- target work: Map 1
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Spark Partition Pruning Sink Operator
+ partition key expr: dim_shops_id
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ target column name: dim_shops_id
+ target work: Map 1
Local Work:
Map Reduce Local Work
@@ -204,34 +208,35 @@ STAGE PLANS:
alias: agg
filterExpr: dim_shops_id is not null (type: boolean)
Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- input vertices:
- 1 Map 4
- Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: amount (type: decimal(10,0)), dim_shops_id (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col6 (type: string), _col0 (type: decimal(10,0))
- outputColumnNames: _col6, _col0
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ expressions: _col3 (type: string), _col0 (type: decimal(10,0))
+ outputColumnNames: _col3, _col0
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), sum(_col0)
- keys: _col6 (type: string)
+ keys: _col3 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
Local Work:
Map Reduce Local Work
@@ -242,24 +247,24 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0))
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -332,15 +337,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: d1
- filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
+ filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
+ predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), label (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
Local Work:
Map Reduce Local Work
@@ -357,34 +366,35 @@ STAGE PLANS:
alias: agg
filterExpr: dim_shops_id is not null (type: boolean)
Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- input vertices:
- 1 Map 4
- Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: amount (type: decimal(10,0)), dim_shops_id (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col6 (type: string), _col0 (type: decimal(10,0))
- outputColumnNames: _col6, _col0
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ expressions: _col3 (type: string), _col0 (type: decimal(10,0))
+ outputColumnNames: _col3, _col0
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), sum(_col0)
- keys: _col6 (type: string)
+ keys: _col3 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
Local Work:
Map Reduce Local Work
@@ -395,24 +405,24 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0))
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -481,11 +491,15 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: id is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), label (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
Local Work:
Map Reduce Local Work
@@ -499,29 +513,30 @@ STAGE PLANS:
alias: agg
filterExpr: dim_shops_id is not null (type: boolean)
Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
- outputColumnNames: _col1, _col5, _col6
- input vertices:
- 1 Map 2
- Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 = _col5) (type: boolean)
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: dim_shops_id (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col2
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col6 (type: string)
+ expressions: _col2 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
@@ -563,6 +578,7 @@ baz
foo
foo
foo
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: EXPLAIN SELECT agg.amount
FROM agg_01 agg,
dim_shops d1
@@ -589,15 +605,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: d1
- filterExpr: (id = 1) (type: boolean)
+ filterExpr: (1 = id) (type: boolean)
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (id = 1) (type: boolean)
+ predicate: (1 = id) (type: boolean)
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 1 (type: int)
- 1 1 (type: int)
+ Select Operator
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0
+ 1
Local Work:
Map Reduce Local Work
@@ -611,23 +629,27 @@ STAGE PLANS:
alias: agg
filterExpr: (dim_shops_id = 1) (type: boolean)
Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 1 (type: int)
- 1 1 (type: int)
+ Select Operator
+ expressions: amount (type: decimal(10,0))
outputColumnNames: _col0
- input vertices:
- 1 Map 2
Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
@@ -637,6 +659,7 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
PREHOOK: query: SELECT agg.amount
FROM agg_01 agg,
dim_shops d1
@@ -692,29 +715,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: d1
- filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
+ filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
+ predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: id (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
+ expressions: id (type: int), label (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: dim_shops_id
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- target column name: dim_shops_id
- target work: Map 1
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Spark Partition Pruning Sink Operator
+ partition key expr: dim_shops_id
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ target column name: dim_shops_id
+ target work: Map 1
Local Work:
Map Reduce Local Work
@@ -731,34 +758,35 @@ STAGE PLANS:
alias: agg
filterExpr: dim_shops_id is not null (type: boolean)
Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- input vertices:
- 1 Map 4
- Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: amount (type: decimal(10,0)), dim_shops_id (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col6 (type: string), _col0 (type: decimal(10,0))
- outputColumnNames: _col6, _col0
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ expressions: _col3 (type: string), _col0 (type: decimal(10,0))
+ outputColumnNames: _col3, _col0
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), sum(_col0)
- keys: _col6 (type: string)
+ keys: _col3 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
Local Work:
Map Reduce Local Work
@@ -769,24 +797,24 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
Reducer 3
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0))
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -852,29 +880,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: dim_shops
- filterExpr: (id is not null and (label = 'foo')) (type: boolean)
+ filterExpr: ((label = 'foo') and id is not null) (type: boolean)
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (id is not null and (label = 'foo')) (type: boolean)
+ predicate: ((label = 'foo') and id is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
Select Operator
expressions: id (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ expressions: _col0 (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: dim_shops_id
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- target column name: dim_shops_id
- target work: Map 1
+ Spark Partition Pruning Sink Operator
+ partition key expr: dim_shops_id
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ target column name: dim_shops_id
+ target work: Map 1
Local Work:
Map Reduce Local Work
@@ -886,29 +918,33 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: dim_shops
- filterExpr: (id is not null and (label = 'bar')) (type: boolean)
+ filterExpr: ((label = 'bar') and id is not null) (type: boolean)
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (id is not null and (label = 'bar')) (type: boolean)
+ predicate: ((label = 'bar') and id is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark HashTable Sink Operator
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
Select Operator
expressions: id (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: int)
- mode: hash
+ Spark HashTable Sink Operator
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ Select Operator
+ expressions: _col0 (type: int)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: dim_shops_id
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
- target column name: dim_shops_id
- target work: Map 3
+ Spark Partition Pruning Sink Operator
+ partition key expr: dim_shops_id
+ Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ target column name: dim_shops_id
+ target work: Map 3
Local Work:
Map Reduce Local Work
@@ -922,30 +958,27 @@ STAGE PLANS:
alias: agg_01
filterExpr: dim_shops_id is not null (type: boolean)
Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
- outputColumnNames: _col0, _col1, _col5
- input vertices:
- 1 Map 2
- Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 = _col5) (type: boolean)
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: decimal(10,0))
- outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 8 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: amount (type: decimal(10,0)), dim_shops_id (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 18 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
Map 3
@@ -954,30 +987,27 @@ STAGE PLANS:
alias: agg_01
filterExpr: dim_shops_id is not null (type: boolean)
Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 dim_shops_id (type: int)
- 1 id (type: int)
- outputColumnNames: _col0, _col1, _col5
- input vertices:
- 1 Map 4
- Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 = _col5) (type: boolean)
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: decimal(10,0))
- outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 8 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: amount (type: decimal(10,0)), dim_shops_id (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 18 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Local Work:
Map Reduce Local Work
[3/4] hive git commit: HIVE-15357: Fix and re-enable the spark-only
tests (Rui reviewed by Chao)
Posted by li...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/858ce8c2/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
index 16aa452..fc6edb4 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
@@ -38,9 +38,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
11
12
-PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds
+PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds
PREHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds
+POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds
POSTHOOK: type: CREATETABLE_AS_SELECT
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -112,7 +112,7 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
-PREHOOK: query: create table srcpart_date as select ds as ds, ds as date from srcpart group by ds
+PREHOOK: query: create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -121,7 +121,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Output: database:default
PREHOOK: Output: default@srcpart_date
-POSTHOOK: query: create table srcpart_date as select ds as ds, ds as date from srcpart group by ds
+POSTHOOK: query: create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -130,6 +130,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Lineage: srcpart_date.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
PREHOOK: query: create table srcpart_hour as select hr as hr, hr as hour from srcpart group by hr
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@srcpart
@@ -148,7 +150,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcpart_hour
-PREHOOK: query: create table srcpart_date_hour as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr
+POSTHOOK: Lineage: srcpart_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+PREHOOK: query: create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -157,7 +161,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Output: database:default
PREHOOK: Output: default@srcpart_date_hour
-POSTHOOK: query: create table srcpart_date_hour as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr
+POSTHOOK: query: create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -166,6 +170,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcpart_date_hour
+POSTHOOK: Lineage: srcpart_date_hour.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
PREHOOK: query: create table srcpart_double_hour as select (hr*2) as hr, hr as hour from srcpart group by hr
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@srcpart
@@ -184,11 +192,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcpart_double_hour
+POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
PREHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
PREHOOK: type: QUERY
POSTHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -204,10 +214,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -254,10 +264,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -297,8 +307,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -307,7 +317,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -316,7 +326,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Input: default@srcpart_date
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -326,9 +336,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Input: default@srcpart_date
#### A masked pattern was here ####
1000
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -361,10 +371,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -404,8 +414,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -414,7 +424,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -423,7 +433,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Input: default@srcpart_date
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -436,23 +446,19 @@ POSTHOOK: Input: default@srcpart_date
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
1000
PREHOOK: query: -- multiple sources, single key
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
PREHOOK: type: QUERY
POSTHOOK: query: -- multiple sources, single key
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -468,10 +474,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -495,10 +501,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: string)
@@ -546,10 +552,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -564,10 +570,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: string)
@@ -622,8 +628,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -633,7 +639,7 @@ STAGE PLANS:
ListSink
PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -644,7 +650,7 @@ PREHOOK: Input: default@srcpart_date
PREHOOK: Input: default@srcpart_hour
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -656,10 +662,10 @@ POSTHOOK: Input: default@srcpart_hour
#### A masked pattern was here ####
500
PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -694,10 +700,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -712,10 +718,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: string)
@@ -770,8 +776,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -781,7 +787,7 @@ STAGE PLANS:
ListSink
PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -792,7 +798,7 @@ PREHOOK: Input: default@srcpart_date
PREHOOK: Input: default@srcpart_hour
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -806,19 +812,17 @@ POSTHOOK: Input: default@srcpart_hour
PREHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
500
PREHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
PREHOOK: type: QUERY
POSTHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -834,10 +838,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour
- filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
@@ -861,10 +865,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour
- filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
@@ -910,10 +914,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour
- filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
@@ -953,8 +957,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -963,7 +967,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -972,7 +976,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Input: default@srcpart_date_hour
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -982,9 +986,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Input: default@srcpart_date_hour
#### A masked pattern was here ####
500
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -1017,10 +1021,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date_hour
- filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string), hr (type: string)
@@ -1060,8 +1064,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1070,7 +1074,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1079,7 +1083,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Input: default@srcpart_date_hour
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1092,19 +1096,17 @@ POSTHOOK: Input: default@srcpart_date_hour
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
#### A masked pattern was here ####
500
PREHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
PREHOOK: type: QUERY
POSTHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -1120,10 +1122,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1170,10 +1172,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1213,8 +1215,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1223,7 +1225,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1232,7 +1234,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Input: default@srcpart_date
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1242,9 +1244,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Input: default@srcpart_date
#### A masked pattern was here ####
0
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -1277,10 +1279,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+ predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -1320,8 +1322,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1330,7 +1332,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1339,7 +1341,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Input: default@srcpart_date
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1378,10 +1380,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1428,10 +1430,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1471,8 +1473,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1518,10 +1520,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1560,18 +1562,18 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ key expressions: (UDFToDouble(_col0) * 2.0) (type: double)
sort order: +
- Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1588,7 +1590,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ 0 (UDFToDouble(_col0) * 2.0) (type: double)
1 _col0 (type: double)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -1611,8 +1613,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1675,10 +1677,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1718,8 +1720,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1774,18 +1776,18 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ key expressions: (UDFToDouble(_col0) * 2.0) (type: double)
sort order: +
- Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1802,7 +1804,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+ 0 (UDFToDouble(_col0) * 2.0) (type: double)
1 _col0 (type: double)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -1825,8 +1827,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -1857,14 +1859,10 @@ POSTHOOK: Input: default@srcpart_double_hour
PREHOOK: query: select count(*) from srcpart where hr = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where hr = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
1000
PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11
@@ -1885,10 +1883,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1927,18 +1925,18 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+ key expressions: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
sort order: +
- Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+ Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
alias: srcpart_double_hour
- filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+ predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hr (type: double)
@@ -1955,7 +1953,7 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+ 0 UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
1 UDFToString(_col0) (type: string)
Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -1978,8 +1976,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2010,99 +2008,43 @@ POSTHOOK: Input: default@srcpart_double_hour
PREHOOK: query: select count(*) from srcpart where cast(hr as string) = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where cast(hr as string) = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
1000
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
PREHOOK: type: QUERY
POSTHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-1 depends on stages: Stage-2
+ Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-2
- Spark
- Edges:
- Reducer 7 <- Map 6 (GROUP, 2)
-#### A masked pattern was here ####
- Vertices:
- Map 6
- Map Operator Tree:
- TableScan
- alias: srcpart
- filterExpr: (ds = '2008-04-08') (type: boolean)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '2008-04-08' (type: string)
- outputColumnNames: ds
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: ds (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Reducer 7
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Spark Partition Pruning Sink Operator
- partition key expr: ds
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- target column name: ds
- target work: Map 1
-
Stage: Stage-1
Spark
Edges:
- Reducer 5 <- Map 4 (GROUP, 2)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1)
Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 5 <- Map 4 (GROUP, 2)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ filterExpr: (ds = '2008-04-08') (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ sort order:
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
@@ -2110,18 +2052,16 @@ STAGE PLANS:
filterExpr: (ds = '2008-04-08') (type: boolean)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: '2008-04-08' (type: string)
- outputColumnNames: ds
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: ds (type: string)
+ keys: '2008-04-08' (type: string)
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: '2008-04-08' (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: '2008-04-08' (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -2129,9 +2069,9 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ 0
+ 1
+ Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -2152,21 +2092,21 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: string)
+ keys: '2008-04-08' (type: string)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Select Operator
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -2174,42 +2114,35 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
+POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
1000
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
#### A masked pattern was here ####
1000
-Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
PREHOOK: query: -- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
PREHOOK: type: QUERY
POSTHOOK: query: -- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -2262,12 +2195,12 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 77248 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((_col0 = _col2) or (_col1 = _col4)) (type: boolean)
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 77248 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2000 Data size: 77248 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -2288,8 +2221,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2298,8 +2231,8 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
-PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
+Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -2308,7 +2241,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Input: default@srcpart_date_hour
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
+POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -2319,10 +2252,10 @@ POSTHOOK: Input: default@srcpart_date_hour
#### A masked pattern was here ####
1500
PREHOOK: query: -- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
PREHOOK: type: QUERY
POSTHOOK: query: -- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -2457,8 +2390,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2467,7 +2400,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
+PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -2476,7 +2409,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Input: default@srcpart_date_hour
#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
+POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -2487,10 +2420,10 @@ POSTHOOK: Input: default@srcpart_date_hour
#### A masked pattern was here ####
500
PREHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
PREHOOK: type: QUERY
POSTHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -2599,8 +2532,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2609,9 +2542,9 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -2719,8 +2652,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2730,10 +2663,10 @@ STAGE PLANS:
ListSink
PREHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
PREHOOK: type: QUERY
POSTHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -2841,8 +2774,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -2853,11 +2786,11 @@ STAGE PLANS:
PREHOOK: query: -- with static pruning
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
PREHOOK: type: QUERY
POSTHOOK: query: -- with static pruning
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -2873,10 +2806,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -2911,22 +2844,23 @@ STAGE PLANS:
alias: srcpart
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
+ expressions: ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Map 5
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ds (type: string)
@@ -2947,11 +2881,13 @@ STAGE PLANS:
predicate: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean)
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Select Operator
+ expressions: hr (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: '11' (type: string)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: '11' (type: string)
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Reduce Operator Tree:
@@ -2961,11 +2897,12 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
+ outputColumnNames: _col1
Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: '11' (type: string)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: '11' (type: string)
+ Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Reduce Operator Tree:
@@ -2996,8 +2933,8 @@ STAGE PLANS:
compressed: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
@@ -3007,7 +2944,7 @@ STAGE PLANS:
ListSink
PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -3016,7 +2953,7 @@ PREHOOK: Input: default@srcpart_date
PREHOOK: Input: default@srcpart_hour
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -3026,10 +2963,10 @@ POSTHOOK: Input: default@srcpart_hour
#### A masked pattern was here ####
500
PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -3048,28 +2985,29 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean)
+ filterExpr: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean)
+ predicate: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
+ expressions: ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: string)
Map 5
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+ filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
Filter Operator
-
<TRUNCATED>
[4/4] hive git commit: HIVE-15357: Fix and re-enable the spark-only
tests (Rui reviewed by Chao)
Posted by li...@apache.org.
HIVE-15357: Fix and re-enable the spark-only tests (Rui reviewed by Chao)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/858ce8c2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/858ce8c2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/858ce8c2
Branch: refs/heads/master
Commit: 858ce8c22b65057e8cb3d9d87cb9b9dfb0f8666c
Parents: ee35ccb
Author: Rui Li <li...@apache.org>
Authored: Fri Dec 23 11:27:47 2016 +0800
Committer: Rui Li <li...@apache.org>
Committed: Fri Dec 23 11:27:47 2016 +0800
----------------------------------------------------------------------
.../hadoop/hive/cli/control/CliConfigs.java | 1 +
.../hive/ql/optimizer/physical/Vectorizer.java | 1 +
.../spark_dynamic_partition_pruning.q | 100 +-
...spark_vectorized_dynamic_partition_pruning.q | 1 +
.../spark/spark_combine_equivalent_work.q.out | 2 -
.../spark/spark_dynamic_partition_pruning.q.out | 1650 ++++++++----------
.../spark_dynamic_partition_pruning_2.q.out | 506 +++---
...k_vectorized_dynamic_partition_pruning.q.out | 1507 +++++++---------
8 files changed, 1726 insertions(+), 2042 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/858ce8c2/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index c5e027b..af8ec67 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -479,6 +479,7 @@ public class CliConfigs {
setQueryDir("ql/src/test/queries/clientpositive");
includesFrom(testConfigProps, "miniSparkOnYarn.query.files");
+ includesFrom(testConfigProps, "spark.only.query.files");
setResultsDir("ql/src/test/results/clientpositive/spark");
setLogDir("itests/qtest-spark/target/qfile-results/clientpositive/spark");
http://git-wip-us.apache.org/repos/asf/hive/blob/858ce8c2/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 468ccaf..2a99274 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -2501,6 +2501,7 @@ public class Vectorizer implements PhysicalPlanResolver {
case EXTRACT:
case EVENT:
case HASHTABLESINK:
+ case SPARKPRUNINGSINK:
vectorOp = OperatorFactory.getVectorOperator(
op.getCompilationOpContext(), op.getConf(), vContext);
break;
http://git-wip-us.apache.org/repos/asf/hive/blob/858ce8c2/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q
index 8b83ef6..3b6810b 100644
--- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q
@@ -1,58 +1,58 @@
-set hive.support.sql11.reserved.keywords=false;
set hive.optimize.ppd=true;
set hive.ppd.remove.duplicatefilters=true;
set hive.spark.dynamic.partition.pruning=true;
set hive.optimize.metadataonly=false;
set hive.optimize.index.filter=true;
+set hive.strict.checks.cartesian.product=false;
-- SORT_QUERY_RESULTS
select distinct ds from srcpart;
select distinct hr from srcpart;
-EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds;
-create table srcpart_date as select ds as ds, ds as date from srcpart group by ds;
+EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds;
+create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds;
create table srcpart_hour as select hr as hr, hr as hour from srcpart group by hr;
-create table srcpart_date_hour as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr;
+create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr;
create table srcpart_double_hour as select (hr*2) as hr, hr as hour from srcpart group by hr;
-- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
set hive.spark.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
set hive.spark.dynamic.partition.pruning=true;
select count(*) from srcpart where ds = '2008-04-08';
-- multiple sources, single key
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
set hive.spark.dynamic.partition.pruning=false;
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
set hive.spark.dynamic.partition.pruning=true;
select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
-- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
-select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
set hive.spark.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
-select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
set hive.spark.dynamic.partition.pruning=true;
select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
-- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
set hive.spark.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
set hive.spark.dynamic.partition.pruning=true;
select count(*) from srcpart where ds = 'I DONT EXIST';
@@ -75,34 +75,34 @@ select count(*) from srcpart where cast(hr as string) = 11;
-- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
-select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
+select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
select count(*) from srcpart where ds = '2008-04-08';
-- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
-select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
+select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
-- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
-select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
+select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
-- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
-- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
-- with static pruning
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
-- union + subquery
EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
@@ -117,26 +117,26 @@ set hive.auto.convert.join.noconditionaltask = true;
set hive.auto.convert.join.noconditionaltask.size = 10000000;
-- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
select count(*) from srcpart where ds = '2008-04-08';
-- multiple sources, single key
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
-- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
-select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
-- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
-- Disabled until TEZ-1486 is fixed
--- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
-- expressions
EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
@@ -146,27 +146,27 @@ select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart
select count(*) from srcpart where hr = 11;
-- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
-select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
+select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
select count(*) from srcpart where ds = '2008-04-08';
-- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
-- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
-- with static pruning
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
-- Disabled until TEZ-1486 is fixed
-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
--- where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+-- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
-- union + subquery
EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
http://git-wip-us.apache.org/repos/asf/hive/blob/858ce8c2/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q
index 293fcfc..791c3a9 100644
--- a/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q
@@ -4,6 +4,7 @@ set hive.spark.dynamic.partition.pruning=true;
set hive.optimize.metadataonly=false;
set hive.optimize.index.filter=true;
set hive.vectorized.execution.enabled=true;
+set hive.strict.checks.cartesian.product=false;
select distinct ds from srcpart;
http://git-wip-us.apache.org/repos/asf/hive/blob/858ce8c2/ql/src/test/results/clientpositive/spark/spark_combine_equivalent_work.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_combine_equivalent_work.q.out b/ql/src/test/results/clientpositive/spark/spark_combine_equivalent_work.q.out
index 93d07d6..b7116c1 100644
--- a/ql/src/test/results/clientpositive/spark/spark_combine_equivalent_work.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_combine_equivalent_work.q.out
@@ -30,11 +30,9 @@ POSTHOOK: Output: default@a1
POSTHOOK: Output: default@a1@end_dt=20161021
PREHOOK: query: insert into table a1 partition(END_DT='20161020') values('2000721360','20161001')
PREHOOK: type: QUERY
-PREHOOK: Input: default@values__tmp__table__1
PREHOOK: Output: default@a1@end_dt=20161020
POSTHOOK: query: insert into table a1 partition(END_DT='20161020') values('2000721360','20161001')
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@values__tmp__table__1
POSTHOOK: Output: default@a1@end_dt=20161020
POSTHOOK: Lineage: a1 PARTITION(end_dt=20161020).kehhao SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
POSTHOOK: Lineage: a1 PARTITION(end_dt=20161020).start_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]