You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/01/04 00:24:17 UTC
[18/35] hive git commit: HIVE-16957: Support CTAS for auto gather
column stats (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/138b00ca/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
index 93791ac..7f2cc85 100644
--- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
@@ -5154,27 +5154,27 @@ STAGE PLANS:
TableScan
alias: src2_n2
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), count (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Execution mode: vectorized
@@ -5187,7 +5187,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5210,7 +5210,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5238,24 +5238,24 @@ STAGE PLANS:
TableScan
alias: src5_n1
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 54280 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: key (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51 Data size: 4845 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51 Data size: 4845 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col1 (type: bigint)
auto parallelism: true
@@ -5269,7 +5269,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5292,7 +5292,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5320,27 +5320,27 @@ STAGE PLANS:
TableScan
alias: src3
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), count (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Execution mode: vectorized
@@ -5353,7 +5353,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5376,7 +5376,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5404,27 +5404,27 @@ STAGE PLANS:
TableScan
alias: src4
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), count (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Execution mode: vectorized
@@ -5437,7 +5437,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5460,7 +5460,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5492,79 +5492,79 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 51 Data size: 8958 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51 Data size: 4845 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 154 Data size: 27838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 154 Data size: 27838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Reducer 3
- Execution mode: vectorized
+ Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Reducer 5
- Execution mode: vectorized
+ Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 154 Data size: 27838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 154 Data size: 27838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Reducer 7
- Execution mode: vectorized
+ Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 77 Data size: 13919 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
directory: hdfs://### HDFS PATH ###
NumFilesPerFileSink: 1
- Statistics: Num rows: 77 Data size: 13919 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -5624,11 +5624,11 @@ POSTHOOK: Input: default@src3
POSTHOOK: Input: default@src4
POSTHOOK: Input: default@src5_n1
POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 1
0 1
0 3
2 1
4 1
-5 1
5 3
8 1
9 1
@@ -5690,9 +5690,9 @@ STAGE PLANS:
Edges:
Map 1 <- Union 2 (CONTAINS)
Map 6 <- Union 2 (CONTAINS)
+ Map 8 <- Map 7 (BROADCAST_EDGE), Union 4 (CONTAINS)
Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
Reducer 5 <- Union 4 (SIMPLE_EDGE)
- Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE), Union 4 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -5700,27 +5700,27 @@ STAGE PLANS:
TableScan
alias: src2_n2
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), count (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Execution mode: vectorized
@@ -5733,7 +5733,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5756,7 +5756,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5784,27 +5784,27 @@ STAGE PLANS:
TableScan
alias: src3
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), count (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Execution mode: vectorized
@@ -5817,7 +5817,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5840,7 +5840,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5868,22 +5868,22 @@ STAGE PLANS:
TableScan
alias: a
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 54280 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
tag: 0
auto parallelism: true
Execution mode: vectorized
@@ -5896,7 +5896,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5919,7 +5919,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5942,30 +5942,51 @@ STAGE PLANS:
name: default.src4
Truncated Path -> Alias:
/src4 [a]
- Map 9
+ Map 8
Map Operator Tree:
TableScan
alias: b
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), count (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- value expressions: _col1 (type: bigint)
- auto parallelism: true
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 7 => 103
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ input vertices:
+ 0 Map 7
+ Position of Big Table: 1
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ auto parallelism: true
Execution mode: vectorized
Path -> Alias:
hdfs://### HDFS PATH ### [b]
@@ -5976,7 +5997,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -5999,7 +6020,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -6023,42 +6044,42 @@ STAGE PLANS:
Truncated Path -> Alias:
/src5_n1 [b]
Reducer 3
- Execution mode: vectorized
+ Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 216 Data size: 38782 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 216 Data size: 38782 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Reducer 5
- Execution mode: vectorized
+ Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 108 Data size: 19391 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
directory: hdfs://### HDFS PATH ###
NumFilesPerFileSink: 1
- Statistics: Num rows: 108 Data size: 19391 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -6075,35 +6096,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
- Reducer 8
- Needs Tagging: false
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col2
- Position of Big Table: 1
- Statistics: Num rows: 113 Data size: 19902 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 113 Data size: 19902 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 216 Data size: 38782 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: bigint)
- null sort order: aa
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 216 Data size: 38782 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- auto parallelism: true
Union 2
Vertex: Union 2
Union 4
@@ -6206,10 +6198,10 @@ STAGE PLANS:
Edges:
Map 1 <- Union 2 (CONTAINS)
Map 6 <- Union 2 (CONTAINS)
+ Map 7 <- Map 9 (BROADCAST_EDGE)
Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
Reducer 5 <- Union 4 (SIMPLE_EDGE)
- Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
- Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 4 (CONTAINS)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 4 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -6217,27 +6209,27 @@ STAGE PLANS:
TableScan
alias: src2_n2
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), count (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Execution mode: vectorized
@@ -6250,7 +6242,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -6273,7 +6265,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -6296,40 +6288,45 @@ STAGE PLANS:
name: default.src2_n2
Truncated Path -> Alias:
/src2_n2 [src2_n2]
- Map 10
+ Map 6
Map Operator Tree:
TableScan
- alias: b
+ alias: src3
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 54280 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
- tag: 1
- auto parallelism: true
+ expressions: key (type: string), count (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ auto parallelism: true
Execution mode: vectorized
Path -> Alias:
- hdfs://### HDFS PATH ### [b]
+ hdfs://### HDFS PATH ### [src3]
Path -> Partition:
hdfs://### HDFS PATH ###
Partition
- base file name: src5_n1
+ base file name: src3
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -6338,11 +6335,11 @@ STAGE PLANS:
columns.types string:bigint
#### A masked pattern was here ####
location hdfs://### HDFS PATH ###
- name default.src5_n1
+ name default.src3
numFiles 1
numRows 309
rawDataSize 1482
- serialization.ddl struct src5_n1 { string key, i64 count}
+ serialization.ddl struct src3 { string key, i64 count}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 1791
@@ -6352,7 +6349,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -6361,59 +6358,74 @@ STAGE PLANS:
columns.types string:bigint
#### A masked pattern was here ####
location hdfs://### HDFS PATH ###
- name default.src5_n1
+ name default.src3
numFiles 1
numRows 309
rawDataSize 1482
- serialization.ddl struct src5_n1 { string key, i64 count}
+ serialization.ddl struct src3 { string key, i64 count}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 1791
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src5_n1
- name: default.src5_n1
+ name: default.src3
+ name: default.src3
Truncated Path -> Alias:
- /src5_n1 [b]
- Map 6
+ /src3 [src3]
+ Map 7
Map Operator Tree:
TableScan
- alias: src3
+ alias: a
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: string), count (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string), _col1 (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: bigint)
- null sort order: aa
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 206 Data size: 37760 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- auto parallelism: true
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 9 => 103
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 9
+ Position of Big Table: 0
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 51 Data size: 4845 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 51 Data size: 4845 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: true
Execution mode: vectorized
Path -> Alias:
- hdfs://### HDFS PATH ### [src3]
+ hdfs://### HDFS PATH ### [a]
Path -> Partition:
hdfs://### HDFS PATH ###
Partition
- base file name: src3
+ base file name: src4
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -6422,11 +6434,11 @@ STAGE PLANS:
columns.types string:bigint
#### A masked pattern was here ####
location hdfs://### HDFS PATH ###
- name default.src3
+ name default.src4
numFiles 1
numRows 309
rawDataSize 1482
- serialization.ddl struct src3 { string key, i64 count}
+ serialization.ddl struct src4 { string key, i64 count}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 1791
@@ -6436,7 +6448,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -6445,54 +6457,54 @@ STAGE PLANS:
columns.types string:bigint
#### A masked pattern was here ####
location hdfs://### HDFS PATH ###
- name default.src3
+ name default.src4
numFiles 1
numRows 309
rawDataSize 1482
- serialization.ddl struct src3 { string key, i64 count}
+ serialization.ddl struct src4 { string key, i64 count}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 1791
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src3
- name: default.src3
+ name: default.src4
+ name: default.src4
Truncated Path -> Alias:
- /src3 [src3]
- Map 7
+ /src4 [a]
+ Map 9
Map Operator Tree:
TableScan
- alias: a
+ alias: b
filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 309 Data size: 54280 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 309 Data size: 26883 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (UDFToDouble(key) < 10.0D) (type: boolean)
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 103 Data size: 18093 Basic stats: COMPLETE Column stats: NONE
- tag: 0
+ Statistics: Num rows: 103 Data size: 8961 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
auto parallelism: true
Execution mode: vectorized
Path -> Alias:
- hdfs://### HDFS PATH ### [a]
+ hdfs://### HDFS PATH ### [b]
Path -> Partition:
hdfs://### HDFS PATH ###
Partition
- base file name: src4
+ base file name: src5_n1
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -6501,11 +6513,11 @@ STAGE PLANS:
columns.types string:bigint
#### A masked pattern was here ####
location hdfs://### HDFS PATH ###
- name default.src4
+ name default.src5_n1
numFiles 1
numRows 309
rawDataSize 1482
- serialization.ddl struct src4 { string key, i64 count}
+ serialization.ddl struct src5_n1 { string key, i64 count}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 1791
@@ -6515,7 +6527,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"count":"true","key":"true"}}
bucket_count -1
bucketing_version 2
column.name.delimiter ,
@@ -6524,57 +6536,57 @@ STAGE PLANS:
columns.types string:bigint
#### A masked pattern was here ####
location hdfs://### HDFS PATH ###
- name default.src4
+ name default.src5_n1
numFiles 1
numRows 309
rawDataSize 1482
- serialization.ddl struct src4 { string key, i64 count}
+ serialization.ddl struct src5_n1 { string key, i64 count}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
totalSize 1791
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src4
- name: default.src4
+ name: default.src5_n1
+ name: default.src5_n1
Truncated Path -> Alias:
- /src4 [a]
+ /src5_n1 [b]
Reducer 3
- Execution mode: vectorized
+ Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 18880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 159 Data size: 28742 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 159 Data size: 28742 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Reducer 5
- Execution mode: vectorized
+ Execution mode: vectorized, llap
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 79 Data size: 14280 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
directory: hdfs://### HDFS PATH ###
NumFilesPerFileSink: 1
- Statistics: Num rows: 79 Data size: 14280 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -6592,33 +6604,6 @@ STAGE PLANS:
GatherStats: false
MultiFileSpray: false
Reducer 8
- Needs Tagging: false
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0
- Position of Big Table: 0
- Statistics: Num rows: 113 Data size: 19902 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 113 Data size: 19902 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 113 Data size: 19902 Basic stats: COMPLETE Column stats: NONE
- tag: -1
- value expressions: _col1 (type: bigint)
- auto parallelism: true
- Reducer 9
Execution mode: vectorized
Needs Tagging: false
Reduce Operator Tree:
@@ -6627,18 +6612,18 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 56 Data size: 9862 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 51 Data size: 4845 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 159 Data size: 28742 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: aa
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 159 Data size: 28742 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 77 Data size: 7315 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
auto parallelism: true
Union 2
@@ -6678,11 +6663,11 @@ POSTHOOK: Input: default@src3
POSTHOOK: Input: default@src4
POSTHOOK: Input: default@src5_n1
POSTHOOK: Output: hdfs://### HDFS PATH ###
+5 1
0 1
0 3
2 1
4 1
-5 1
5 3
8 1
9 1
@@ -6759,7 +6744,7 @@ POSTHOOK: Output: default@tmp_unionall_n0
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
- Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-4 depends on stages: Stage-0, Stage-2
Stage-3 depends on stages: Stage-4
Stage-0 depends on stages: Stage-1
@@ -6769,10 +6754,11 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Map 1 <- Union 2 (CONTAINS)
- Map 4 <- Union 5 (CONTAINS)
- Map 7 <- Union 5 (CONTAINS)
+ Map 5 <- Union 6 (CONTAINS)
+ Map 8 <- Union 6 (CONTAINS)
Reducer 3 <- Union 2 (SIMPLE_EDGE)
- Reducer 6 <- Union 2 (CONTAINS), Union 5 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Union 2 (CONTAINS), Union 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -6796,7 +6782,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 408 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: a
@@ -6817,7 +6803,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: b
@@ -6839,7 +6825,7 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reducer 3
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
@@ -6864,7 +6850,35 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.tmp_unionall_n0
- Reducer 6
+ Select Operator
+ expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: col1, col2, col3
+ Statistics: Num rows: 408 Data size: 75888 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -6897,8 +6911,8 @@ STAGE PLANS:
Statistics: Num rows: 408 Data size: 72624 Basic stats: COMPLETE Column stats: COMPLETE
Union 2
Vertex: Union 2
- Union 5
- Vertex: Union 5
+ Union 6
+ Vertex: Union 6
Stage: Stage-2
Dependency Collection
@@ -6915,6 +6929,10 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
+ Column Stats Desc:
+ Columns: counts, key, value
+ Column Types: bigint, string, string
+ Table: default.tmp_unionall_n0
Stage: Stage-0
Move Operator
@@ -8255,10 +8273,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 3 <- Union 4 (CONTAINS)
- Map 6 <- Union 4 (CONTAINS)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
- Reducer 5 <- Union 4 (SIMPLE_EDGE)
+ Map 2 <- Union 3 (CONTAINS)
+ Map 5 <- Union 3 (CONTAINS)
+ Reducer 4 <- Map 1 (BROADCAST_EDGE), Union 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -8266,105 +8283,100 @@ STAGE PLANS:
TableScan
alias: a
filterExpr: (UDFToDouble(key) = 97.0D) (type: boolean)
- Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (UDFToDouble(key) = 97.0D) (type: boolean)
- Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: dim_pho
filterExpr: (UDFToDouble(key) = 97.0D) (type: boolean)
- Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (UDFToDouble(key) = 97.0D) (type: boolean)
- Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: jackson_sev_add
filterExpr: (UDFToDouble(key) = 97.0D) (type: boolean)
- Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (UDFToDouble(key) = 97.0D) (type: boolean)
- Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
- Reducer 2
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 275 Data size: 48171 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 48171 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 275 Data size: 48171 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 5
+ Reducer 4
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Union 4
- Vertex: Union 4
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 3
+ Vertex: Union 3
Stage: Stage-0
Fetch Operator
@@ -9035,12 +9047,15 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
Reduce Output Operator
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
Reduce Output Operator
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map 11
Map Operator Tree:
@@ -9055,6 +9070,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Reducer 10
Execution mode: vectorized
@@ -9745,66 +9761,67 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t1_n93
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Map 7
Map Operator Tree:
TableScan
alias: t2_n58
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Reducer 3
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(1)
keys: _col0 (type: string)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -9813,61 +9830,63 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: key, cnt
- Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Select Operator
expressions: _col1 (type: string)
outputColumnNames: _col1
- Statistic
<TRUNCATED>