You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2018/10/21 19:47:11 UTC
[5/7] hive git commit: HIVE-20703: Put dynamic sort partition
optimization under cost based decision (Vineet Garg,
reviewed by Prasanth Jayachandran, Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
index 5fd1bf6..40dc5e9 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
@@ -515,20 +515,19 @@ STAGE PLANS:
key expressions: _col3 (type: string), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: +++
Map-reduce partition columns: _col3 (type: string)
- Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
+ value expressions: 'foo' (type: string), 'bar' (type: string)
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), KEY._col3 (type: string), KEY._bucket_number (type: string)
+ expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._bucket_number (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _bucket_number
- Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1286,20 +1285,19 @@ STAGE PLANS:
key expressions: '2008-04-08' (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int)
- Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
+ value expressions: 'foo' (type: string), 'bar' (type: string)
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
+ expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
- Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1431,7 +1429,6 @@ STAGE PLANS:
key expressions: _col1 (type: string), _col2 (type: int), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
- Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -1440,11 +1437,10 @@ STAGE PLANS:
Select Operator
expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), KEY._col1 (type: string), KEY._col2 (type: int), KEY._bucket_number (type: string)
outputColumnNames: _col0, _col1, _col2, _bucket_number
- Statistics: Num rows: 5 Data size: 2240 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 2240 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1636,7 +1632,6 @@ STAGE PLANS:
key expressions: _col3 (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
- Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string), 'bar' (type: string)
Execution mode: llap
LLAP IO: may be used (ACID table)
@@ -1646,11 +1641,10 @@ STAGE PLANS:
Select Operator
expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
- Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1745,7 +1739,6 @@ STAGE PLANS:
key expressions: _col3 (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
sort order: ++++
Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
- Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col1 (type: string), 'bar' (type: string)
Execution mode: llap
LLAP IO: may be used (ACID table)
@@ -1755,11 +1748,10 @@ STAGE PLANS:
Select Operator
expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
- Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
Dp Sort State: PARTITION_BUCKET_SORTED
- Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
index 9c7babb..916db93 100644
--- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
+++ b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
@@ -299,7 +299,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d_n0 { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 545
+ totalSize 539
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -391,7 +391,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d_n0 { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 595
+ totalSize 594
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -690,7 +690,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d_n0 { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 545
+ totalSize 539
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -782,7 +782,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d_n0 { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 595
+ totalSize 594
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index cf38816..9bec309 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -372,7 +372,7 @@ PREHOOK: query: create table src_dp1 (f string, w string, m int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@src_dp1
-Warning: Shuffle Join MERGEJOIN[54][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[62][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: from src_dp, src_dp1
insert into dest_dp1 partition (year) select first, word, year
insert into dest_dp2 partition (y, m) select first, word, year, month
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out
index 7e6e88b..18dca49 100644
--- a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out
@@ -1916,7 +1916,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: oft
- Statistics: Num rows: 12288 Data size: 1884148 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12288 Data size: 13243096 Basic stats: COMPLETE Column stats: PARTIAL
TableScan Vectorization:
native: true
vectorizationSchemaColumns: [0:csmallint:smallint, 1:cint:int, 2:cbigint:bigint, 3:cfloat:float, 4:cdouble:double, 5:cstring1:string, 6:cchar1:char(255), 7:cvchar1:varchar(255), 8:cboolean1:boolean, 9:cboolean2:boolean, 10:ctinyint:tinyint, 11:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
@@ -1939,7 +1939,7 @@ STAGE PLANS:
outputColumnNames: _col1, _col6, _col7, _col10
input vertices:
1 Map 2
- Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: _col10 (type: tinyint), _col1 (type: int), _col6 (type: char(255)), _col7 (type: varchar(255))
outputColumnNames: _col0, _col1, _col2, _col3
@@ -1947,13 +1947,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [10, 1, 6, 7]
- Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/llap_smb.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_smb.q.out b/ql/src/test/results/clientpositive/llap/llap_smb.q.out
index cdc10f8..5f5a747 100644
--- a/ql/src/test/results/clientpositive/llap/llap_smb.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_smb.q.out
@@ -269,10 +269,10 @@ STAGE PLANS:
TableScan
alias: a
filterExpr: id is not null (type: boolean)
- Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: id is not null (type: boolean)
- Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
Merge Join Operator
condition map:
Inner Join 0 to 1
@@ -280,18 +280,18 @@ STAGE PLANS:
0 id (type: bigint)
1 id (type: bigint)
outputColumnNames: _col2, _col3
- Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: count()
keys: _col2 (type: int), _col3 (type: smallint)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: smallint)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: smallint)
- Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col2 (type: bigint)
Execution mode: llap
Reducer 2
@@ -302,10 +302,10 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: smallint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/llap_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out b/ql/src/test/results/clientpositive/llap/llap_stats.q.out
index 5bba6b0..89c0c57 100644
--- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out
@@ -153,11 +153,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: llap_stats
- Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 10 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
outputColumnNames: ctinyint, csmallint, cint
- Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 10 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: compute_stats(ctinyint, 'hll'), compute_stats(csmallint, 'hll')
keys: cint (type: int)