You are viewing a plain-text version of this content. The canonical link to the original message was not preserved in this copy.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/10/31 17:38:09 UTC
[hive] branch master updated: HIVE-22430: Avoid creation of additional RS for limit if it is equal to zero (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 244de3b HIVE-22430: Avoid creation of additional RS for limit if it is equal to zero (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
244de3b is described below
commit 244de3bd3ba53b92224b14090696d877c56d5809
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Tue Oct 29 19:49:22 2019 -0700
HIVE-22430: Avoid creation of additional RS for limit if it is equal to zero (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
Close apache/hive#835
---
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 14 ++---
.../llap/insert_only_empty_query.q.out | 27 +++-------
.../clientpositive/llap/llap_nullscan.q.out | 32 +++++-------
.../clientpositive/llap/optimize_nullscan.q.out | 60 ++++------------------
.../clientpositive/spark/optimize_nullscan.q.out | 58 ++++-----------------
5 files changed, 46 insertions(+), 145 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 2257cc1..0198c0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8595,7 +8595,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
@SuppressWarnings("nls")
private Operator genLimitMapRedPlan(String dest, QB qb, Operator input,
- int offset, int limit, boolean extraMRStep) throws SemanticException {
+ int offset, int limit, boolean extraMRStep) throws SemanticException {
// A map-only job can be optimized - instead of converting it to a
// map-reduce job, we can have another map
// job to do the same to avoid the cost of sorting in the map-reduce phase.
@@ -10924,23 +10924,23 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (limit != null) {
// In case of order by, only 1 reducer is used, so no need of
// another shuffle
- curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
- limit.intValue(), !hasOrderBy);
+ curr = genLimitMapRedPlan(dest, qb, curr, offset,
+ limit, limit != 0 && !hasOrderBy);
}
} else {
// exact limit can be taken care of by the fetch operator
if (limit != null) {
boolean extraMRStep = true;
- if (hasOrderBy ||
+ if (limit == 0 || hasOrderBy ||
qb.getIsQuery() && qbp.getClusterByForClause(dest) == null &&
qbp.getSortByForClause(dest) == null) {
extraMRStep = false;
}
- curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
- limit.intValue(), extraMRStep);
- qb.getParseInfo().setOuterQueryLimit(limit.intValue());
+ curr = genLimitMapRedPlan(dest, qb, curr, offset,
+ limit, extraMRStep);
+ qb.getParseInfo().setOuterQueryLimit(limit);
}
if (!queryState.getHiveOperation().equals(HiveOperation.CREATEVIEW)) {
curr = genFileSinkPlan(dest, qb, curr);
diff --git a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out
index bfa72c5..cf0016c 100644
--- a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out
+++ b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out
@@ -54,9 +54,8 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -72,28 +71,14 @@ STAGE PLANS:
Number of rows: 0
Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- sort order:
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: decimal(3,2))
+ value expressions: _col0 (type: string), _col2 (type: decimal(3,2))
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(3,2))
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 0
- Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col2 (type: decimal(3,2))
- Reducer 3
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -126,7 +111,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:decimal(3,2),max:decimal(3,2),countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out b/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out
index c76d790..1a24361 100644
--- a/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out
@@ -169,8 +169,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
- Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -190,6 +189,13 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src_orc_n1
+ Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
@@ -198,11 +204,12 @@ STAGE PLANS:
Number of rows: 0
Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- sort order:
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string)
Execution mode: vectorized, llap
- LLAP IO: all inputs
+ LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -221,21 +228,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 3
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out
index 8945b7a..5fe2395 100644
--- a/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out
@@ -165,8 +165,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
- Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -259,12 +258,13 @@ STAGE PLANS:
Number of rows: 0
Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- null sort order:
- sort order:
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
- value expressions: _col0 (type: string)
- auto parallelism: false
+ tag: 1
+ auto parallelism: true
Execution mode: vectorized, llap
LLAP IO: no inputs
Path -> Alias:
@@ -505,25 +505,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
- Reducer 4
- Execution mode: vectorized, llap
- Needs Tagging: false
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- tag: 1
- auto parallelism: true
Stage: Stage-0
Fetch Operator
@@ -979,7 +960,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
0
2000
-Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[12][tables = [a, b]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: explain extended
select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
PREHOOK: type: QUERY
@@ -1009,8 +990,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE)
- Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1105,7 +1085,7 @@ STAGE PLANS:
null sort order:
sort order:
Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
+ tag: 1
value expressions: _col0 (type: string)
auto parallelism: false
Execution mode: vectorized, llap
@@ -1348,24 +1328,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
- Reducer 4
- Execution mode: vectorized, llap
- Needs Tagging: false
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 0
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
- tag: 1
- value expressions: _col0 (type: string)
- auto parallelism: false
Stage: Stage-0
Fetch Operator
@@ -1373,7 +1335,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[12][tables = [a, b]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
PREHOOK: type: QUERY
PREHOOK: Input: default@src
diff --git a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
index c366443..57ba67e 100644
--- a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
@@ -162,8 +162,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
- Reducer 4 <- Map 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -255,11 +254,12 @@ STAGE PLANS:
Number of rows: 0
Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- null sort order:
- sort order:
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- tag: -1
- value expressions: _col0 (type: string)
+ tag: 1
auto parallelism: false
Execution mode: vectorized
Path -> Alias:
@@ -498,25 +498,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
- Reducer 4
- Execution mode: vectorized
- Needs Tagging: false
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Limit
- Number of rows: 0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- tag: 1
- auto parallelism: false
Stage: Stage-0
Fetch Operator
@@ -967,7 +948,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
0
2000
-Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[8][tables = [a, b]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain extended
select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
PREHOOK: type: QUERY
@@ -996,8 +977,7 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1)
- Reducer 4 <- Map 3 (GROUP, 1)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1091,7 +1071,7 @@ STAGE PLANS:
null sort order:
sort order:
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- tag: -1
+ tag: 1
value expressions: _col0 (type: string)
auto parallelism: false
Execution mode: vectorized
@@ -1331,24 +1311,6 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: false
MultiFileSpray: false
- Reducer 4
- Execution mode: vectorized
- Needs Tagging: false
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Limit
- Number of rows: 0
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- tag: 1
- value expressions: _col0 (type: string)
- auto parallelism: false
Stage: Stage-0
Fetch Operator
@@ -1356,7 +1318,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[8][tables = [a, b]] in Work 'Reducer 2' is a cross product
PREHOOK: query: select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
PREHOOK: type: QUERY
PREHOOK: Input: default@src