You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text version.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/10/31 17:38:09 UTC

[hive] branch master updated: HIVE-22430: Avoid creation of additional RS for limit if it is equal to zero (Jesus Camacho Rodriguez, reviewed by Vineet Garg)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 244de3b  HIVE-22430: Avoid creation of additional RS for limit if it is equal to zero (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
244de3b is described below

commit 244de3bd3ba53b92224b14090696d877c56d5809
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Tue Oct 29 19:49:22 2019 -0700

    HIVE-22430: Avoid creation of additional RS for limit if it is equal to zero (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
    
    Close apache/hive#835
---
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     | 14 ++---
 .../llap/insert_only_empty_query.q.out             | 27 +++-------
 .../clientpositive/llap/llap_nullscan.q.out        | 32 +++++-------
 .../clientpositive/llap/optimize_nullscan.q.out    | 60 ++++------------------
 .../clientpositive/spark/optimize_nullscan.q.out   | 58 ++++-----------------
 5 files changed, 46 insertions(+), 145 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 2257cc1..0198c0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8595,7 +8595,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
   @SuppressWarnings("nls")
   private Operator genLimitMapRedPlan(String dest, QB qb, Operator input,
-                                      int offset, int limit, boolean extraMRStep) throws SemanticException {
+      int offset, int limit, boolean extraMRStep) throws SemanticException {
     // A map-only job can be optimized - instead of converting it to a
     // map-reduce job, we can have another map
     // job to do the same to avoid the cost of sorting in the map-reduce phase.
@@ -10924,23 +10924,23 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       if (limit != null) {
         // In case of order by, only 1 reducer is used, so no need of
         // another shuffle
-        curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
-            limit.intValue(), !hasOrderBy);
+        curr = genLimitMapRedPlan(dest, qb, curr, offset,
+            limit, limit != 0 && !hasOrderBy);
       }
     } else {
       // exact limit can be taken care of by the fetch operator
       if (limit != null) {
         boolean extraMRStep = true;
 
-        if (hasOrderBy ||
+        if (limit == 0 || hasOrderBy ||
             qb.getIsQuery() && qbp.getClusterByForClause(dest) == null &&
                 qbp.getSortByForClause(dest) == null) {
           extraMRStep = false;
         }
 
-        curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
-            limit.intValue(), extraMRStep);
-        qb.getParseInfo().setOuterQueryLimit(limit.intValue());
+        curr = genLimitMapRedPlan(dest, qb, curr, offset,
+            limit, extraMRStep);
+        qb.getParseInfo().setOuterQueryLimit(limit);
       }
       if (!queryState.getHiveOperation().equals(HiveOperation.CREATEVIEW)) {
         curr = genFileSinkPlan(dest, qb, curr);
diff --git a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out
index bfa72c5..cf0016c 100644
--- a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out
+++ b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out
@@ -54,9 +54,8 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -72,28 +71,14 @@ STAGE PLANS:
                       Number of rows: 0
                       Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        sort order: 
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
                         Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: decimal(3,2))
+                        value expressions: _col0 (type: string), _col2 (type: decimal(3,2))
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(3,2))
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 0
-                  Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col1 (type: int)
-                    Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: string), _col2 (type: decimal(3,2))
-        Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Select Operator
@@ -126,7 +111,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: int)
                       Statistics: Num rows: 1 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:decimal(3,2),max:decimal(3,2),countnulls:bigint,bitvector:binary>)
-        Reducer 4 
+        Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
diff --git a/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out b/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out
index c76d790..1a24361 100644
--- a/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out
@@ -169,8 +169,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-        Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -190,6 +189,13 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: src_orc_n1
+                  Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -198,11 +204,12 @@ STAGE PLANS:
                       Number of rows: 0
                       Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        sort order: 
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: string)
             Execution mode: vectorized, llap
-            LLAP IO: all inputs
+            LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -221,21 +228,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 3 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 0
-                  Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out
index 8945b7a..5fe2395 100644
--- a/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out
@@ -165,8 +165,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -259,12 +258,13 @@ STAGE PLANS:
                       Number of rows: 0
                       Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        null sort order: 
-                        sort order: 
+                        key expressions: _col0 (type: string)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: -1
-                        value expressions: _col0 (type: string)
-                        auto parallelism: false
+                        tag: 1
+                        auto parallelism: true
             Execution mode: vectorized, llap
             LLAP IO: no inputs
             Path -> Alias:
@@ -505,25 +505,6 @@ STAGE PLANS:
                   TotalFiles: 1
                   GatherStats: false
                   MultiFileSpray: false
-        Reducer 4 
-            Execution mode: vectorized, llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 0
-                  Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    null sort order: a
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
-                    tag: 1
-                    auto parallelism: true
 
   Stage: Stage-0
     Fetch Operator
@@ -979,7 +960,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 0
 2000
-Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[12][tables = [a, b]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: explain extended
 select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
 PREHOOK: type: QUERY
@@ -1009,8 +990,7 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE)
-        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1105,7 +1085,7 @@ STAGE PLANS:
                         null sort order: 
                         sort order: 
                         Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
-                        tag: -1
+                        tag: 1
                         value expressions: _col0 (type: string)
                         auto parallelism: false
             Execution mode: vectorized, llap
@@ -1348,24 +1328,6 @@ STAGE PLANS:
                   TotalFiles: 1
                   GatherStats: false
                   MultiFileSpray: false
-        Reducer 4 
-            Execution mode: vectorized, llap
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
-                Limit
-                  Number of rows: 0
-                  Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    null sort order: 
-                    sort order: 
-                    Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
-                    tag: 1
-                    value expressions: _col0 (type: string)
-                    auto parallelism: false
 
   Stage: Stage-0
     Fetch Operator
@@ -1373,7 +1335,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[12][tables = [a, b]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
diff --git a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
index c366443..57ba67e 100644
--- a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out
@@ -162,8 +162,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (GROUP, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -255,11 +254,12 @@ STAGE PLANS:
                       Number of rows: 0
                       Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        null sort order: 
-                        sort order: 
+                        key expressions: _col0 (type: string)
+                        null sort order: a
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                        tag: -1
-                        value expressions: _col0 (type: string)
+                        tag: 1
                         auto parallelism: false
             Execution mode: vectorized
             Path -> Alias:
@@ -498,25 +498,6 @@ STAGE PLANS:
                   TotalFiles: 1
                   GatherStats: false
                   MultiFileSpray: false
-        Reducer 4 
-            Execution mode: vectorized
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Limit
-                  Number of rows: 0
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    null sort order: a
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    tag: 1
-                    auto parallelism: false
 
   Stage: Stage-0
     Fetch Operator
@@ -967,7 +948,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 0
 2000
-Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[8][tables = [a, b]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain extended
 select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
 PREHOOK: type: QUERY
@@ -996,8 +977,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1)
-        Reducer 4 <- Map 3 (GROUP, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1091,7 +1071,7 @@ STAGE PLANS:
                         null sort order: 
                         sort order: 
                         Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                        tag: -1
+                        tag: 1
                         value expressions: _col0 (type: string)
                         auto parallelism: false
             Execution mode: vectorized
@@ -1331,24 +1311,6 @@ STAGE PLANS:
                   TotalFiles: 1
                   GatherStats: false
                   MultiFileSpray: false
-        Reducer 4 
-            Execution mode: vectorized
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                Limit
-                  Number of rows: 0
-                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                  Reduce Output Operator
-                    null sort order: 
-                    sort order: 
-                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
-                    tag: 1
-                    value expressions: _col0 (type: string)
-                    auto parallelism: false
 
   Stage: Stage-0
     Fetch Operator
@@ -1356,7 +1318,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[8][tables = [a, b]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src