You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dj...@apache.org on 2018/07/09 22:31:25 UTC
hive git commit: HIVE-20100 : OpTraits : Select Optraits should stop
when a mismatch is detected (Deepak Jaiswal, reviewed by Jason Dere)
Repository: hive
Updated Branches:
refs/heads/master 5016d6f05 -> ddf7e25d3
HIVE-20100 : OpTraits : Select Optraits should stop when a mismatch is detected (Deepak Jaiswal, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ddf7e25d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ddf7e25d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ddf7e25d
Branch: refs/heads/master
Commit: ddf7e25d3ee94224c17e353e334728241515fffb
Parents: 5016d6f
Author: Deepak Jaiswal <dj...@apache.org>
Authored: Mon Jul 9 15:31:13 2018 -0700
Committer: Deepak Jaiswal <dj...@apache.org>
Committed: Mon Jul 9 15:31:13 2018 -0700
----------------------------------------------------------------------
.../annotation/OpTraitsRulesProcFactory.java | 59 +++++-----
.../clientpositive/llap/subquery_notin.q.out | 118 ++++++++++++-------
.../results/clientpositive/llap/tez_join.q.out | 51 +++++---
3 files changed, 138 insertions(+), 90 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ddf7e25d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
index 263770e..89db530 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java
@@ -308,44 +308,40 @@ public class OpTraitsRulesProcFactory {
public static class SelectRule implements NodeProcessor {
- boolean processSortCols = false;
-
// For bucket columns
// If all the columns match to the parent, put them in the bucket cols
// else, add empty list.
// For sort columns
// Keep the subset of all the columns as long as order is maintained.
public List<List<String>> getConvertedColNames(
- List<List<String>> parentColNames, SelectOperator selOp) {
+ List<List<String>> parentColNames, SelectOperator selOp, boolean processSortCols) {
List<List<String>> listBucketCols = new ArrayList<>();
- if (selOp.getColumnExprMap() != null) {
- if (parentColNames != null) {
- for (List<String> colNames : parentColNames) {
- List<String> bucketColNames = new ArrayList<>();
- boolean found = false;
- for (String colName : colNames) {
- for (Entry<String, ExprNodeDesc> entry : selOp.getColumnExprMap().entrySet()) {
- if ((entry.getValue() instanceof ExprNodeColumnDesc) &&
- (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName))) {
- bucketColNames.add(entry.getKey());
- found = true;
- break;
- }
- }
- if (!found) {
- // Bail out on first missed column.
- break;
- }
- }
- if (!processSortCols && !found) {
- // While processing bucket columns, atleast one bucket column
- // missed. This results in a different bucketing scheme.
- // Add empty list
- listBucketCols.add(new ArrayList<>());
- } else {
- listBucketCols.add(bucketColNames);
+ for (List<String> colNames : parentColNames) {
+ List<String> bucketColNames = new ArrayList<>();
+ boolean found = false;
+ for (String colName : colNames) {
+ // Reset found
+ found = false;
+ for (Entry<String, ExprNodeDesc> entry : selOp.getColumnExprMap().entrySet()) {
+ if ((entry.getValue() instanceof ExprNodeColumnDesc) &&
+ (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName))) {
+ bucketColNames.add(entry.getKey());
+ found = true;
+ break;
}
}
+ if (!found) {
+ // Bail out on first missed column.
+ break;
+ }
+ }
+ if (!processSortCols && !found) {
+ // While processing bucket columns, atleast one bucket column
+ // missed. This results in a different bucketing scheme.
+ // Add empty list
+ listBucketCols.add(new ArrayList<>());
+ } else {
+ listBucketCols.add(bucketColNames);
}
}
@@ -363,13 +359,12 @@ public class OpTraitsRulesProcFactory {
List<List<String>> listSortCols = null;
if (selOp.getColumnExprMap() != null) {
if (parentBucketColNames != null) {
- listBucketCols = getConvertedColNames(parentBucketColNames, selOp);
+ listBucketCols = getConvertedColNames(parentBucketColNames, selOp, false);
}
List<List<String>> parentSortColNames =
selOp.getParentOperators().get(0).getOpTraits().getSortCols();
if (parentSortColNames != null) {
- processSortCols = true;
- listSortCols = getConvertedColNames(parentSortColNames, selOp);
+ listSortCols = getConvertedColNames(parentSortColNames, selOp, true);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ddf7e25d/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index c16b143..469ec69 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -6102,9 +6102,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
- Reducer 7 <- Map 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+ Reducer 5 <- Map 3 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -6160,7 +6162,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 6
+ Map 7
Map Operator Tree:
TableScan
alias: fixob
@@ -6223,7 +6225,8 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Reducer 7
+ Reducer 5
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int), KEY._col1 (type: int)
@@ -6234,27 +6237,42 @@ STAGE PLANS:
expressions: _col0 (type: int), true (type: boolean)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
+ Reducer 8
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: boolean)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -6320,9 +6338,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE)
Reducer 3 <- Map 1 (SIMPLE_EDGE)
- Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -6370,7 +6390,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: t_n0
@@ -6434,13 +6454,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reducer 4
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: int)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int), KEY._col1 (type: int)
@@ -6451,20 +6465,42 @@ STAGE PLANS:
expressions: _col0 (type: int), true (type: boolean)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1, _col2
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: boolean)
+ value expressions: _col1 (type: boolean)
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/ddf7e25d/ql/src/test/results/clientpositive/llap/tez_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_join.q.out b/ql/src/test/results/clientpositive/llap/tez_join.q.out
index 53f8895..ecf9299 100644
--- a/ql/src/test/results/clientpositive/llap/tez_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_join.q.out
@@ -49,7 +49,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -71,7 +73,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: t2_n26
@@ -91,32 +93,47 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator