Posted to commits@hive.apache.org by vg...@apache.org on 2019/01/29 02:34:32 UTC
hive git commit: HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch (Vineet Garg, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 698206a29 -> 974708336
HIVE-17020: Aggressive RS dedup can incorrectly remove OP tree branch (Vineet Garg, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/97470833
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/97470833
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/97470833
Branch: refs/heads/master
Commit: 974708336574dfb283a583ac1cebde27fc21b67a
Parents: 698206a
Author: Vineet Garg <vg...@apache.org>
Authored: Mon Jan 28 18:33:14 2019 -0800
Committer: Vineet Garg <vg...@apache.org>
Committed: Mon Jan 28 18:33:14 2019 -0800
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../optimizer/SortedDynPartitionOptimizer.java | 8 +-
.../correlation/CorrelationUtilities.java | 2 +-
.../ReduceSinkDeDuplicationUtils.java | 47 +++-
.../queries/clientpositive/reducesink_dedup.q | 12 +-
.../llap/dynpart_sort_opt_vectorization.q.out | 226 +++++++++++++++++--
.../llap/dynpart_sort_optimization.q.out | 106 ++++++++-
.../llap/dynpart_sort_optimization2.q.out | 92 ++++++--
.../clientpositive/llap/reducesink_dedup.q.out | 163 +++++++++++++
.../clientpositive/reducesink_dedup.q.out | 202 ++++++++++++++++-
10 files changed, 791 insertions(+), 68 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e0ea710..8ba0ddf 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -636,6 +636,7 @@ minillaplocal.query.files=\
ptf_streaming.q,\
runtime_stats_merge.q,\
quotedid_smb.q,\
+ reducesink_dedup.q,\
resourceplan.q,\
results_cache_1.q,\
results_cache_2.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index 6fd1093..9708c47 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -485,12 +485,10 @@ public class SortedDynPartitionOptimizer extends Transform {
ArrayList<ExprNodeDesc> keyCols = Lists.newArrayList();
List<Integer> newSortOrder = Lists.newArrayList();
List<Integer> newSortNullOrder = Lists.newArrayList();
- int numPartAndBuck = partitionPositions.size();
keyColsPosInVal.addAll(partitionPositions);
if (bucketColumns != null && !bucketColumns.isEmpty()) {
keyColsPosInVal.add(-1);
- numPartAndBuck += 1;
}
keyColsPosInVal.addAll(sortPositions);
@@ -501,10 +499,9 @@ public class SortedDynPartitionOptimizer extends Transform {
order = 0;
}
}
- for (int i = 0; i < numPartAndBuck; i++) {
+ for (int i = 0; i < keyColsPosInVal.size(); i++) {
newSortOrder.add(order);
}
- newSortOrder.addAll(sortOrder);
String orderStr = "";
for (Integer i : newSortOrder) {
@@ -525,10 +522,9 @@ public class SortedDynPartitionOptimizer extends Transform {
nullOrder = 1;
}
}
- for (int i = 0; i < numPartAndBuck; i++) {
+ for (int i = 0; i < keyColsPosInVal.size(); i++) {
newSortNullOrder.add(nullOrder);
}
- newSortNullOrder.addAll(sortNullOrder);
String nullOrderStr = "";
for (Integer i : newSortNullOrder) {
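
Note on the two hunks above: the order and null-order loops now run over keyColsPosInVal.size() (all key columns: partition positions, the optional bucket marker, and sort positions) instead of only the partition/bucket prefix, and the appends of the original sortOrder/sortNullOrder lists are dropped, so every key column gets one uniform direction. A small self-contained sketch of the resulting order-string construction, assuming Hive's convention of 1 meaning ascending ('+') and anything else descending ('-') (names simplified, not Hive's actual code):

    public class OrderStringDemo {
        // One direction character per key column, all identical --
        // mirroring the patched loop over keyColsPosInVal.size().
        static String orderString(int numKeyCols, int order) {
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < numKeyCols; i++) {
                sb.append(order == 1 ? '+' : '-');
            }
            return sb.toString();
        }

        public static void main(String[] args) {
            // e.g. 2 partition cols + 1 bucket marker + 2 sort cols = 5 keys
            System.out.println(orderString(5, 1));  // prints "+++++"
        }
    }
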
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
index 6fcd8d2..c553dca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
@@ -387,7 +387,7 @@ public final class CorrelationUtilities {
SelectDesc select = new SelectDesc(childRS.getConf().getValueCols(), childRS.getConf().getOutputValueColumnNames());
Operator<?> parent = getSingleParent(childRS);
- parent.getChildOperators().clear();
+ parent.removeChild(childRS);
SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
select, new RowSchema(inputRS.getSignature()), parent);
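
The one-line change above is the crux of the branch-loss bug: parent.getChildOperators().clear() detached every child of the parent, so in a multi-insert plan a sibling branch sharing that parent was silently dropped before the replacement SelectOperator was attached, whereas parent.removeChild(childRS) detaches only the operator being replaced. A minimal sketch of the difference, using a simplified stand-in for Hive's operator tree (the Op class below is illustrative, not Hive's API):

    import java.util.ArrayList;
    import java.util.List;

    // Simplified stand-in for Hive's Operator tree; illustrative only.
    class Op {
        final String name;
        final List<Op> children = new ArrayList<>();
        Op(String name) { this.name = name; }
    }

    public class BranchLossDemo {
        public static void main(String[] args) {
            Op parent = new Op("SEL");
            Op childRS = new Op("RS");   // the ReduceSink being replaced
            Op sibling = new Op("FS");   // e.g. a second INSERT branch
            parent.children.add(childRS);
            parent.children.add(sibling);

            // Old behavior: parent.children.clear() would drop ALL
            // children here, losing the sibling branch.

            // New behavior: detach only the child being replaced.
            parent.children.remove(childRS);
            System.out.println(parent.children.size());  // 1 -- sibling survives
        }
    }
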
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
index 7ccd4a3..6919da8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplicationUtils.java
@@ -475,6 +475,40 @@ public class ReduceSinkDeDuplicationUtils {
return 0;
}
+ // Check that the path between cRS and pRS contains only Select operators,
+ // i.e. the sequence must be pRS-SEL*-cRS,
+ // and ensure that no SEL in the path branches
+ protected static boolean checkSelectSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) {
+ Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
+ while (parent != pRS) {
+ assert parent.getNumParent() == 1;
+ if (!(parent instanceof SelectOperator)) {
+ return false;
+ }
+ if (parent.getChildOperators().size() > 1) {
+ return false;
+ }
+
+ parent = parent.getParentOperators().get(0);
+ }
+ return true;
+ }
+
+ // Check that the path between cRS and pRS forms a single branch,
+ // i.e. the sequence must be pRS-Op*-cRS
+ protected static boolean checkSingleBranchOnly(ReduceSinkOperator cRS, ReduceSinkOperator pRS) {
+ Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
+ while (parent != pRS) {
+ assert parent.getNumParent() == 1;
+ if (parent.getChildOperators().size() > 1) {
+ return false;
+ }
+
+ parent = parent.getParentOperators().get(0);
+ }
+ return true;
+ }
+
protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS,
ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
assert cRS.getNumParent() == 1;
@@ -484,15 +518,8 @@ public class ReduceSinkDeDuplicationUtils {
List<ExprNodeDesc> cKeys = cConf.getKeyCols();
List<ExprNodeDesc> pKeys = pConf.getKeyCols();
- // Check that in the path between cRS and pRS, there are only Select operators
- // i.e. the sequence must be pRS-SEL*-cRS
- Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
- while (parent != pRS) {
- assert parent.getNumParent() == 1;
- if (!(parent instanceof SelectOperator)) {
- return false;
- }
- parent = parent.getParentOperators().get(0);
+ if (!checkSelectSingleBranchOnly(cRS, pRS)) {
+ return false;
}
// If child keys are null or empty, we bail out
@@ -564,7 +591,7 @@ public class ReduceSinkDeDuplicationUtils {
// Replace pRS with cRS and remove operator sequence from pRS to cRS
// Recall that the sequence must be pRS-SEL*-cRS
- parent = cRS.getParentOperators().get(0);
+ Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
while (parent != pRS) {
dedupCtx.addRemovedOperator(parent);
parent = parent.getParentOperators().get(0);
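
Context for the refactoring above: aggressive dedup removes every operator on the pRS-SEL*-cRS path when it merges the two ReduceSinks, so if any intermediate operator also fed a second child, that whole branch would be orphaned. The new checkSelectSingleBranchOnly guard therefore rejects the merge as soon as an intermediate operator fans out. A self-contained sketch of the same walk-upward-and-check idea, with simplified types rather than Hive's Operator/SelectOperator classes:

    import java.util.ArrayList;
    import java.util.List;

    // Minimal illustrative node; not Hive's Operator class.
    class Node {
        final boolean isSelect;
        final List<Node> parents = new ArrayList<>();
        final List<Node> children = new ArrayList<>();
        Node(boolean isSelect) { this.isSelect = isSelect; }
    }

    public class PathCheckDemo {
        // Walk from cRS up to pRS; the path must be all Selects,
        // none of which branches to more than one child.
        static boolean selectSingleBranchOnly(Node cRS, Node pRS) {
            Node cur = cRS.parents.get(0);
            while (cur != pRS) {
                if (!cur.isSelect || cur.children.size() > 1) {
                    return false;  // a branch here would be lost by dedup
                }
                cur = cur.parents.get(0);
            }
            return true;
        }

        public static void main(String[] args) {
            Node pRS = new Node(false);
            Node sel = new Node(true);
            Node cRS = new Node(false);
            Node extra = new Node(false);      // e.g. second INSERT target
            sel.parents.add(pRS);  pRS.children.add(sel);
            cRS.parents.add(sel);  sel.children.add(cRS);
            sel.children.add(extra);           // SEL fans out
            System.out.println(selectSingleBranchOnly(cRS, pRS));  // false
        }
    }
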
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/queries/clientpositive/reducesink_dedup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/reducesink_dedup.q b/ql/src/test/queries/clientpositive/reducesink_dedup.q
index 352a558..b7f9a01 100644
--- a/ql/src/test/queries/clientpositive/reducesink_dedup.q
+++ b/ql/src/test/queries/clientpositive/reducesink_dedup.q
@@ -1,5 +1,13 @@
--! qt:dataset:part
-select p_name
+--! qt:dataset:src
+select p_name
from (select p_name from part distribute by 1 sort by 1) p
distribute by 1 sort by 1
-;
\ No newline at end of file
+;
+
+create temporary table d1 (key int);
+create temporary table d2 (key int);
+
+explain from (select key from src cluster by key) a
+ insert overwrite table d1 select a.key
+ insert overwrite table d2 select a.key cluster by a.key;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
index 53337fe..5a2cd47 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
@@ -170,6 +170,8 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -186,13 +188,61 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col4 (type: tinyint), _col0 (type: smallint)
- sort order: ++
- Map-reduce partition columns: _col4 (type: tinyint)
- value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+ key expressions: _col0 (type: smallint)
+ sort order: +
+ Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+ outputColumnNames: si, i, b, f, ds, t
+ Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+ keys: ds (type: string), t (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+ Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+ Reduce Output Operator
+ key expressions: _col4 (type: tinyint), _col0 (type: smallint)
+ sort order: ++
+ Map-reduce partition columns: _col4 (type: tinyint)
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -616,6 +666,8 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -632,13 +684,61 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col4 (type: tinyint), _col0 (type: smallint)
- sort order: ++
- Map-reduce partition columns: _col4 (type: tinyint)
- value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+ key expressions: _col0 (type: smallint)
+ sort order: +
+ Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+ outputColumnNames: si, i, b, f, ds, t
+ Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+ keys: ds (type: string), t (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+ Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+ Reduce Output Operator
+ key expressions: _col4 (type: tinyint), _col0 (type: smallint)
+ sort order: ++
+ Map-reduce partition columns: _col4 (type: tinyint)
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -1566,6 +1666,8 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1582,13 +1684,61 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col4 (type: tinyint), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col4 (type: tinyint)
- value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
+ key expressions: _col1 (type: int)
+ sort order: +
+ Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 11 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+ outputColumnNames: si, i, b, f, ds, t
+ Statistics: Num rows: 11 Data size: 1221 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+ keys: ds (type: string), t (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+ Statistics: Num rows: 5 Data size: 8935 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+ Reduce Output Operator
+ key expressions: _col4 (type: tinyint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col4 (type: tinyint)
+ value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 9255 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -1909,6 +2059,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1933,7 +2084,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float)
@@ -1944,15 +2095,54 @@ STAGE PLANS:
expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+ outputColumnNames: si, i, b, f, ds, t
+ Statistics: Num rows: 5 Data size: 555 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+ keys: ds (type: string), t (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+ Statistics: Num rows: 2 Data size: 3574 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+ Select Operator
+ expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.over1k_part2_orc
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Dp Sort State: PARTITION_SORTED
- Statistics: Num rows: 5 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 3702 Basic stats: COMPLETE Column stats: COMPLETE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.over1k_part2_orc
Stage: Stage-2
Dependency Collection
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
index cb3704f..14746aa 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
@@ -1597,6 +1597,8 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1613,16 +1615,64 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col4 (type: tinyint), _col1 (type: int)
- sort order: ++
- Map-reduce partition columns: _col4 (type: tinyint)
- value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
+ key expressions: _col1 (type: int)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Select Operator
+ expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+ outputColumnNames: si, i, b, f, ds, t
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+ keys: ds (type: string), t (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+ Reduce Output Operator
+ key expressions: _col4 (type: tinyint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col4 (type: tinyint)
+ value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
File Output Operator
@@ -1940,6 +1990,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1975,15 +2026,54 @@ STAGE PLANS:
expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+ outputColumnNames: si, i, b, f, ds, t
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(si, 'hll'), compute_stats(i, 'hll'), compute_stats(b, 'hll'), compute_stats(f, 'hll')
+ keys: ds (type: string), t (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: tinyint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+ Select Operator
+ expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.over1k_part2
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col4 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: string), _col1 (type: tinyint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Dp Sort State: PARTITION_SORTED
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.over1k_part2
Stage: Stage-2
Dependency Collection
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
index 30074ab..6b2decd 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out
@@ -111,7 +111,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float)
@@ -122,15 +122,39 @@ STAGE PLANS:
expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Dp Sort State: PARTITION_SORTED
+ Select Operator
+ expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
+ outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.ss_part
+ Group By Operator
+ aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll')
+ keys: ss_sold_date_sk (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.ss_part
Stage: Stage-2
Dependency Collection
@@ -352,26 +376,52 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
- sort order: ++
+ sort order: +
Map-reduce partition columns: _col2 (type: int)
- value expressions: _col0 (type: float), _col1 (type: float)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), KEY._col2 (type: int)
+ expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int)
outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
+ outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.ss_part
+ Group By Operator
+ aggregations: compute_stats(ss_net_paid_inc_tax, 'hll'), compute_stats(ss_net_profit, 'hll')
+ keys: ss_sold_date_sk (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.ss_part
Stage: Stage-2
Dependency Collection
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out
new file mode 100644
index 0000000..fbc75ce
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/reducesink_dedup.q.out
@@ -0,0 +1,163 @@
+PREHOOK: query: select p_name
+from (select p_name from part distribute by 1 sort by 1) p
+distribute by 1 sort by 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_name
+from (select p_name from part distribute by 1 sort by 1) p
+distribute by 1 sort by 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+almond antique burnished rose metallic
+almond antique burnished rose metallic
+almond antique chartreuse lavender yellow
+almond antique salmon chartreuse burlywood
+almond aquamarine burnished black steel
+almond aquamarine pink moccasin thistle
+almond antique violet chocolate turquoise
+almond antique violet turquoise frosted
+almond aquamarine midnight light salmon
+almond aquamarine rose maroon antique
+almond aquamarine sandy cyan gainsboro
+almond antique chartreuse khaki white
+almond antique forest lavender goldenrod
+almond antique metallic orange dim
+almond antique misty red olive
+almond antique olive coral navajo
+almond antique gainsboro frosted violet
+almond antique violet mint lemon
+almond aquamarine floral ivory bisque
+almond aquamarine yellow dodger mint
+almond azure aquamarine papaya violet
+almond antique blue firebrick mint
+almond antique medium spring khaki
+almond antique sky peru orange
+almond aquamarine dodger light gainsboro
+almond azure blanched chiffon midnight
+PREHOOK: query: create temporary table d1 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d1
+POSTHOOK: query: create temporary table d1 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d1
+PREHOOK: query: create temporary table d2 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d2
+POSTHOOK: query: create temporary table d2 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d2
+PREHOOK: query: explain from (select key from src cluster by key) a
+ insert overwrite table d1 select a.key
+ insert overwrite table d2 select a.key cluster by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@d1
+PREHOOK: Output: default@d2
+POSTHOOK: query: explain from (select key from src cluster by key) a
+ insert overwrite table d1 select a.key
+ insert overwrite table d2 select a.key cluster by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@d1
+POSTHOOK: Output: default@d2
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.d1
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.d2
+
+ Stage: Stage-3
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.d1
+
+ Stage: Stage-4
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.d2
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+
http://git-wip-us.apache.org/repos/asf/hive/blob/97470833/ql/src/test/results/clientpositive/reducesink_dedup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/reducesink_dedup.q.out b/ql/src/test/results/clientpositive/reducesink_dedup.q.out
index b89df52..2b068ac 100644
--- a/ql/src/test/results/clientpositive/reducesink_dedup.q.out
+++ b/ql/src/test/results/clientpositive/reducesink_dedup.q.out
@@ -1,10 +1,10 @@
-PREHOOK: query: select p_name
+PREHOOK: query: select p_name
from (select p_name from part distribute by 1 sort by 1) p
distribute by 1 sort by 1
PREHOOK: type: QUERY
PREHOOK: Input: default@part
#### A masked pattern was here ####
-POSTHOOK: query: select p_name
+POSTHOOK: query: select p_name
from (select p_name from part distribute by 1 sort by 1) p
distribute by 1 sort by 1
POSTHOOK: type: QUERY
@@ -36,3 +36,201 @@ almond antique medium spring khaki
almond antique sky peru orange
almond aquamarine dodger light gainsboro
almond azure blanched chiffon midnight
+PREHOOK: query: create temporary table d1 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d1
+POSTHOOK: query: create temporary table d1 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d1
+PREHOOK: query: create temporary table d2 (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@d2
+POSTHOOK: query: create temporary table d2 (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@d2
+PREHOOK: query: explain from (select key from src cluster by key) a
+ insert overwrite table d1 select a.key
+ insert overwrite table d2 select a.key cluster by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@d1
+PREHOOK: Output: default@d2
+POSTHOOK: query: explain from (select key from src cluster by key) a
+ insert overwrite table d1 select a.key
+ insert overwrite table d2 select a.key cluster by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@d1
+POSTHOOK: Output: default@d2
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+ Stage-4 depends on stages: Stage-2
+ Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
+ Stage-1 depends on stages: Stage-2
+ Stage-6 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.d1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll')
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.d2
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll')
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.d1
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.d1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key
+ Column Types: int
+ Table: default.d2
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.d2
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+