You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/05/31 14:55:37 UTC
[1/2] hive git commit: HIVE-13693: Multi-insert query drops Filter
before file output when there is a.val <> b.val (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master aed350351 -> 411ab3feb
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/ql/src/test/results/clientpositive/subquery_multiinsert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out
index 62c5bf2..63f93fb 100644
--- a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out
+++ b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out
@@ -126,12 +126,15 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: string), value (type: string)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
TableScan
Reduce Output Operator
sort order:
@@ -488,7 +491,7 @@ POSTHOOK: Input: default@src_5
199 val_199
199 val_199
2 val_2
-Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product
Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: explain
from src b
@@ -614,12 +617,15 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -773,12 +779,15 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
value expressions: key (type: string), value (type: string)
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
TableScan
Reduce Output Operator
sort order:
@@ -799,7 +808,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-Warning: Map Join MAPJOIN[55][bigTable=b] in task 'Stage-13:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product
Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: from src b
INSERT OVERWRITE TABLE src_4
[2/2] hive git commit: HIVE-13693: Multi-insert query drops Filter
before file output when there is a.val <> b.val (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
Posted by jc...@apache.org.
HIVE-13693: Multi-insert query drops Filter before file output when there is a.val <> b.val (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/411ab3fe
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/411ab3fe
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/411ab3fe
Branch: refs/heads/master
Commit: 411ab3febcb5fd06233cc82c6f95f8e680706356
Parents: aed3503
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Tue May 31 15:25:51 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Tue May 31 15:53:36 2016 +0100
----------------------------------------------------------------------
.../results/positive/hbase_ppd_key_range.q.out | 140 ++---
.../test/results/positive/hbase_pushdown.q.out | 35 +-
.../test/results/positive/ppd_key_ranges.q.out | 70 +--
.../hadoop/hive/ql/ppd/OpProcFactory.java | 60 +-
.../clientpositive/multi_insert_with_join2.q | 51 ++
.../groupby_multi_single_reducer.q.out | 112 ++--
.../clientpositive/multi_insert_gby.q.out | 50 +-
.../multi_insert_with_join2.q.out | 555 +++++++++++++++++++
.../spark/groupby_multi_single_reducer.q.out | 112 ++--
.../clientpositive/spark/multi_insert_gby.q.out | 50 +-
.../spark/subquery_multiinsert.q.out | 70 +--
.../clientpositive/subquery_multiinsert.q.out | 49 +-
12 files changed, 948 insertions(+), 406 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out b/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
index 0ef0efd..f92371d 100644
--- a/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
@@ -29,37 +29,22 @@ POSTHOOK: query: -- with full pushdown
explain select * from hbase_pushdown where key>'90'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hbase_pushdown
- filterExpr: (key > '90') (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: (key > '90') (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: hbase_pushdown
+ filterExpr: (key > '90') (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ ListSink
PREHOOK: query: select * from hbase_pushdown where key>'90'
PREHOOK: type: QUERY
@@ -185,37 +170,22 @@ POSTHOOK: query: -- with cnostant expressinon
explain select * from hbase_pushdown where key>=cast(40 + 50 as string)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hbase_pushdown
- filterExpr: (key >= '90') (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: (key >= '90') (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: hbase_pushdown
+ filterExpr: (key >= '90') (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ ListSink
PREHOOK: query: select * from hbase_pushdown where key>=cast(40 + 50 as string)
PREHOOK: type: QUERY
@@ -429,37 +399,22 @@ explain select * from hbase_pushdown
where (case when key<'90' then 2 else 4 end) > 3
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hbase_pushdown
- filterExpr: (key >= '90') (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: (key >= '90') (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: hbase_pushdown
+ filterExpr: (key >= '90') (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ ListSink
PREHOOK: query: -- with a predicate which is under an OR, so it should
-- be ignored by pushdown
@@ -514,37 +469,22 @@ explain select * from hbase_pushdown where key > '281'
and key < '287'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hbase_pushdown
- filterExpr: ((key > '281') and (key < '287')) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: ((key > '281') and (key < '287')) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: hbase_pushdown
+ filterExpr: ((key > '281') and (key < '287')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ ListSink
PREHOOK: query: select * from hbase_pushdown where key > '281'
and key < '287'
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/hbase-handler/src/test/results/positive/hbase_pushdown.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/hbase_pushdown.q.out b/hbase-handler/src/test/results/positive/hbase_pushdown.q.out
index d5661be..d957a7c 100644
--- a/hbase-handler/src/test/results/positive/hbase_pushdown.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_pushdown.q.out
@@ -29,37 +29,22 @@ POSTHOOK: query: -- with full pushdown
explain select * from hbase_pushdown where key=90
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hbase_pushdown
- filterExpr: (key = 90) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: (key = 90) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: 90 (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: hbase_pushdown
+ filterExpr: (key = 90) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: 90 (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ ListSink
PREHOOK: query: select * from hbase_pushdown where key=90
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/hbase-handler/src/test/results/positive/ppd_key_ranges.q.out
----------------------------------------------------------------------
diff --git a/hbase-handler/src/test/results/positive/ppd_key_ranges.q.out b/hbase-handler/src/test/results/positive/ppd_key_ranges.q.out
index 812ce95..1897777 100644
--- a/hbase-handler/src/test/results/positive/ppd_key_ranges.q.out
+++ b/hbase-handler/src/test/results/positive/ppd_key_ranges.q.out
@@ -27,37 +27,22 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key < 21
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hbase_ppd_keyrange
- filterExpr: ((key > 8) and (key < 21)) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: ((key > 8) and (key < 21)) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: hbase_ppd_keyrange
+ filterExpr: ((key > 8) and (key < 21)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ ListSink
PREHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key < 21
PREHOOK: type: QUERY
@@ -81,37 +66,22 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hbase_ppd_keyrange
- filterExpr: ((key > 8) and (key <= 17)) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: ((key > 8) and (key <= 17)) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: hbase_ppd_keyrange
+ filterExpr: ((key > 8) and (key <= 17)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ ListSink
PREHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key <= 17
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index 5390ba7..73c2c19 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -77,6 +77,9 @@ import org.apache.hadoop.mapred.JobConf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.collect.Lists;
+
+
/**
* Operator factory for predicate pushdown processing of operator graph Each
* operator determines the pushdown predicates by walking the expression tree.
@@ -100,9 +103,12 @@ public final class OpProcFactory {
}
if (current.getNumChild() > 1) {
// ppd for multi-insert query is not yet implemented
- // no-op for leafs
- for (Operator<?> child : current.getChildOperators()) {
- removeCandidates(child, owi); // remove candidated filters on this branch
+ // we assume that nothing can is pushed beyond this operator
+ List<Operator<? extends OperatorDesc>> children =
+ Lists.newArrayList(current.getChildOperators());
+ for (Operator<?> child : children) {
+ ExprWalkerInfo childInfo = owi.getPrunedPreds(child);
+ createFilter(child, childInfo, owi);
}
return null;
}
@@ -111,15 +117,40 @@ public final class OpProcFactory {
private static void removeCandidates(Operator<?> operator, OpWalkerInfo owi) {
if (operator instanceof FilterOperator) {
+ if (owi.getCandidateFilterOps().contains(operator)) {
+ removeOperator(operator);
+ }
owi.getCandidateFilterOps().remove(operator);
}
if (operator.getChildOperators() != null) {
- for (Operator<?> child : operator.getChildOperators()) {
+ List<Operator<? extends OperatorDesc>> children =
+ Lists.newArrayList(operator.getChildOperators());
+ for (Operator<?> child : children) {
removeCandidates(child, owi);
}
}
}
+ private static void removeAllCandidates(OpWalkerInfo owi) {
+ for (FilterOperator operator : owi.getCandidateFilterOps()) {
+ removeOperator(operator);
+ }
+ owi.getCandidateFilterOps().clear();
+ }
+
+ private static void removeOperator(Operator<? extends OperatorDesc> operator) {
+ List<Operator<? extends OperatorDesc>> children = operator.getChildOperators();
+ List<Operator<? extends OperatorDesc>> parents = operator.getParentOperators();
+ for (Operator<? extends OperatorDesc> parent : parents) {
+ parent.getChildOperators().addAll(children);
+ parent.removeChild(operator);
+ }
+ for (Operator<? extends OperatorDesc> child : children) {
+ child.getParentOperators().addAll(parents);
+ child.removeParent(operator);
+ }
+ }
+
/**
* Processor for Script Operator Prevents any predicates being pushed.
*/
@@ -386,6 +417,12 @@ public final class OpProcFactory {
OpWalkerInfo owi = (OpWalkerInfo) procCtx;
TableScanOperator tsOp = (TableScanOperator) nd;
mergeWithChildrenPred(tsOp, owi, null, null);
+ if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
+ HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
+ // remove all the candidate filter operators
+ // when we get to the TS
+ removeAllCandidates(owi);
+ }
ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp);
return createFilter(tsOp, pushDownPreds, owi);
}
@@ -899,20 +936,9 @@ public final class OpProcFactory {
if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
// remove the candidate filter ops
- for (FilterOperator fop : owi.getCandidateFilterOps()) {
- List<Operator<? extends OperatorDesc>> children = fop.getChildOperators();
- List<Operator<? extends OperatorDesc>> parents = fop.getParentOperators();
- for (Operator<? extends OperatorDesc> parent : parents) {
- parent.getChildOperators().addAll(children);
- parent.removeChild(fop);
- }
- for (Operator<? extends OperatorDesc> child : children) {
- child.getParentOperators().addAll(parents);
- child.removeParent(fop);
- }
- }
- owi.getCandidateFilterOps().clear();
+ removeCandidates(op, owi);
}
+
// push down current ppd context to newly added filter
ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
if (walkerInfo != null) {
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/ql/src/test/queries/clientpositive/multi_insert_with_join2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_with_join2.q b/ql/src/test/queries/clientpositive/multi_insert_with_join2.q
new file mode 100644
index 0000000..1529fa2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/multi_insert_with_join2.q
@@ -0,0 +1,51 @@
+set hive.cbo.enable=false;
+
+CREATE TABLE T_A ( id STRING, val STRING );
+CREATE TABLE T_B ( id STRING, val STRING );
+CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING );
+CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING );
+
+INSERT INTO TABLE T_A
+VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103');
+
+INSERT INTO TABLE T_B
+VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104');
+
+explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103';
+
+explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val;
+
+explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val;
+
+explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2';
+
+explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2';
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
index 1381d91..f5c7c7f 100644
--- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
+++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
@@ -56,12 +56,12 @@ INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-2
Stage-0 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-0
Stage-1 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-3
- Stage-6 depends on stages: Stage-2
+ Stage-6 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-3
@@ -83,6 +83,24 @@ STAGE PLANS:
Reduce Operator Tree:
Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g4
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -125,26 +143,8 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_g3
- Group By Operator
- aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g4
- Stage: Stage-0
+ Stage: Stage-2
Move Operator
tables:
replace: true
@@ -152,12 +152,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g2
+ name: default.dest_g4
Stage: Stage-4
Stats-Aggr Operator
- Stage: Stage-1
+ Stage: Stage-0
Move Operator
tables:
replace: true
@@ -165,12 +165,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g3
+ name: default.dest_g2
Stage: Stage-5
Stats-Aggr Operator
- Stage: Stage-2
+ Stage: Stage-1
Move Operator
tables:
replace: true
@@ -178,7 +178,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g4
+ name: default.dest_g3
Stage: Stage-6
Stats-Aggr Operator
@@ -278,12 +278,12 @@ INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-2
Stage-0 depends on stages: Stage-5
- Stage-6 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-0
Stage-1 depends on stages: Stage-5
- Stage-7 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-5
- Stage-8 depends on stages: Stage-2
+ Stage-8 depends on stages: Stage-1
Stage-9 depends on stages: Stage-5
Stage-10 depends on stages: Stage-9
Stage-3 depends on stages: Stage-10
@@ -321,6 +321,24 @@ STAGE PLANS:
Reduce Operator Tree:
Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g4
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -363,26 +381,8 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_g3
- Group By Operator
- aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g4
- Stage: Stage-0
+ Stage: Stage-2
Move Operator
tables:
replace: true
@@ -390,12 +390,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g2
+ name: default.dest_g4
Stage: Stage-6
Stats-Aggr Operator
- Stage: Stage-1
+ Stage: Stage-0
Move Operator
tables:
replace: true
@@ -403,12 +403,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g3
+ name: default.dest_g2
Stage: Stage-7
Stats-Aggr Operator
- Stage: Stage-2
+ Stage: Stage-1
Move Operator
tables:
replace: true
@@ -416,7 +416,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g4
+ name: default.dest_g3
Stage: Stage-8
Stats-Aggr Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/ql/src/test/results/clientpositive/multi_insert_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/multi_insert_gby.q.out
index 7c5e589..190f430 100644
--- a/ql/src/test/results/clientpositive/multi_insert_gby.q.out
+++ b/ql/src/test/results/clientpositive/multi_insert_gby.q.out
@@ -215,10 +215,10 @@ INSERT OVERWRITE TABLE e2
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
- Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-2
@@ -239,6 +239,24 @@ STAGE PLANS:
Reduce Operator Tree:
Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.e2
Filter Operator
predicate: (KEY._col0 > 450) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -260,26 +278,8 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.e1
- Group By Operator
- aggregations: count()
- keys: KEY._col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.e2
- Stage: Stage-0
+ Stage: Stage-1
Move Operator
tables:
replace: true
@@ -287,12 +287,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.e1
+ name: default.e2
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-1
+ Stage: Stage-0
Move Operator
tables:
replace: true
@@ -300,7 +300,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.e2
+ name: default.e1
Stage: Stage-4
Stats-Aggr Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out
new file mode 100644
index 0000000..e252de1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out
@@ -0,0 +1,555 @@
+PREHOOK: query: CREATE TABLE T_A ( id STRING, val STRING )
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T_A
+POSTHOOK: query: CREATE TABLE T_A ( id STRING, val STRING )
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T_A
+PREHOOK: query: CREATE TABLE T_B ( id STRING, val STRING )
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T_B
+POSTHOOK: query: CREATE TABLE T_B ( id STRING, val STRING )
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T_B
+PREHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING )
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@join_result_1
+POSTHOOK: query: CREATE TABLE join_result_1 ( ida STRING, vala STRING, idb STRING, valb STRING )
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@join_result_1
+PREHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING )
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@join_result_3
+POSTHOOK: query: CREATE TABLE join_result_3 ( ida STRING, vala STRING, idb STRING, valb STRING )
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@join_result_3
+PREHOOK: query: INSERT INTO TABLE T_A
+VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@t_a
+POSTHOOK: query: INSERT INTO TABLE T_A
+VALUES ('Id_1', 'val_101'), ('Id_2', 'val_102'), ('Id_3', 'val_103')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@t_a
+POSTHOOK: Lineage: t_a.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t_a.val SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE T_B
+VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@t_b
+POSTHOOK: query: INSERT INTO TABLE T_B
+VALUES ('Id_1', 'val_103'), ('Id_2', 'val_104')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@t_b
+POSTHOOK: Lineage: t_b.id SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t_b.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 id (type: string)
+ 1 id (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 id (type: string)
+ 1 id (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col1 <> 'val_104') and (_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2' AND a.val <> b.val
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 id (type: string)
+ 1 id (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2') and (_col1 <> _col6)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+ Filter Operator
+ predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 id (type: string)
+ 1 id (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+ Filter Operator
+ predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+PREHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.*, b.*
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.*, b.*
+WHERE b.val = 'val_104' AND b.id = 'Id_2'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: id (type: string)
+ sort order: +
+ Map-reduce partition columns: id (type: string)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: val (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 id (type: string)
+ 1 id (type: string)
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+ Filter Operator
+ predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.join_result_3
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
index 0cc0867..fe3c5c2 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out
@@ -56,12 +56,12 @@ INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-3 is a root stage
+ Stage-2 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-2
Stage-0 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-0
Stage-1 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-3
- Stage-6 depends on stages: Stage-2
+ Stage-6 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-3
@@ -89,6 +89,24 @@ STAGE PLANS:
Reduce Operator Tree:
Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g4
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -131,26 +149,8 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_g3
- Group By Operator
- aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g4
- Stage: Stage-0
+ Stage: Stage-2
Move Operator
tables:
replace: true
@@ -158,12 +158,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g2
+ name: default.dest_g4
Stage: Stage-4
Stats-Aggr Operator
- Stage: Stage-1
+ Stage: Stage-0
Move Operator
tables:
replace: true
@@ -171,12 +171,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g3
+ name: default.dest_g2
Stage: Stage-5
Stats-Aggr Operator
- Stage: Stage-2
+ Stage: Stage-1
Move Operator
tables:
replace: true
@@ -184,7 +184,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g4
+ name: default.dest_g3
Stage: Stage-6
Stats-Aggr Operator
@@ -284,12 +284,12 @@ INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-2
Stage-0 depends on stages: Stage-5
- Stage-6 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-0
Stage-1 depends on stages: Stage-5
- Stage-7 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-5
- Stage-8 depends on stages: Stage-2
+ Stage-8 depends on stages: Stage-1
Stage-3 depends on stages: Stage-5
Stage-9 depends on stages: Stage-3
Stage-4 depends on stages: Stage-5
@@ -338,6 +338,24 @@ STAGE PLANS:
Reduce Operator Tree:
Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g4
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -380,24 +398,6 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_g3
- Group By Operator
- aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g4
Reducer 3
Reduce Operator Tree:
Forward
@@ -461,7 +461,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_h2
- Stage: Stage-0
+ Stage: Stage-2
Move Operator
tables:
replace: true
@@ -469,12 +469,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g2
+ name: default.dest_g4
Stage: Stage-6
Stats-Aggr Operator
- Stage: Stage-1
+ Stage: Stage-0
Move Operator
tables:
replace: true
@@ -482,12 +482,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g3
+ name: default.dest_g2
Stage: Stage-7
Stats-Aggr Operator
- Stage: Stage-2
+ Stage: Stage-1
Move Operator
tables:
replace: true
@@ -495,7 +495,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest_g4
+ name: default.dest_g3
Stage: Stage-8
Stats-Aggr Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
index 9eeabb4..d5de394 100644
--- a/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
@@ -221,10 +221,10 @@ INSERT OVERWRITE TABLE e2
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-2 is a root stage
- Stage-0 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-0
Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-0
STAGE PLANS:
Stage: Stage-2
@@ -251,6 +251,24 @@ STAGE PLANS:
Reduce Operator Tree:
Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.e2
Filter Operator
predicate: (KEY._col0 > 450) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -272,26 +290,8 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.e1
- Group By Operator
- aggregations: count()
- keys: KEY._col0 (type: string)
- mode: complete
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.e2
- Stage: Stage-0
+ Stage: Stage-1
Move Operator
tables:
replace: true
@@ -299,12 +299,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.e1
+ name: default.e2
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-1
+ Stage: Stage-0
Move Operator
tables:
replace: true
@@ -312,7 +312,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.e2
+ name: default.e1
Stage: Stage-4
Stats-Aggr Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/411ab3fe/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
index 3162b64..cadab04 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
@@ -93,11 +93,14 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string), value (type: string)
- sort order: ++
- Map-reduce partition columns: key (type: string), value (type: string)
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string), value (type: string)
+ sort order: ++
+ Map-reduce partition columns: key (type: string), value (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Map 6
Map Operator Tree:
TableScan
@@ -461,7 +464,7 @@ POSTHOOK: Input: default@src_5
199 val_199
199 val_199
2 val_2
-Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[47][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
PREHOOK: query: explain
from src b
INSERT OVERWRITE TABLE src_4
@@ -493,10 +496,10 @@ POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-5 is a root stage
Stage-2 depends on stages: Stage-5
- Stage-1 depends on stages: Stage-2
- Stage-3 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-5
@@ -600,6 +603,27 @@ STAGE PLANS:
TableScan
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 key (type: string), value (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_4
Map Join Operator
condition map:
Left Semi Join 0 to 1
@@ -632,24 +656,6 @@ STAGE PLANS:
sort order: +
Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
- Map Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 key (type: string), value (type: string)
- 1 _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1
- input vertices:
- 1 Map 3
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src_4
Local Work:
Map Reduce Local Work
Reducer 2
@@ -667,7 +673,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_5
- Stage: Stage-1
+ Stage: Stage-0
Move Operator
tables:
replace: true
@@ -675,12 +681,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src_5
+ name: default.src_4
Stage: Stage-3
Stats-Aggr Operator
- Stage: Stage-0
+ Stage: Stage-1
Move Operator
tables:
replace: true
@@ -688,12 +694,12 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.src_4
+ name: default.src_5
Stage: Stage-4
Stats-Aggr Operator
-Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[47][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
PREHOOK: query: from src b
INSERT OVERWRITE TABLE src_4
select *
@@ -732,8 +738,8 @@ POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c
POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ]
RUN: Stage-5:MAPRED
RUN: Stage-2:MAPRED
-RUN: Stage-1:MOVE
RUN: Stage-0:MOVE
+RUN: Stage-1:MOVE
RUN: Stage-3:STATS
RUN: Stage-4:STATS
PREHOOK: query: select * from src_4