You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2015/07/06 18:24:34 UTC
hive git commit: HIVE-10996 : Aggregation / Projection over Multi-Join Inner Query producing incorrect results (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)
Repository: hive
Updated Branches:
refs/heads/branch-1.1 d8ff0bcdb -> 2f0ae24b7
HIVE-10996 : Aggregation / Projection over Multi-Join Inner Query producing incorrect results (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2f0ae24b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2f0ae24b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2f0ae24b
Branch: refs/heads/branch-1.1
Commit: 2f0ae24b70474d0f92c56c17b7056899ef328a36
Parents: d8ff0bc
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Mon Jul 6 17:23:17 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Mon Jul 6 17:23:17 2015 +0100
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/exec/ColumnInfo.java | 2 +-
.../hive/ql/optimizer/ColumnPrunerProcCtx.java | 27 +
.../ql/optimizer/ColumnPrunerProcFactory.java | 49 +-
ql/src/test/queries/clientpositive/join43.q | 83 +++
ql/src/test/results/clientpositive/having.q.out | 28 +-
ql/src/test/results/clientpositive/join43.q.out | 648 +++++++++++++++++++
.../results/clientpositive/spark/having.q.out | 28 +-
.../clientpositive/subquery_in_having.q.out | 128 ++--
.../subquery_notin_having.q.java1.7.out | 34 +-
.../subquery_unqualcolumnrefs.q.out | 32 +-
.../results/clientpositive/tez/having.q.out | 28 +-
11 files changed, 958 insertions(+), 129 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
index a34a31d..64d30bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
@@ -156,7 +156,7 @@ public class ColumnInfo implements Serializable {
*/
@Override
public String toString() {
- return internalName + ": " + objectInspector.getTypeName();
+ return internalName + ": " + typeName;
}
public void setAlias(String col_alias) {
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
index 5d848a1..c88bb82 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
@@ -118,6 +118,33 @@ public class ColumnPrunerProcCtx implements NodeProcessorCtx {
}
/**
+ * Creates the list of internal column names(these names are used in the
+ * RowResolver and are different from the external column names) that are
+ * needed in the subtree. These columns eventually have to be selected from
+ * the table scan.
+ *
+ * @param curOp
+ * The root of the operator subtree.
+ * @param child
+ * The consumer.
+ * @return List<String> of the internal column names.
+ * @throws SemanticException
+ */
+ public List<String> genColLists(Operator<? extends OperatorDesc> curOp,
+ Operator<? extends OperatorDesc> child)
+ throws SemanticException {
+ if (curOp.getChildOperators() == null) {
+ return null;
+ }
+ if (child instanceof CommonJoinOperator) {
+ int tag = child.getParentOperators().indexOf(curOp);
+ return joinPrunedColLists.get(child).get((byte) tag);
+ } else {
+ return prunedColLists.get(child);
+ }
+ }
+
+ /**
* Creates the list of internal column names from select expressions in a
* select operator. This function is used for the select operator instead of
* the genColLists function (which is used by the rest of the operators).
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index 57ce849..c5482c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -126,10 +126,10 @@ public final class ColumnPrunerProcFactory {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
- GroupByOperator op = (GroupByOperator) nd;
+ GroupByOperator gbOp = (GroupByOperator) nd;
ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
List<String> colLists = new ArrayList<String>();
- GroupByDesc conf = op.getConf();
+ GroupByDesc conf = gbOp.getConf();
ArrayList<ExprNodeDesc> keys = conf.getKeys();
for (ExprNodeDesc key : keys) {
colLists = Utilities.mergeUniqElems(colLists, key.getCols());
@@ -144,17 +144,52 @@ public final class ColumnPrunerProcFactory {
}
int groupingSetPosition = conf.getGroupingSetPosition();
if (groupingSetPosition >= 0) {
- List<String> cols = cppCtx.genColLists(op);
+ List<String> neededCols = cppCtx.genColLists(gbOp);
String groupingColumn = conf.getOutputColumnNames().get(groupingSetPosition);
- if (!cols.contains(groupingColumn)) {
+ if (!neededCols.contains(groupingColumn)) {
conf.getOutputColumnNames().remove(groupingSetPosition);
- if (op.getSchema() != null) {
- op.getSchema().getSignature().remove(groupingSetPosition);
+ if (gbOp.getSchema() != null) {
+ gbOp.getSchema().getSignature().remove(groupingSetPosition);
}
}
}
- cppCtx.getPrunedColLists().put(op, colLists);
+ // If the child has a different schema, we create a Project operator between them both,
+ // as we cannot prune the columns in the GroupBy operator
+ for (Operator<?> child : gbOp.getChildOperators()) {
+ if (child instanceof SelectOperator || child instanceof ReduceSinkOperator) {
+ continue;
+ }
+ Set<String> neededCols = new HashSet<String>(cppCtx.genColLists(gbOp, child));
+ if (neededCols.size() < gbOp.getSchema().getSignature().size()) {
+ ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
+ ArrayList<String> outputColNames = new ArrayList<String>();
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+ ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
+ for (ColumnInfo colInfo : gbOp.getSchema().getSignature()) {
+ if (!neededCols.contains(colInfo.getInternalName())) {
+ continue;
+ }
+ ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(),
+ colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+ exprs.add(colDesc);
+ outputColNames.add(colInfo.getInternalName());
+ ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(),
+ colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+ newCol.setAlias(colInfo.getAlias());
+ outputRS.add(newCol);
+ colExprMap.put(colInfo.getInternalName(), colDesc);
+ }
+ SelectDesc select = new SelectDesc(exprs, outputColNames, false);
+ gbOp.removeChild(child);
+ SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
+ select, new RowSchema(outputRS), gbOp);
+ OperatorFactory.makeChild(sel, child);
+ sel.setColumnExprMap(colExprMap);
+ }
+ }
+
+ cppCtx.getPrunedColLists().put(gbOp, colLists);
return null;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/test/queries/clientpositive/join43.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join43.q b/ql/src/test/queries/clientpositive/join43.q
new file mode 100644
index 0000000..68694c6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join43.q
@@ -0,0 +1,83 @@
+create table purchase_history (s string, product string, price double, time int);
+insert into purchase_history values ('1', 'Belt', 20.00, 21);
+insert into purchase_history values ('1', 'Socks', 3.50, 31);
+insert into purchase_history values ('3', 'Belt', 20.00, 51);
+insert into purchase_history values ('4', 'Shirt', 15.50, 59);
+
+create table cart_history (s string, cart_id int, time int);
+insert into cart_history values ('1', 1, 10);
+insert into cart_history values ('1', 2, 20);
+insert into cart_history values ('1', 3, 30);
+insert into cart_history values ('1', 4, 40);
+insert into cart_history values ('3', 5, 50);
+insert into cart_history values ('4', 6, 60);
+
+create table events (s string, st2 string, n int, time int);
+insert into events values ('1', 'Bob', 1234, 20);
+insert into events values ('1', 'Bob', 1234, 30);
+insert into events values ('1', 'Bob', 1234, 25);
+insert into events values ('2', 'Sam', 1234, 30);
+insert into events values ('3', 'Jeff', 1234, 50);
+insert into events values ('4', 'Ted', 1234, 60);
+
+explain
+select s
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list;
+
+select s
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list;
+
+explain
+select *
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list;
+
+select *
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list;
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/test/results/clientpositive/having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/having.q.out b/ql/src/test/results/clientpositive/having.q.out
index 58fed5e..f7e057b 100644
--- a/ql/src/test/results/clientpositive/having.q.out
+++ b/ql/src/test/results/clientpositive/having.q.out
@@ -38,20 +38,24 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 > 3) (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col0
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 > 3) (type: boolean)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/test/results/clientpositive/join43.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join43.q.out b/ql/src/test/results/clientpositive/join43.q.out
new file mode 100644
index 0000000..f22a9b8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join43.q.out
@@ -0,0 +1,648 @@
+PREHOOK: query: create table purchase_history (s string, product string, price double, time int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@purchase_history
+POSTHOOK: query: create table purchase_history (s string, product string, price double, time int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@purchase_history
+PREHOOK: query: insert into purchase_history values ('1', 'Belt', 20.00, 21)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@purchase_history
+POSTHOOK: query: insert into purchase_history values ('1', 'Belt', 20.00, 21)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@purchase_history
+POSTHOOK: Lineage: purchase_history.price EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.product SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.time EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into purchase_history values ('1', 'Socks', 3.50, 31)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@purchase_history
+POSTHOOK: query: insert into purchase_history values ('1', 'Socks', 3.50, 31)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@purchase_history
+POSTHOOK: Lineage: purchase_history.price EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.product SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.time EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into purchase_history values ('3', 'Belt', 20.00, 51)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@purchase_history
+POSTHOOK: query: insert into purchase_history values ('3', 'Belt', 20.00, 51)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@purchase_history
+POSTHOOK: Lineage: purchase_history.price EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.product SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.s SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.time EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into purchase_history values ('4', 'Shirt', 15.50, 59)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@purchase_history
+POSTHOOK: query: insert into purchase_history values ('4', 'Shirt', 15.50, 59)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@purchase_history
+POSTHOOK: Lineage: purchase_history.price EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.product SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.s SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: purchase_history.time EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: create table cart_history (s string, cart_id int, time int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cart_history
+POSTHOOK: query: create table cart_history (s string, cart_id int, time int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cart_history
+PREHOOK: query: insert into cart_history values ('1', 1, 10)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@cart_history
+POSTHOOK: query: insert into cart_history values ('1', 1, 10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@cart_history
+POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into cart_history values ('1', 2, 20)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__6
+PREHOOK: Output: default@cart_history
+POSTHOOK: query: insert into cart_history values ('1', 2, 20)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__6
+POSTHOOK: Output: default@cart_history
+POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into cart_history values ('1', 3, 30)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__7
+PREHOOK: Output: default@cart_history
+POSTHOOK: query: insert into cart_history values ('1', 3, 30)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__7
+POSTHOOK: Output: default@cart_history
+POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into cart_history values ('1', 4, 40)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__8
+PREHOOK: Output: default@cart_history
+POSTHOOK: query: insert into cart_history values ('1', 4, 40)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__8
+POSTHOOK: Output: default@cart_history
+POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into cart_history values ('3', 5, 50)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__9
+PREHOOK: Output: default@cart_history
+POSTHOOK: query: insert into cart_history values ('3', 5, 50)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__9
+POSTHOOK: Output: default@cart_history
+POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into cart_history values ('4', 6, 60)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__10
+PREHOOK: Output: default@cart_history
+POSTHOOK: query: insert into cart_history values ('4', 6, 60)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__10
+POSTHOOK: Output: default@cart_history
+POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: create table events (s string, st2 string, n int, time int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@events
+POSTHOOK: query: create table events (s string, st2 string, n int, time int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@events
+PREHOOK: query: insert into events values ('1', 'Bob', 1234, 20)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__11
+PREHOOK: Output: default@events
+POSTHOOK: query: insert into events values ('1', 'Bob', 1234, 20)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__11
+POSTHOOK: Output: default@events
+POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into events values ('1', 'Bob', 1234, 30)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__12
+PREHOOK: Output: default@events
+POSTHOOK: query: insert into events values ('1', 'Bob', 1234, 30)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__12
+POSTHOOK: Output: default@events
+POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into events values ('1', 'Bob', 1234, 25)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__13
+PREHOOK: Output: default@events
+POSTHOOK: query: insert into events values ('1', 'Bob', 1234, 25)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__13
+POSTHOOK: Output: default@events
+POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into events values ('2', 'Sam', 1234, 30)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__14
+PREHOOK: Output: default@events
+POSTHOOK: query: insert into events values ('2', 'Sam', 1234, 30)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__14
+POSTHOOK: Output: default@events
+POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into events values ('3', 'Jeff', 1234, 50)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__15
+PREHOOK: Output: default@events
+POSTHOOK: query: insert into events values ('3', 'Jeff', 1234, 50)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__15
+POSTHOOK: Output: default@events
+POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into events values ('4', 'Ted', 1234, 60)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__16
+PREHOOK: Output: default@events
+POSTHOOK: query: insert into events values ('4', 'Ted', 1234, 60)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__16
+POSTHOOK: Output: default@events
+POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: explain
+select s
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select s
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: purchase_history
+ Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), time (type: int)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int)
+ TableScan
+ alias: cart_history
+ Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), time (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col3, _col6
+ Statistics: Num rows: 3 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 > _col6) (type: boolean)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col6)
+ keys: _col0 (type: string), _col3 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col2 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col2 (type: int)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: events
+ Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s is not null and time is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), time (type: int)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col3 (type: int)
+ Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col2 (type: int)
+ 1 _col0 (type: string), _col3 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select s
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cart_history
+PREHOOK: Input: default@events
+PREHOOK: Input: default@purchase_history
+#### A masked pattern was here ####
+POSTHOOK: query: select s
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cart_history
+POSTHOOK: Input: default@events
+POSTHOOK: Input: default@purchase_history
+#### A masked pattern was here ####
+1
+1
+3
+PREHOOK: query: explain
+select *
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: purchase_history
+ Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), time (type: int)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int)
+ TableScan
+ alias: cart_history
+ Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: s is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), time (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col3, _col6
+ Statistics: Num rows: 3 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 > _col6) (type: boolean)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col6)
+ keys: _col0 (type: string), _col3 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: int)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col2 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col2 (type: int)
+ Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ TableScan
+ alias: events
+ Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (s is not null and time is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s (type: string), st2 (type: string), n (type: int), time (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col3 (type: int)
+ Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col2 (type: int)
+ 1 _col0 (type: string), _col3 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col5 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cart_history
+PREHOOK: Input: default@events
+PREHOOK: Input: default@purchase_history
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from (
+ select last.*, action.st2, action.n
+ from (
+ select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+ from (select * from purchase_history) purchase
+ join (select * from cart_history) mevt
+ on purchase.s = mevt.s
+ where purchase.time > mevt.time
+ group by purchase.s, purchase.time
+ ) last
+ join (select * from events) action
+ on last.s = action.s and last.last_stage_time = action.time
+) list
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cart_history
+POSTHOOK: Input: default@events
+POSTHOOK: Input: default@purchase_history
+#### A masked pattern was here ####
+1 21 20 Bob 1234
+1 31 30 Bob 1234
+3 51 50 Jeff 1234
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/test/results/clientpositive/spark/having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/having.q.out b/ql/src/test/results/clientpositive/spark/having.q.out
index 65d85b7..7fb884d 100644
--- a/ql/src/test/results/clientpositive/spark/having.q.out
+++ b/ql/src/test/results/clientpositive/spark/having.q.out
@@ -44,20 +44,24 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 > 3) (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col0
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 > 3) (type: boolean)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/test/results/clientpositive/subquery_in_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out
index 03cc2af..437ecbe 100644
--- a/ql/src/test/results/clientpositive/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out
@@ -173,24 +173,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col0
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: bigint)
- mode: hash
+ Select Operator
+ expressions: _col1 (type: bigint)
outputColumnNames: _col0
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -389,24 +393,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: bigint), _col0 (type: string)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: _col2 is not null (type: boolean)
Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: bigint), _col1 (type: string)
- mode: hash
+ Select Operator
+ expressions: _col2 (type: bigint), _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: _col0 (type: bigint), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -944,24 +952,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col0
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: bigint)
- mode: hash
+ Select Operator
+ expressions: _col1 (type: bigint)
outputColumnNames: _col0
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
@@ -1040,24 +1052,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col1 is not null (type: boolean)
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col0
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: bigint)
- mode: hash
+ Select Operator
+ expressions: _col1 (type: bigint)
outputColumnNames: _col0
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-7
Conditional Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
index ebc6efd..3572cca 100644
--- a/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out
@@ -684,22 +684,26 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col1 - _col2) > 600.0) (type: boolean)
- Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
- Select Operator
+ Select Operator
+ expressions: _col1 (type: double), _col2 (type: double)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col1 - _col2) > 600.0) (type: boolean)
Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Select Operator
+ Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-6
Map Reduce
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
index 06d5708..7c3b10d 100644
--- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
+++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out
@@ -742,24 +742,28 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col2 (type: bigint), _col0 (type: string)
- outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: _col2 is not null (type: boolean)
Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: bigint), _col1 (type: string)
- mode: hash
+ Select Operator
+ expressions: _col2 (type: bigint), _col0 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: _col0 (type: bigint), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/2f0ae24b/ql/src/test/results/clientpositive/tez/having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/having.q.out b/ql/src/test/results/clientpositive/tez/having.q.out
index 9463505..778bef3 100644
--- a/ql/src/test/results/clientpositive/tez/having.q.out
+++ b/ql/src/test/results/clientpositive/tez/having.q.out
@@ -44,20 +44,24 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 > 3) (type: boolean)
- Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint)
- outputColumnNames: _col0
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 > 3) (type: boolean)
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator