You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2023/04/12 03:50:00 UTC
[hive] branch master updated: HIVE-27187: Incremental rebuild of materialized view having aggregate and stored by iceberg (Krisztian Kasa, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 53cb10347be HIVE-27187: Incremental rebuild of materialized view having aggregate and stored by iceberg (Krisztian Kasa, reviewed by Denys Kuzmenko)
53cb10347be is described below
commit 53cb10347bec803008b17ff05c48536fe9e722e9
Author: Krisztian Kasa <ka...@gmail.com>
AuthorDate: Wed Apr 12 05:49:50 2023 +0200
HIVE-27187: Incremental rebuild of materialized view having aggregate and stored by iceberg (Krisztian Kasa, reviewed by Denys Kuzmenko)
---
.../src/test/queries/positive/mv_iceberg_orc6.q | 3 -
.../src/test/queries/positive/mv_iceberg_orc7.q | 29 ++
.../test/results/positive/mv_iceberg_orc7.q.out | 132 ++++++++
.../AlterMaterializedViewRebuildAnalyzer.java | 189 ++++++------
.../alter/rebuild/MaterializedViewASTBuilder.java | 86 ++++++
.../NativeAcidMaterializedViewASTBuilder.java | 38 +++
.../NonNativeAcidMaterializedViewASTBuilder.java | 49 +++
.../materialized_view_create_rewrite_6.q | 1 +
.../materialized_view_create_rewrite_9.q | 1 +
.../llap/materialized_view_create_rewrite_4.q.out | 340 +++++++++++++++------
.../llap/materialized_view_create_rewrite_6.q.out | 214 ++++++++-----
.../llap/materialized_view_create_rewrite_7.q.out | 161 +++++++---
.../llap/materialized_view_create_rewrite_9.q.out | 212 ++++++++-----
.../materialized_view_create_rewrite_nulls.q.out | 170 ++++++++---
...erialized_view_create_rewrite_one_key_gby.q.out | 165 +++++++---
15 files changed, 1300 insertions(+), 490 deletions(-)
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc6.q b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc6.q
index ff5a113bd0b..52da0cac75a 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc6.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc6.q
@@ -2,9 +2,6 @@
-- SORT_QUERY_RESULTS
--! qt:replace:/(.*fromVersion=\[)\S+(\].*)/$1#Masked#$2/
-set hive.support.concurrency=true;
-set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-
drop table if exists tbl_ice;
create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1');
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc7.q b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc7.q
new file mode 100644
index 00000000000..28f06875596
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc7.q
@@ -0,0 +1,29 @@
+-- MV source tables are iceberg and MV has aggregate.
+-- SORT_QUERY_RESULTS
+--! qt:replace:/(.*fromVersion=\[)\S+(\].*)/$1#Masked#$2/
+
+set hive.stats.column.autogather=false;
+
+drop table if exists tbl_ice;
+
+create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1');
+
+insert into tbl_ice values (1, 'one', 50), (4, 'four', 53), (5, 'five', 54);
+
+create materialized view mat1 stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select a, count(c)
+from tbl_ice
+group by a;
+
+-- insert a new record into the source table
+insert into tbl_ice values (1, 'one', 50);
+
+select * from mat1;
+
+explain cbo
+alter materialized view mat1 rebuild;
+explain
+alter materialized view mat1 rebuild;
+alter materialized view mat1 rebuild;
+
+select * from mat1;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc7.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc7.q.out
new file mode 100644
index 00000000000..2ad75a5b08a
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_orc7.q.out
@@ -0,0 +1,132 @@
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (4, 'four', 53), (5, 'five', 54)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (4, 'four', 53), (5, 'five', 54)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: create materialized view mat1 stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select a, count(c)
+from tbl_ice
+group by a
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: create materialized view mat1 stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select a, count(c)
+from tbl_ice
+group by a
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: Lineage: mat1._c1 EXPRESSION [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1.a SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:a, type:int, comment:null), ]
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: select * from mat1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mat1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from mat1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mat1
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1 1
+4 1
+5 1
+PREHOOK: query: explain cbo
+alter materialized view mat1 rebuild
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@mat1
+POSTHOOK: query: explain cbo
+alter materialized view mat1 rebuild
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@mat1
+CBO PLAN:
+HiveAggregate(group=[{0}], agg#0=[count($2)])
+ HiveTableScan(table=[[default, tbl_ice]], table:alias=[tbl_ice])
+
+PREHOOK: query: explain
+alter materialized view mat1 rebuild
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@mat1
+POSTHOOK: query: explain
+alter materialized view mat1 rebuild
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@mat1
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-4
+ Materialized View Update{"name:":"default.mat1","update creation metadata:":"true"}
+ Stage-3
+ Stats Work{}
+ Stage-0
+ Move Operator
+ table:{"name:":"default.mat1"}
+ Stage-2
+ Dependency Collection{}
+ Stage-1
+ Reducer 2 vectorized
+ File Output Operator [FS_11]
+ table:{"name:":"default.mat1"}
+ Group By Operator [GBY_10] (rows=2 width=8)
+ Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_9]
+ PartitionCols:_col0
+ Group By Operator [GBY_8] (rows=4 width=8)
+ Output:["_col0","_col1"],aggregations:["count(c)"],keys:a
+ Select Operator [SEL_7] (rows=4 width=8)
+ Output:["a","c"]
+ TableScan [TS_0] (rows=4 width=8)
+ default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:NONE,Output:["a","c"]
+
+PREHOOK: query: alter materialized view mat1 rebuild
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@mat1
+POSTHOOK: query: alter materialized view mat1 rebuild
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@mat1
+PREHOOK: query: select * from mat1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mat1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from mat1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mat1
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1 2
+4 1
+5 1
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
index d6e28e5a685..9f0b99fe33e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
@@ -73,6 +73,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.mapper.StatsSource;
import org.apache.hadoop.hive.ql.session.SessionState;
+import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -299,8 +300,9 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
// First we need to check if it is valid to convert to MERGE/INSERT INTO.
// If we succeed, we modify the plan and afterwards the AST.
// MV should be an acid table.
- boolean fullAcidView = AcidUtils.isFullAcidTable(mvTable.getTTable());
- MaterializedViewRewritingRelVisitor visitor = new MaterializedViewRewritingRelVisitor(fullAcidView);
+ boolean acidView = AcidUtils.isFullAcidTable(mvTable.getTTable())
+ || AcidUtils.isNonNativeAcidTable(mvTable, true);
+ MaterializedViewRewritingRelVisitor visitor = new MaterializedViewRewritingRelVisitor(acidView);
visitor.go(basePlan);
if (visitor.isRewritingAllowed()) {
if (!materialization.isSourceTablesUpdateDeleteModified()) {
@@ -311,7 +313,7 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
return applyJoinInsertIncremental(basePlan, mdProvider, executorProvider);
}
} else {
- if (fullAcidView) {
+ if (acidView) {
if (visitor.isContainsAggregate()) {
if (visitor.getCountIndex() < 0) {
// count(*) is necessary for determine which rows should be deleted from the view
@@ -465,21 +467,40 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
protected ASTNode fixUpAfterCbo(ASTNode originalAst, ASTNode newAst, CalcitePlanner.PreCboCtx cboCtx)
throws SemanticException {
ASTNode fixedAST = super.fixUpAfterCbo(originalAst, newAst, cboCtx);
- // 1.2. Fix up the query for materialization rebuild
- if (mvRebuildMode == MaterializationRebuildMode.AGGREGATE_INSERT_REBUILD) {
- fixUpASTAggregateInsertIncrementalRebuild(fixedAST);
- } else if (mvRebuildMode == MaterializationRebuildMode.AGGREGATE_INSERT_DELETE_REBUILD) {
- fixUpASTAggregateInsertDeleteIncrementalRebuild(fixedAST);
- } else if (mvRebuildMode == MaterializationRebuildMode.JOIN_INSERT_REBUILD) {
- fixUpASTJoinInsertIncrementalRebuild(fixedAST);
- } else if (mvRebuildMode == MaterializationRebuildMode.JOIN_INSERT_DELETE_REBUILD) {
- fixUpASTJoinInsertDeleteIncrementalRebuild(fixedAST);
+ switch (mvRebuildMode) {
+ case INSERT_OVERWRITE_REBUILD:
+ return fixedAST;
+ case JOIN_INSERT_REBUILD:
+ fixUpASTJoinInsertIncrementalRebuild(fixedAST);
+ return fixedAST;
+ case AGGREGATE_INSERT_REBUILD:
+ fixUpASTAggregateInsertIncrementalRebuild(fixedAST, getMaterializedViewASTBuilder());
+ return fixedAST;
+ case AGGREGATE_INSERT_DELETE_REBUILD:
+ fixUpASTAggregateInsertDeleteIncrementalRebuild(fixedAST, getMaterializedViewASTBuilder());
+ return fixedAST;
+ case JOIN_INSERT_DELETE_REBUILD:
+ fixUpASTJoinInsertDeleteIncrementalRebuild(fixedAST, getMaterializedViewASTBuilder());
+ return fixedAST;
+ default:
+ throw new UnsupportedOperationException("No materialized view rebuild exists for mode " + mvRebuildMode);
}
+ }
- return fixedAST;
+ @NotNull
+ private MaterializedViewASTBuilder getMaterializedViewASTBuilder() {
+ if (AcidUtils.isFullAcidTable(mvTable.getTTable())) {
+ return new NativeAcidMaterializedViewASTBuilder();
+ } else if (AcidUtils.isNonNativeAcidTable(mvTable, true)) {
+ return new NonNativeAcidMaterializedViewASTBuilder(mvTable);
+ } else {
+ throw new UnsupportedOperationException("Incremental rebuild is supported only for fully ACID materialized " +
+ "views or if the Storage handler supports snapshots (Iceberg).");
+ }
}
- private void fixUpASTAggregateInsertIncrementalRebuild(ASTNode newAST) throws SemanticException {
+ private void fixUpASTAggregateInsertIncrementalRebuild(ASTNode newAST, MaterializedViewASTBuilder astBuilder)
+ throws SemanticException {
ASTNode updateNode = new CalcitePlanner.ASTSearcher().simpleBreadthFirstSearch(
newAST, HiveParser.TOK_QUERY, HiveParser.TOK_INSERT);
ASTNode subqueryNodeInputROJ = new CalcitePlanner.ASTSearcher().simpleBreadthFirstSearch(
@@ -510,13 +531,14 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
throw new SemanticException("Unexpected condition in incremental rewriting");
}
- fixUpASTAggregateIncrementalRebuild(subqueryNodeInputROJ, updateNode, disjunctMap);
+ fixUpASTAggregateIncrementalRebuild(subqueryNodeInputROJ, updateNode, disjunctMap, astBuilder);
}
private void fixUpASTAggregateIncrementalRebuild(
ASTNode subqueryNodeInputROJ,
ASTNode updateNode,
- Map<Context.DestClausePrefix, ASTNode> disjuncts)
+ Map<Context.DestClausePrefix, ASTNode> disjuncts,
+ MaterializedViewASTBuilder astBuilder)
throws SemanticException {
// Replace INSERT OVERWRITE by MERGE equivalent rewriting.
// Here we need to do this complex AST rewriting that generates the same plan
@@ -541,7 +563,6 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
ASTNode updateParent = (ASTNode) updateNode.getParent();
ASTNode insertNode = (ASTNode) ParseDriver.adaptor.dupTree(updateNode);
insertNode.setParent(updateParent);
- updateParent.addChild(insertNode);
// 3) Create ROW_ID column in select clause from left input for the RIGHT OUTER JOIN.
// This is needed for the UPDATE clause. Hence, we find the following node:
// TOK_QUERY
@@ -562,32 +583,39 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
ASTNode selectNodeInputROJ = new ASTSearcher().simpleBreadthFirstSearch(
subqueryNodeInputROJ, HiveParser.TOK_SUBQUERY, HiveParser.TOK_QUERY,
HiveParser.TOK_INSERT, HiveParser.TOK_SELECT);
- ASTNode selectExprNodeInputROJ = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
- ASTNode tableName = createRowIdNode(TableName.getDbTable(
+ astBuilder.createAcidSortNodes(TableName.getDbTable(
materializationNode.getChild(0).getText(),
- materializationNode.getChild(1).getText()));
- ParseDriver.adaptor.addChild(selectExprNodeInputROJ, tableName);
- ParseDriver.adaptor.addChild(selectNodeInputROJ, selectExprNodeInputROJ);
+ materializationNode.getChild(1).getText()))
+ .forEach(astNode -> ParseDriver.adaptor.addChild(selectNodeInputROJ, astNode));
// 4) Transform first INSERT branch into an UPDATE
- // 4.1) Adding ROW__ID field
- ASTNode selectNodeInUpdate = (ASTNode) updateNode.getChild(1);
- if (selectNodeInUpdate.getType() != HiveParser.TOK_SELECT) {
- throw new SemanticException("TOK_SELECT expected in incremental rewriting");
- }
- ASTNode selectExprNodeInUpdate = (ASTNode) ParseDriver.adaptor.dupNode(selectExprNodeInputROJ);
- ParseDriver.adaptor.addChild(selectExprNodeInUpdate, createRowIdNode((ASTNode) subqueryNodeInputROJ.getChild(1)));
- selectNodeInUpdate.insertChild(0, selectExprNodeInUpdate);
- // 4.2) Modifying filter condition.
+ // 4.1) Modifying filter condition.
ASTNode whereClauseInUpdate = findWhereClause(updateNode);
if (whereClauseInUpdate.getChild(0).getType() != HiveParser.KW_OR) {
throw new SemanticException("OR clause expected below TOK_WHERE in incremental rewriting");
}
// We bypass the OR clause and select the first disjunct for the Update branch
ParseDriver.adaptor.setChild(whereClauseInUpdate, 0, disjuncts.get(Context.DestClausePrefix.UPDATE));
+ ASTNode updateDeleteNode = (ASTNode) ParseDriver.adaptor.dupTree(updateNode);
+ // 4.2) Replacing the select list with the delete-identifying columns (ROW__ID for native ACID)
+ ASTNode selectNodeInUpdateDelete = (ASTNode) updateDeleteNode.getChild(1);
+ if (selectNodeInUpdateDelete.getType() != HiveParser.TOK_SELECT) {
+ throw new SemanticException("TOK_SELECT expected in incremental rewriting got "
+ + selectNodeInUpdateDelete.getType());
+ }
+ // Remove children
+ while (selectNodeInUpdateDelete.getChildCount() > 0) {
+ selectNodeInUpdateDelete.deleteChild(0);
+ }
+ // And add acid sort columns
+ List<ASTNode> selectExprNodesInUpdate = astBuilder.createDeleteSelectNodes(
+ subqueryNodeInputROJ.getChild(1).getText());
+ for (int i = 0; i < selectExprNodesInUpdate.size(); ++i) {
+ selectNodeInUpdateDelete.insertChild(i, selectExprNodesInUpdate.get(i));
+ }
// 4.3) Finally, we add SORT clause, this is needed for the UPDATE.
- ASTNode sortExprNode = createSortNode(createRowIdNode((ASTNode) subqueryNodeInputROJ.getChild(1)));
- ParseDriver.adaptor.addChild(updateNode, sortExprNode);
+ ASTNode sortExprNode = astBuilder.createSortNodes(
+ astBuilder.createAcidSortNodes((ASTNode) subqueryNodeInputROJ.getChild(1)));
+ ParseDriver.adaptor.addChild(updateDeleteNode, sortExprNode);
// 5) Modify INSERT branch condition. In particular, we need to modify the
// WHERE clause and pick up the disjunct for the Insert branch.
ASTNode whereClauseInInsert = findWhereClause(insertNode);
@@ -596,14 +624,20 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
}
// We bypass the OR clause and select the second disjunct
ParseDriver.adaptor.setChild(whereClauseInInsert, 0, disjuncts.get(Context.DestClausePrefix.INSERT));
+
+ updateParent.addChild(updateDeleteNode);
+ updateParent.addChild(insertNode);
+
// 6) Now we set some tree properties related to multi-insert
// operation with INSERT/UPDATE
ctx.setOperation(Context.Operation.MERGE);
- ctx.addDestNamePrefix(1, Context.DestClausePrefix.UPDATE);
- ctx.addDestNamePrefix(2, Context.DestClausePrefix.INSERT);
+ ctx.addDestNamePrefix(1, Context.DestClausePrefix.INSERT);
+ ctx.addDestNamePrefix(2, Context.DestClausePrefix.DELETE);
+ ctx.addDestNamePrefix(3, Context.DestClausePrefix.INSERT);
}
- private void fixUpASTAggregateInsertDeleteIncrementalRebuild(ASTNode newAST) throws SemanticException {
+ private void fixUpASTAggregateInsertDeleteIncrementalRebuild(ASTNode newAST, MaterializedViewASTBuilder astBuilder)
+ throws SemanticException {
ASTNode updateNode = new CalcitePlanner.ASTSearcher().simpleBreadthFirstSearch(
newAST, HiveParser.TOK_QUERY, HiveParser.TOK_INSERT);
ASTNode subqueryNodeInputROJ = new CalcitePlanner.ASTSearcher().simpleBreadthFirstSearch(
@@ -646,10 +680,10 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
}
}
- fixUpASTAggregateIncrementalRebuild(subqueryNodeInputROJ, updateNode, disjunctMap);
- addDeleteBranch(updateNode, subqueryNodeInputROJ, disjunctMap.get(Context.DestClausePrefix.DELETE));
+ fixUpASTAggregateIncrementalRebuild(subqueryNodeInputROJ, updateNode, disjunctMap, astBuilder);
+ addDeleteBranch(updateNode, subqueryNodeInputROJ, disjunctMap.get(Context.DestClausePrefix.DELETE), astBuilder);
- ctx.addDestNamePrefix(3, Context.DestClausePrefix.DELETE);
+ ctx.addDestNamePrefix(4, Context.DestClausePrefix.DELETE);
}
private ASTNode findWhereClause(ASTNode updateNode) throws SemanticException {
@@ -667,7 +701,8 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
return whereClauseInUpdate;
}
- private void addDeleteBranch(ASTNode updateNode, ASTNode subqueryNodeInputROJ, ASTNode filter)
+ private void addDeleteBranch(ASTNode updateNode, ASTNode subqueryNodeInputROJ, ASTNode predicate,
+ MaterializedViewASTBuilder astBuilder)
throws SemanticException {
ASTNode updateParent = (ASTNode) updateNode.getParent();
ASTNode deleteNode = (ASTNode) ParseDriver.adaptor.dupTree(updateNode);
@@ -683,55 +718,13 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
while (selectNodeInDelete.getChildCount() > 0) {
selectNodeInDelete.deleteChild(0);
}
- // 3) Adding ROW__ID field
- ASTNode selectExprNodeInUpdate = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
- ParseDriver.adaptor.addChild(selectExprNodeInUpdate, createRowIdNode((ASTNode) subqueryNodeInputROJ.getChild(1)));
- selectNodeInDelete.insertChild(0, selectExprNodeInUpdate);
+ // 3) Adding acid sort columns
+ astBuilder.createDeleteSelectNodes(subqueryNodeInputROJ.getChild(1).getText())
+ .forEach(astNode -> ParseDriver.adaptor.addChild(selectNodeInDelete, astNode));
// 4) Add filter condition to delete
ASTNode whereClauseInDelete = findWhereClause(deleteNode);
- ParseDriver.adaptor.setChild(whereClauseInDelete, 0, filter);
- }
-
- private ASTNode createRowIdNode(ASTNode inputNode) {
- return createRowIdNode(inputNode.getText());
- }
-
- // .
- // TOK_TABLE_OR_COL
- // <tableName>
- // ROW__ID
- private ASTNode createRowIdNode(String tableName) {
- ASTNode dotNode = (ASTNode) ParseDriver.adaptor.create(HiveParser.DOT, ".");
- ASTNode columnTokNode = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
- ASTNode rowIdNode = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.Identifier, VirtualColumn.ROWID.getName());
- ASTNode tableNameNode = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.Identifier, tableName);
-
- ParseDriver.adaptor.addChild(dotNode, columnTokNode);
- ParseDriver.adaptor.addChild(dotNode, rowIdNode);
- ParseDriver.adaptor.addChild(columnTokNode, tableNameNode);
- return dotNode;
- }
-
- // TOK_SORTBY
- // TOK_TABSORTCOLNAMEASC
- // TOK_NULLS_FIRST
- // <sortKeyNode>
- private ASTNode createSortNode(ASTNode sortKeyNode) {
- ASTNode sortExprNode = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.TOK_SORTBY, "TOK_SORTBY");
- ASTNode orderExprNode = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC");
- ASTNode nullsOrderExprNode = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST");
- ParseDriver.adaptor.addChild(sortExprNode, orderExprNode);
- ParseDriver.adaptor.addChild(orderExprNode, nullsOrderExprNode);
- ParseDriver.adaptor.addChild(nullsOrderExprNode, sortKeyNode);
- return sortExprNode;
+ ParseDriver.adaptor.setChild(whereClauseInDelete, 0, predicate);
}
private void fixUpASTJoinInsertIncrementalRebuild(ASTNode newAST) throws SemanticException {
@@ -758,7 +751,8 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
destParent.insertChild(childIndex, newChild);
}
- private void fixUpASTJoinInsertDeleteIncrementalRebuild(ASTNode newAST) throws SemanticException {
+ private void fixUpASTJoinInsertDeleteIncrementalRebuild(ASTNode newAST, MaterializedViewASTBuilder astBuilder)
+ throws SemanticException {
// Replace INSERT OVERWRITE by MERGE equivalent rewriting.
// Here we need to do this complex AST rewriting that generates the same plan
// that a MERGE clause would generate because CBO does not support MERGE yet.
@@ -785,12 +779,10 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
ASTNode selectNodeInputROJ = new ASTSearcher().simpleBreadthFirstSearch(
subqueryNodeInputROJ, HiveParser.TOK_SUBQUERY, HiveParser.TOK_QUERY,
HiveParser.TOK_INSERT, HiveParser.TOK_SELECT);
- ASTNode selectExprNodeInputROJ = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
- ParseDriver.adaptor.addChild(selectNodeInputROJ, selectExprNodeInputROJ);
- ParseDriver.adaptor.addChild(selectExprNodeInputROJ, createRowIdNode(TableName.getDbTable(
+ astBuilder.createAcidSortNodes(TableName.getDbTable(
materializationNode.getChild(0).getText(),
- materializationNode.getChild(1).getText())));
+ materializationNode.getChild(1).getText()))
+ .forEach(astNode -> ParseDriver.adaptor.addChild(selectNodeInputROJ, astNode));
ASTNode whereClauseInInsert = findWhereClause(insertNode);
@@ -827,10 +819,12 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
ASTNode newCondInInsert = (ASTNode) whereClauseInInsert.getChild(0).getChild(indexInsert);
ParseDriver.adaptor.setChild(whereClauseInInsert, 0, newCondInInsert);
- addDeleteBranch(insertNode, subqueryNodeInputROJ, (ASTNode) whereClauseInInsert.getChild(0).getChild(indexDelete));
+ ASTNode deletePredicate = (ASTNode) whereClauseInInsert.getChild(0).getChild(indexDelete);
+ addDeleteBranch(insertNode, subqueryNodeInputROJ, deletePredicate, astBuilder);
// 3) Add sort node to delete branch
- ASTNode sortNode = createSortNode(createRowIdNode((ASTNode) subqueryNodeInputROJ.getChild(1)));
+ ASTNode sortNode = astBuilder.createSortNodes(
+ astBuilder.createAcidSortNodes((ASTNode) subqueryNodeInputROJ.getChild(1)));
ParseDriver.adaptor.addChild(insertNode.getParent().getChild(2), sortNode);
// 4) Now we set some tree properties related to multi-insert
@@ -839,4 +833,9 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
ctx.addDestNamePrefix(1, Context.DestClausePrefix.INSERT);
ctx.addDestNamePrefix(2, Context.DestClausePrefix.DELETE);
}
+
+ @Override
+ protected boolean allowOutputMultipleTimes() {
+ return true;
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/MaterializedViewASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/MaterializedViewASTBuilder.java
new file mode 100644
index 00000000000..ab4e2c04d68
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/MaterializedViewASTBuilder.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.view.materialized.alter.rebuild;
+
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+abstract class MaterializedViewASTBuilder {
+ public abstract List<ASTNode> createDeleteSelectNodes(String tableName);
+
+ public List<ASTNode> createAcidSortNodes(ASTNode inputNode) {
+ return createAcidSortNodesInternal(inputNode.getText());
+ }
+
+ public List<ASTNode> createAcidSortNodes(String tableName) {
+ return wrapIntoSelExpr(createAcidSortNodesInternal(tableName));
+ }
+
+ protected abstract List<ASTNode> createAcidSortNodesInternal(String tableName);
+
+ // .
+ // TOK_TABLE_OR_COL
+ // <tableName>
+ // <columnName>
+ public ASTNode createQualifiedColumnNode(String tableName, String columnName) {
+ ASTNode dotNode = (ASTNode) ParseDriver.adaptor.create(HiveParser.DOT, ".");
+ ASTNode columnTokNode = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
+ ASTNode rowIdNode = (ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, columnName);
+ ASTNode tableNameNode = (ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, tableName);
+
+ ParseDriver.adaptor.addChild(dotNode, columnTokNode);
+ ParseDriver.adaptor.addChild(dotNode, rowIdNode);
+ ParseDriver.adaptor.addChild(columnTokNode, tableNameNode);
+ return dotNode;
+ }
+
+ public List<ASTNode> wrapIntoSelExpr(List<ASTNode> expressionNodes) {
+ return expressionNodes.stream().map(expressionNode -> {
+ ASTNode selectExpr = (ASTNode) ParseDriver.adaptor.create(
+ HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
+
+ ParseDriver.adaptor.addChild(selectExpr, expressionNode);
+ return selectExpr;
+ }).collect(Collectors.toList());
+ }
+
+ public ASTNode createSortNodes(List<ASTNode> sortKeyNodes) {
+ ASTNode sortExprNode = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_SORTBY, "TOK_SORTBY");
+ sortKeyNodes.forEach(sortKeyNode -> ParseDriver.adaptor.addChild(sortExprNode, createSortNode(sortKeyNode)));
+ return sortExprNode;
+ }
+
+ // TOK_TABSORTCOLNAMEASC
+ // TOK_NULLS_FIRST
+ // <sortKeyNode>
+ // (the enclosing TOK_SORTBY node is created by createSortNodes)
+ public ASTNode createSortNode(ASTNode sortKeyNodes) {
+ ASTNode orderExprNode = (ASTNode) ParseDriver.adaptor.create(
+ HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC");
+ ASTNode nullsOrderExprNode = (ASTNode) ParseDriver.adaptor.create(
+ HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST");
+ ParseDriver.adaptor.addChild(orderExprNode, nullsOrderExprNode);
+ ParseDriver.adaptor.addChild(nullsOrderExprNode, sortKeyNodes);
+ return orderExprNode;
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/NativeAcidMaterializedViewASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/NativeAcidMaterializedViewASTBuilder.java
new file mode 100644
index 00000000000..d88075bd36d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/NativeAcidMaterializedViewASTBuilder.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.view.materialized.alter.rebuild;
+
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+
+import java.util.List;
+
+import static java.util.Collections.singletonList;
+
+public class NativeAcidMaterializedViewASTBuilder extends MaterializedViewASTBuilder {
+ @Override
+ public List<ASTNode> createDeleteSelectNodes(String tableName) {
+ return wrapIntoSelExpr(singletonList(createQualifiedColumnNode(tableName, VirtualColumn.ROWID.getName())));
+ }
+
+ @Override
+ protected List<ASTNode> createAcidSortNodesInternal(String tableName) {
+ return singletonList(createQualifiedColumnNode(tableName, VirtualColumn.ROWID.getName()));
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/NonNativeAcidMaterializedViewASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/NonNativeAcidMaterializedViewASTBuilder.java
new file mode 100644
index 00000000000..e29548d9a12
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/NonNativeAcidMaterializedViewASTBuilder.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.view.materialized.alter.rebuild;
+
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static java.util.Collections.singletonList;
+
+/**
+ * {@link MaterializedViewASTBuilder} for a materialized view whose row-identifying columns are
+ * supplied by the storage handler of the view's table rather than being fixed in this class.
+ */
+public class NonNativeAcidMaterializedViewASTBuilder extends MaterializedViewASTBuilder {
+  /** Table backing the materialized view; its storage handler is asked for the ACID columns. */
+  private final Table mvTable;
+
+  /**
+   * @param mvTable table of the materialized view being rebuilt; its storage handler is consulted
+   *                every time nodes are created
+   */
+  public NonNativeAcidMaterializedViewASTBuilder(Table mvTable) {
+    this.mvTable = mvTable;
+  }
+
+  /**
+   * Creates the select expressions of the delete branch from the columns the storage handler
+   * reports via {@code acidSelectColumns} for {@link Context.Operation#DELETE}.
+   *
+   * @param tableName qualifier placed in front of every column reference
+   * @return one node per reported column, passed through {@code wrapIntoSelExpr}
+   */
+  @Override
+  public List<ASTNode> createDeleteSelectNodes(String tableName) {
+    return wrapIntoSelExpr(mvTable.getStorageHandler().acidSelectColumns(mvTable, Context.Operation.DELETE)
+        .stream().map(fieldSchema -> createQualifiedColumnNode(tableName, fieldSchema.getName()))
+        .collect(Collectors.toList()));
+  }
+
+  /**
+   * Creates the ACID sort keys from the columns the storage handler reports via
+   * {@code acidSortColumns} for {@link Context.Operation#DELETE}, so that the sort keys come from
+   * the same source as the columns selected by {@link #createDeleteSelectNodes(String)}.
+   *
+   * @param tableName qualifier placed in front of every column reference
+   * @return one qualified column node per reported sort column; when the handler reports none,
+   *         a single qualified {@link VirtualColumn#ROWID} node (the previous behavior)
+   */
+  @Override
+  protected List<ASTNode> createAcidSortNodesInternal(String tableName) {
+    List<ASTNode> sortNodes = mvTable.getStorageHandler().acidSortColumns(mvTable, Context.Operation.DELETE)
+        .stream().map(fieldSchema -> createQualifiedColumnNode(tableName, fieldSchema.getName()))
+        .collect(Collectors.toList());
+    if (sortNodes.isEmpty()) {
+      // The handler declares no sort columns: keep the ROWID key this method always produced before.
+      return singletonList(createQualifiedColumnNode(tableName, VirtualColumn.ROWID.getName()));
+    }
+    return sortNodes;
+  }
+}
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q
index 6024657b38c..18575edb0dc 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_6.q
@@ -1,4 +1,5 @@
-- Test Incremental rebuild of materialized view with aggregate and count(*) when source tables have delete operations since last rebuild.
+-- SORT_QUERY_RESULTS
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_9.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_9.q
index 884bff3d57e..98e04ccb76b 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_9.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_9.q
@@ -1,6 +1,7 @@
-- Test Incremental rebuild of materialized view with aggregate and count(*) when
-- 1) source tables have delete operations since last rebuild.
-- 2) a source table is insert only.
+-- SORT_QUERY_RESULTS
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
index 974dc15cfb0..30cf1cb20a1 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
@@ -732,24 +732,27 @@ POSTHOOK: Input: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-3 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-0
- Stage-6 depends on stages: Stage-4, Stage-5
- Stage-1 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
+ Stage-1 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-3
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -770,7 +773,7 @@ STAGE PLANS:
value expressions: _col2 (type: bigint), _col3 (type: boolean), _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: cmv_basetable_n5
@@ -791,7 +794,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 8
+ Map 9
Map Operator Tree:
TableScan
alias: cmv_basetable_2_n2
@@ -826,7 +829,20 @@ STAGE PLANS:
outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col3 is null (type: boolean)
+ predicate: _col3 (type: boolean)
+ Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col3 (type: boolean)
Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int), _col6 (type: decimal(10,2)), CASE WHEN (_col2 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col2) ELSE ((_col7 + _col2)) END (type: bigint)
@@ -857,20 +873,53 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary)
Filter Operator
- predicate: _col3 (type: boolean)
+ predicate: _col3 is null (type: boolean)
Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col5 (type: int), _col6 (type: decimal(10,2)), CASE WHEN (_col2 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col2) ELSE ((_col7 + _col2)) END (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- null sort order: a
- sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int), _col2 (type: decimal(10,2)), _col3 (type: bigint)
+ expressions: _col5 (type: int), _col6 (type: decimal(10,2)), CASE WHEN (_col2 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col2) ELSE ((_col7 + _col2)) END (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n5
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col2 (type: bigint)
+ outputColumnNames: a, c, _c2
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(c), max(c), count(c), compute_bit_vector_hll(c), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary)
Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n5
+ Write Type: DELETE
+ Reducer 4
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -889,23 +938,26 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reducer 5
Execution mode: llap
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), VALUE._col1 (type: decimal(10,2)), VALUE._col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.cmv_mat_view_n5
- Write Type: UPDATE
- Reducer 6
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigin [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -930,7 +982,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int), _col1 (type: decimal(10,2))
Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
- Reducer 7
+ Reducer 8
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -947,7 +999,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
- Stage: Stage-3
+ Stage: Stage-4
Dependency Collection
Stage: Stage-0
@@ -959,13 +1011,13 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.cmv_mat_view_n5
- Write Type: INSERT
+ Write Type: DELETE
- Stage: Stage-4
+ Stage: Stage-5
Stats Work
Basic Stats Work:
- Stage: Stage-6
+ Stage: Stage-8
Materialized View Update
name: default.cmv_mat_view_n5
update creation metadata: true
@@ -979,9 +1031,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.cmv_mat_view_n5
- Write Type: UPDATE
+ Write Type: INSERT
- Stage: Stage-5
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n5
+ Write Type: INSERT
+
+ Stage: Stage-7
Stats Work
Basic Stats Work:
Column Stats Desc:
@@ -1004,8 +1071,11 @@ POSTHOOK: Input: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
POSTHOOK: Lineage: cmv_mat_view_n5._c2 EXPRESSION [(cmv_mat_view_n5)default.cmv_mat_view_n5.FieldSchema(name:_c2, type:bigint, comment:null), (cmv_basetable_2_n2)cmv_basetable_2_n2.FieldSchema(name:d, type:int, comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view_n5._c2 EXPRESSION [(cmv_mat_view_n5)default.cmv_mat_view_n5.FieldSchema(name:_c2, type:bigint, comment:null), (cmv_basetable_2_n2)cmv_basetable_2_n2.FieldSchema(name:d, type:int, comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view_n5.a SIMPLE [(cmv_basetable_n5)cmv_basetable_n5.FieldSchema(name:a, type:int, comment:null), ]
POSTHOOK: Lineage: cmv_mat_view_n5.a SIMPLE [(cmv_basetable_n5)cmv_basetable_n5.FieldSchema(name:a, type:int, comment:null), ]
POSTHOOK: Lineage: cmv_mat_view_n5.c SIMPLE [(cmv_basetable_2_n2)cmv_basetable_2_n2.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view_n5.c SIMPLE [(cmv_basetable_2_n2)cmv_basetable_2_n2.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
PREHOOK: query: DESCRIBE FORMATTED cmv_mat_view_n5
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@cmv_mat_view_n5
@@ -1029,7 +1099,7 @@ Table Parameters:
numFiles 3
numRows 3
rawDataSize 0
- totalSize 2319
+ totalSize 2325
transactional true
transactional_properties default
#### A masked pattern was here ####
@@ -1774,24 +1844,27 @@ POSTHOOK: Input: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-3 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-0
- Stage-6 depends on stages: Stage-4, Stage-5
- Stage-1 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
+ Stage-1 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-3
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1812,7 +1885,7 @@ STAGE PLANS:
value expressions: _col2 (type: bigint), _col3 (type: boolean), _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: cmv_basetable_n5
@@ -1833,7 +1906,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 8
+ Map 9
Map Operator Tree:
TableScan
alias: cmv_basetable_2_n2
@@ -1868,7 +1941,20 @@ STAGE PLANS:
outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col3 is null (type: boolean)
+ predicate: _col3 (type: boolean)
+ Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col3 (type: boolean)
Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col5 (type: int), _col6 (type: decimal(10,2)), CASE WHEN (_col2 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col2) ELSE ((_col7 + _col2)) END (type: bigint)
@@ -1899,20 +1985,53 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary)
Filter Operator
- predicate: _col3 (type: boolean)
+ predicate: _col3 is null (type: boolean)
Statistics: Num rows: 1 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col5 (type: int), _col6 (type: decimal(10,2)), CASE WHEN (_col2 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col2) ELSE ((_col7 + _col2)) END (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- null sort order: a
- sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int), _col2 (type: decimal(10,2)), _col3 (type: bigint)
+ expressions: _col5 (type: int), _col6 (type: decimal(10,2)), CASE WHEN (_col2 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col2) ELSE ((_col7 + _col2)) END (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n5
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: decimal(10,2)), _col2 (type: bigint)
+ outputColumnNames: a, c, _c2
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(c), max(c), count(c), compute_bit_vector_hll(c), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary)
Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n5
+ Write Type: DELETE
+ Reducer 4
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1931,23 +2050,26 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reducer 5
Execution mode: llap
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), VALUE._col1 (type: decimal(10,2)), VALUE._col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.cmv_mat_view_n5
- Write Type: UPDATE
- Reducer 6
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigin [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1003 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -1972,7 +2094,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int), _col1 (type: decimal(10,2))
Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
- Reducer 7
+ Reducer 8
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1989,7 +2111,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
- Stage: Stage-3
+ Stage: Stage-4
Dependency Collection
Stage: Stage-0
@@ -2001,13 +2123,13 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.cmv_mat_view_n5
- Write Type: INSERT
+ Write Type: DELETE
- Stage: Stage-4
+ Stage: Stage-5
Stats Work
Basic Stats Work:
- Stage: Stage-6
+ Stage: Stage-8
Materialized View Update
name: default.cmv_mat_view_n5
update creation metadata: true
@@ -2021,9 +2143,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.cmv_mat_view_n5
- Write Type: UPDATE
+ Write Type: INSERT
- Stage: Stage-5
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n5
+ Write Type: INSERT
+
+ Stage: Stage-7
Stats Work
Basic Stats Work:
Column Stats Desc:
@@ -2046,8 +2183,11 @@ POSTHOOK: Input: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
POSTHOOK: Lineage: cmv_mat_view_n5._c2 EXPRESSION [(cmv_mat_view_n5)default.cmv_mat_view_n5.FieldSchema(name:_c2, type:bigint, comment:null), (cmv_basetable_2_n2)cmv_basetable_2_n2.FieldSchema(name:d, type:int, comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view_n5._c2 EXPRESSION [(cmv_mat_view_n5)default.cmv_mat_view_n5.FieldSchema(name:_c2, type:bigint, comment:null), (cmv_basetable_2_n2)cmv_basetable_2_n2.FieldSchema(name:d, type:int, comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view_n5.a SIMPLE [(cmv_basetable_n5)cmv_basetable_n5.FieldSchema(name:a, type:int, comment:null), ]
POSTHOOK: Lineage: cmv_mat_view_n5.a SIMPLE [(cmv_basetable_n5)cmv_basetable_n5.FieldSchema(name:a, type:int, comment:null), ]
POSTHOOK: Lineage: cmv_mat_view_n5.c SIMPLE [(cmv_basetable_2_n2)cmv_basetable_2_n2.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view_n5.c SIMPLE [(cmv_basetable_2_n2)cmv_basetable_2_n2.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
PREHOOK: query: DESCRIBE FORMATTED cmv_mat_view_n5
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@cmv_mat_view_n5
@@ -2071,7 +2211,7 @@ Table Parameters:
numFiles 3
numRows 3
rawDataSize 0
- totalSize 1822
+ totalSize 1828
transactional true
transactional_properties default
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
index 81c0e7df05f..592709cd2cf 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_6.q.out
@@ -237,7 +237,6 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Output: default@mat1
PREHOOK: Output: default@mat1
-PREHOOK: Output: default@mat1
POSTHOOK: query: explain cbo
alter materialized view mat1 rebuild
POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
@@ -246,7 +245,6 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
-POSTHOOK: Output: default@mat1
CBO PLAN:
HiveProject(a=[$5], _o__c1=[CASE(IS NULL($1), $6, IS NULL($6), $1, +($6, $1))], _o__c2=[CASE(IS NULL($2), $7, +($7, $2))], _o__c3=[/(CAST(CASE(IS NULL($1), $6, IS NULL($6), $1, +($6, $1))):DOUBLE, CASE(IS NULL($2), $7, +($7, $2)))], _o__c4=[CASE(IS NULL($3), $8, +($8, $3))])
HiveFilter(condition=[OR(AND($4, OR(AND(IS NULL($3), =($8, 0)), AND(=(+($8, $3), 0), IS NOT NULL($3)))), AND(IS NULL($4), OR(AND(IS NULL($3), >($8, 0)), AND(>(+($8, $3), 0), IS NOT NULL($3)))), AND($4, OR(AND(IS NULL($3), >($8, 0)), AND(>(+($8, $3), 0), IS NOT NULL($3)))))])
@@ -272,7 +270,6 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Output: default@mat1
PREHOOK: Output: default@mat1
-PREHOOK: Output: default@mat1
POSTHOOK: query: explain
alter materialized view mat1 rebuild
POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
@@ -281,29 +278,31 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
-POSTHOOK: Output: default@mat1
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-4 depends on stages: Stage-3
- Stage-0 depends on stages: Stage-4
- Stage-5 depends on stages: Stage-0
- Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
- Stage-1 depends on stages: Stage-4
- Stage-6 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-4
- Stage-7 depends on stages: Stage-2
+ Stage-4 is a root stage
+ Stage-5 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-0
+ Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
+ Stage-1 depends on stages: Stage-5
+ Stage-7 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-5
+ Stage-8 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-5
+ Stage-9 depends on stages: Stage-3
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-4
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -324,52 +323,52 @@ STAGE PLANS:
value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: boolean), _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 6
+ Map 10
Map Operator Tree:
TableScan
- alias: t1
+ alias: t2
filterExpr: a is not null (type: boolean)
properties:
acid.fetch.deleted.rows TRUE
- Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: a is not null (type: boolean)
- Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: a (type: char(15)), b (type: int), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: a (type: char(15)), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean)
+ Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean), _col2 (type: boolean)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 9
+ Map 7
Map Operator Tree:
TableScan
- alias: t2
+ alias: t1
filterExpr: a is not null (type: boolean)
properties:
acid.fetch.deleted.rows TRUE
- Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: a is not null (type: boolean)
- Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: a (type: char(15)), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: a (type: char(15)), b (type: int), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: boolean), _col2 (type: boolean)
+ Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -385,7 +384,7 @@ STAGE PLANS:
outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 5 Data size: 1097 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col4 and ((_col3 is null and (_col9 = 0L)) or (((_col9 + _col3) = 0) and _col3 is not null))) (type: boolean)
+ predicate: (_col4 and ((_col3 is null and (_col9 > 0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
@@ -398,7 +397,20 @@ STAGE PLANS:
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col4 is null and ((_col3 is null and (_col9 > 0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
+ predicate: (_col4 and ((_col3 is null and (_col9 = 0L)) or (((_col9 + _col3) = 0) and _col3 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Filter Operator
+ predicate: (_col4 and ((_col3 is null and (_col9 > 0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col6 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END (type: bigint), if(_col2 is null, _col8, (_col8 + _col2)) (type: bigint), (UDFToDouble(CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END) / if(_col2 is null, _col8, (_col8 + _col2))) (type: double), if(_col3 is null, _col9, (_col9 + _col3)) (type: bigint)
@@ -429,19 +441,36 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 912 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: binary), _col17 (type: bigint), _col18 (typ [...]
Filter Operator
- predicate: (_col4 and ((_col3 is null and (_col9 > 0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
+ predicate: (_col4 is null and ((_col3 is null and (_col9 > 0L)) or (((_col9 + _col3) > 0) and _col3 is not null))) (type: boolean)
Statistics: Num rows: 1 Data size: 221 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col6 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END (type: bigint), if(_col2 is null, _col8, (_col8 + _col2)) (type: bigint), (UDFToDouble(CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END) / if(_col2 is null, _col8, (_col8 + _col2))) (type: double), if(_col3 is null, _col [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- null sort order: a
- sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: char(15)), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint)
+ expressions: _col6 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END (type: bigint), if(_col2 is null, _col8, (_col8 + _col2)) (type: bigint), (UDFToDouble(CASE WHEN (_col1 is null) THEN (_col7) WHEN (_col7 is null) THEN (_col1) ELSE ((_col7 + _col1)) END) / if(_col2 is null, _col8, (_col8 + _col2))) (type: double), if(_col3 is null, _col9, (_col9 + _col3)) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: char(15)), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint)
+ outputColumnNames: a, _c1, _c2, _c3, _c4
+ Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector_hll(a), min(_c1), max(_c1), count(_c1), compute_bit_vector_hll(_c1), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2), min(_c3), max(_c3), count(_c3), compute_bit_vector_hll(_c3), min(_c4), max(_c4), count(_c4), compute_bit_vector_hll(_c4)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Statistics: Num rows: 1 Data size: 912 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 912 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: binary), _col17 (type: bigint), _col18 (typ [...]
Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
@@ -459,6 +488,22 @@ STAGE PLANS:
name: default.mat1
Write Type: DELETE
Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: DELETE
+ Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -477,23 +522,26 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 5
+ Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: char(15)), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 201 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.mat1
- Write Type: UPDATE
- Reducer 7
+ Group By Operator
+ aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), comp [...]
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Statistics: Num rows: 1 Data size: 844 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint) [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1324 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -523,7 +571,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(15))
Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 8
+ Reducer 9
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -540,7 +588,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
- Stage: Stage-4
+ Stage: Stage-5
Dependency Collection
Stage: Stage-0
@@ -554,11 +602,11 @@ STAGE PLANS:
name: default.mat1
Write Type: DELETE
- Stage: Stage-5
+ Stage: Stage-6
Stats Work
Basic Stats Work:
- Stage: Stage-8
+ Stage: Stage-10
Materialized View Update
name: default.mat1
update creation metadata: true
@@ -572,9 +620,9 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.mat1
- Write Type: INSERT
+ Write Type: DELETE
- Stage: Stage-6
+ Stage: Stage-7
Stats Work
Basic Stats Work:
@@ -587,9 +635,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.mat1
- Write Type: UPDATE
+ Write Type: INSERT
- Stage: Stage-7
+ Stage: Stage-8
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-3
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+
+ Stage: Stage-9
Stats Work
Basic Stats Work:
Column Stats Desc:
@@ -604,7 +667,6 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Output: default@mat1
PREHOOK: Output: default@mat1
-PREHOOK: Output: default@mat1
POSTHOOK: query: alter materialized view mat1 rebuild
POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
POSTHOOK: Input: default@mat1
@@ -612,11 +674,15 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
-POSTHOOK: Output: default@mat1
POSTHOOK: Lineage: mat1._c1 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c1 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
POSTHOOK: Lineage: mat1._c3 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), (mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), ]
+POSTHOOK: Lineage: mat1._c3 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), (mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), ]
POSTHOOK: Lineage: mat1._c4 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c4, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), ]
+POSTHOOK: Lineage: mat1._c4 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c4, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), ]
+POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
PREHOOK: query: explain cbo
select t1.a, sum(t1.b), count(t1.b), avg(t1.b), count(*) from t1
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_7.q.out
index 48335aef885..bc996ec1c32 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_7.q.out
@@ -125,23 +125,26 @@ POSTHOOK: Input: default@t1
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-3 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-0
- Stage-6 depends on stages: Stage-4, Stage-5
- Stage-1 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
+ Stage-1 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-3
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -162,7 +165,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint), _col2 (type: boolean), _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: t1
@@ -204,7 +207,20 @@ STAGE PLANS:
outputColumnNames: _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 4 Data size: 744 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col2 is null (type: boolean)
+ predicate: _col2 (type: boolean)
+ Statistics: Num rows: 1 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col2 (type: boolean)
Statistics: Num rows: 1 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col5) WHEN (_col5 is null) THEN (_col1) ELSE ((_col5 + _col1)) END (type: bigint)
@@ -235,20 +251,53 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary)
Filter Operator
- predicate: _col2 (type: boolean)
+ predicate: _col2 is null (type: boolean)
Statistics: Num rows: 1 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col4 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col5) WHEN (_col5 is null) THEN (_col1) ELSE ((_col5 + _col1)) END (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- null sort order: a
- sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: char(15)), _col2 (type: bigint)
+ expressions: _col4 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col5) WHEN (_col5 is null) THEN (_col1) ELSE ((_col5 + _col1)) END (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: char(15)), _col1 (type: bigint)
+ outputColumnNames: a, _c1
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector_hll(a), min(_c1), max(_c1), count(_c1), compute_bit_vector_hll(_c1)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary)
Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: DELETE
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -267,23 +316,26 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: char(15)), VALUE._col1 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.mat1
- Write Type: UPDATE
- Reducer 6
+ Group By Operator
+ aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -300,7 +352,7 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
- Stage: Stage-3
+ Stage: Stage-4
Dependency Collection
Stage: Stage-0
@@ -312,13 +364,13 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.mat1
- Write Type: INSERT
+ Write Type: DELETE
- Stage: Stage-4
+ Stage: Stage-5
Stats Work
Basic Stats Work:
- Stage: Stage-6
+ Stage: Stage-8
Materialized View Update
name: default.mat1
update creation metadata: true
@@ -332,9 +384,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.mat1
- Write Type: UPDATE
+ Write Type: INSERT
- Stage: Stage-5
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+
+ Stage: Stage-7
Stats Work
Basic Stats Work:
Column Stats Desc:
@@ -355,6 +422,8 @@ POSTHOOK: Input: default@t1
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
POSTHOOK: Lineage: mat1._c1 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.null, ]
+POSTHOOK: Lineage: mat1._c1 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.null, ]
+POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
PREHOOK: query: explain cbo
select t1.a, count(*) from t1
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_9.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_9.q.out
index 74724cdee6a..026c70f365a 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_9.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_9.q.out
@@ -235,7 +235,6 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Output: default@mat1
PREHOOK: Output: default@mat1
-PREHOOK: Output: default@mat1
POSTHOOK: query: explain cbo
alter materialized view mat1 rebuild
POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
@@ -244,7 +243,6 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
-POSTHOOK: Output: default@mat1
CBO PLAN:
HiveProject(a0=[$4], $f1=[CASE(IS NULL($1), $5, IS NULL($5), $1, +($5, $1))], $f2=[CASE(IS NULL($2), $6, +($6, $2))])
HiveFilter(condition=[OR(AND($3, OR(AND(IS NULL($2), =($6, 0)), AND(=(+($6, $2), 0), IS NOT NULL($2)))), AND(IS NULL($3), OR(AND(IS NULL($2), >($6, 0)), AND(>(+($6, $2), 0), IS NOT NULL($2)))), AND($3, OR(AND(IS NULL($2), >($6, 0)), AND(>(+($6, $2), 0), IS NOT NULL($2)))))])
@@ -270,7 +268,6 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Output: default@mat1
PREHOOK: Output: default@mat1
-PREHOOK: Output: default@mat1
POSTHOOK: query: explain
alter materialized view mat1 rebuild
POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
@@ -279,29 +276,31 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
-POSTHOOK: Output: default@mat1
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-4 depends on stages: Stage-3
- Stage-0 depends on stages: Stage-4
- Stage-5 depends on stages: Stage-0
- Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
- Stage-1 depends on stages: Stage-4
- Stage-6 depends on stages: Stage-1
- Stage-2 depends on stages: Stage-4
- Stage-7 depends on stages: Stage-2
+ Stage-4 is a root stage
+ Stage-5 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-0
+ Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
+ Stage-1 depends on stages: Stage-5
+ Stage-7 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-5
+ Stage-8 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-5
+ Stage-9 depends on stages: Stage-3
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-4
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -322,52 +321,52 @@ STAGE PLANS:
value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean), _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 6
+ Map 10
Map Operator Tree:
TableScan
- alias: t1
+ alias: t2
filterExpr: a is not null (type: boolean)
properties:
acid.fetch.deleted.rows TRUE
- Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: a is not null (type: boolean)
- Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: a (type: char(15)), b (type: int), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: a (type: char(15)), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean)
+ Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean), _col2 (type: boolean)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 9
+ Map 7
Map Operator Tree:
TableScan
- alias: t2
+ alias: t1
filterExpr: a is not null (type: boolean)
properties:
acid.fetch.deleted.rows TRUE
- Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: a is not null (type: boolean)
- Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: a (type: char(15)), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: a (type: char(15)), b (type: int), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(15))
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: char(15))
- Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: boolean), _col2 (type: boolean)
+ Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
@@ -383,7 +382,7 @@ STAGE PLANS:
outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 5 Data size: 1017 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col3 and ((_col2 is null and (_col7 = 0L)) or (((_col7 + _col2) = 0) and _col2 is not null))) (type: boolean)
+ predicate: (_col3 and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
@@ -396,7 +395,20 @@ STAGE PLANS:
Map-reduce partition columns: UDFToInteger(_col0) (type: int)
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col3 is null and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
+ predicate: (_col3 and ((_col2 is null and (_col7 = 0L)) or (((_col7 + _col2) = 0) and _col2 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Filter Operator
+ predicate: (_col3 and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col5 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col6) WHEN (_col6 is null) THEN (_col1) ELSE ((_col6 + _col1)) END (type: bigint), if(_col2 is null, _col7, (_col7 + _col2)) (type: bigint)
@@ -427,19 +439,36 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary)
Filter Operator
- predicate: (_col3 and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
+ predicate: (_col3 is null and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col5 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col6) WHEN (_col6 is null) THEN (_col1) ELSE ((_col6 + _col1)) END (type: bigint), if(_col2 is null, _col7, (_col7 + _col2)) (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- null sort order: a
- sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: char(15)), _col2 (type: bigint), _col3 (type: bigint)
+ expressions: _col5 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col6) WHEN (_col6 is null) THEN (_col1) ELSE ((_col6 + _col1)) END (type: bigint), if(_col2 is null, _col7, (_col7 + _col2)) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: char(15)), _col1 (type: bigint), _col2 (type: bigint)
+ outputColumnNames: a, _c1, _c2
+ Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector_hll(a), min(_c1), max(_c1), count(_c1), compute_bit_vector_hll(_c1), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary)
Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
@@ -457,6 +486,22 @@ STAGE PLANS:
name: default.mat1
Write Type: DELETE
Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: DELETE
+ Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -475,23 +520,26 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 5
+ Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: char(15)), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.mat1
- Write Type: UPDATE
- Reducer 7
+ Group By Operator
+ aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint) [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 8
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -521,7 +569,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(15))
Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Reducer 8
+ Reducer 9
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -538,7 +586,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Stage: Stage-4
+ Stage: Stage-5
Dependency Collection
Stage: Stage-0
@@ -552,11 +600,11 @@ STAGE PLANS:
name: default.mat1
Write Type: DELETE
- Stage: Stage-5
+ Stage: Stage-6
Stats Work
Basic Stats Work:
- Stage: Stage-8
+ Stage: Stage-10
Materialized View Update
name: default.mat1
update creation metadata: true
@@ -570,9 +618,9 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.mat1
- Write Type: INSERT
+ Write Type: DELETE
- Stage: Stage-6
+ Stage: Stage-7
Stats Work
Basic Stats Work:
@@ -585,9 +633,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.mat1
- Write Type: UPDATE
+ Write Type: INSERT
- Stage: Stage-7
+ Stage: Stage-8
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-3
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+
+ Stage: Stage-9
Stats Work
Basic Stats Work:
Column Stats Desc:
@@ -602,7 +665,6 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Output: default@mat1
PREHOOK: Output: default@mat1
-PREHOOK: Output: default@mat1
POSTHOOK: query: alter materialized view mat1 rebuild
POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
POSTHOOK: Input: default@mat1
@@ -610,9 +672,11 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
-POSTHOOK: Output: default@mat1
+POSTHOOK: Lineage: mat1._c1 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
POSTHOOK: Lineage: mat1._c1 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), ]
+POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), ]
+POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
PREHOOK: query: explain cbo
select t1.a, sum(t1.b) from t1
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_nulls.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_nulls.q.out
index c606676a01b..1a9418539cc 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_nulls.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_nulls.q.out
@@ -142,23 +142,26 @@ POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
Explain
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-3 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-0
- Stage-6 depends on stages: Stage-4, Stage-5
- Stage-1 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
+ Stage-1 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-3
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -179,7 +182,7 @@ STAGE PLANS:
value expressions: _col2 (type: bigint), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: t1
@@ -220,6 +223,50 @@ STAGE PLANS:
nullSafes: [true, true]
outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
Statistics: Num rows: 10 Data size: 2052 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col5 (type: boolean)
+ Statistics: Num rows: 1 Data size: 230 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col6 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col5 (type: boolean)
+ Statistics: Num rows: 1 Data size: 230 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col7 (type: int), _col8 (type: varchar(256)), CASE WHEN (_col2 is null) THEN (_col9) WHEN (_col9 is null) THEN (_col2) ELSE ((_col9 + _col2)) END (type: bigint), CASE WHEN (_col3 is null) THEN (_col10) WHEN (_col10 is null) THEN (_col3) ELSE (if((_col10 < _col3), _col10, _col3)) END (type: int), CASE WHEN (_col4 is null) THEN (_col11) WHEN (_col11 is null) THEN (_col4) ELSE (if((_col11 > _col4), _col11, _col4)) END (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 134 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 134 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: varchar(256)), _col2 (type: bigint), _col3 (type: int), _col4 (type: int)
+ outputColumnNames: a, b, _c2, _c3, _c4
+ Statistics: Num rows: 1 Data size: 134 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), max(length(b)), avg(COALESCE(length(b),0)), count(b), compute_bit_vector_hll(b), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2), min(_c3), max(_c3), count(_c3), compute_bit_vector_hll(_c3), min(_c4), max(_c4), count(_c4), compute_bit_vector_hll(_c4)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 [...]
Filter Operator
predicate: _col5 is null (type: boolean)
Statistics: Num rows: 3 Data size: 674 Basic stats: COMPLETE Column stats: COMPLETE
@@ -251,21 +298,23 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: int), _col15 (type: bigint), _col16 (type: binary), _col17 (type: int), _col18 (type: int), _col19 [...]
- Filter Operator
- predicate: _col5 (type: boolean)
- Statistics: Num rows: 1 Data size: 230 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col6 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col7 (type: int), _col8 (type: varchar(256)), CASE WHEN (_col2 is null) THEN (_col9) WHEN (_col9 is null) THEN (_col2) ELSE ((_col9 + _col2)) END (type: bigint), CASE WHEN (_col3 is null) THEN (_col10) WHEN (_col10 is null) THEN (_col3) ELSE (if((_col10 < _col3), _col10, _col3)) END (type: int), CASE WHEN (_col4 is null) THEN (_col11) WHEN (_col11 is null) THEN (_col4) ELSE (if((_col11 > _co [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 210 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- null sort order: a
- sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 210 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int), _col2 (type: varchar(256)), _col3 (type: bigint), _col4 (type: int), _col5 (type: int)
Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: DELETE
+ Reducer 4
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -284,23 +333,26 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reducer 5
Execution mode: llap
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), VALUE._col1 (type: varchar(256)), VALUE._col2 (type: bigint), VALUE._col3 (type: int), VALUE._col4 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 210 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 210 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.mat1
- Write Type: UPDATE
- Reducer 6
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), min(VALUE._col13), max(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), comp [...]
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Statistics: Num rows: 1 Data size: 820 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1322 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -317,7 +369,7 @@ STAGE PLANS:
Statistics: Num rows: 7 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint), _col3 (type: int), _col4 (type: int)
- Stage: Stage-3
+ Stage: Stage-4
Dependency Collection
Stage: Stage-0
@@ -329,13 +381,13 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.mat1
- Write Type: INSERT
+ Write Type: DELETE
- Stage: Stage-4
+ Stage: Stage-5
Stats Work
Basic Stats Work:
- Stage: Stage-6
+ Stage: Stage-8
Materialized View Update
name: default.mat1
update creation metadata: true
@@ -349,9 +401,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.mat1
- Write Type: UPDATE
+ Write Type: INSERT
- Stage: Stage-5
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+
+ Stage: Stage-7
Stats Work
Basic Stats Work:
Column Stats Desc:
@@ -372,11 +439,16 @@ POSTHOOK: Input: default@t1
POSTHOOK: Output: default@mat1
POSTHOOK: Output: default@mat1
POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:d, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:d, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c3 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c3, type:int, comment:null), (t1)t1.FieldSchema(name:d, type:int, comment:null), ]
POSTHOOK: Lineage: mat1._c3 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c3, type:int, comment:null), (t1)t1.FieldSchema(name:d, type:int, comment:null), ]
POSTHOOK: Lineage: mat1._c4 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c4, type:int, comment:null), (t1)t1.FieldSchema(name:d, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c4 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c4, type:int, comment:null), (t1)t1.FieldSchema(name:d, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
POSTHOOK: Lineage: mat1.b SIMPLE [(t1)t1.FieldSchema(name:b, type:varchar(256), comment:null), ]
-$hdt$_0.row__id t1.a t1.b _c2 _c3 _c4
+POSTHOOK: Lineage: mat1.b SIMPLE [(t1)t1.FieldSchema(name:b, type:varchar(256), comment:null), ]
+t1.a t1.b _c2 _c3 _c4
PREHOOK: query: EXPLAIN CBO
SELECT a, b, sum(d), min(d), max(d)
FROM t1
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_one_key_gby.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_one_key_gby.q.out
index c1954ad0005..c50ae464b87 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_one_key_gby.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_one_key_gby.q.out
@@ -105,24 +105,27 @@ POSTHOOK: Input: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
POSTHOOK: Output: default@cmv_mat_view_n5
STAGE DEPENDENCIES:
- Stage-2 is a root stage
- Stage-3 depends on stages: Stage-2
- Stage-0 depends on stages: Stage-3
- Stage-4 depends on stages: Stage-0
- Stage-6 depends on stages: Stage-4, Stage-5
- Stage-1 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-1
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
+ Stage-1 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-2
+ Stage: Stage-3
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -143,7 +146,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint), _col2 (type: boolean), _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: cmv_basetable_n5
@@ -164,7 +167,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 8
+ Map 9
Map Operator Tree:
TableScan
alias: cmv_basetable_2_n2
@@ -199,7 +202,20 @@ STAGE PLANS:
outputColumnNames: _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col2 is null (type: boolean)
+ predicate: _col2 (type: boolean)
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col2 (type: boolean)
Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col4 (type: int), CASE WHEN (_col1 is null) THEN (_col5) WHEN (_col5 is null) THEN (_col1) ELSE ((_col5 + _col1)) END (type: bigint)
@@ -230,20 +246,53 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary)
Filter Operator
- predicate: _col2 (type: boolean)
+ predicate: _col2 is null (type: boolean)
Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col3 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col4 (type: int), CASE WHEN (_col1 is null) THEN (_col5) WHEN (_col5 is null) THEN (_col1) ELSE ((_col5 + _col1)) END (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- null sort order: a
- sort order: +
- Map-reduce partition columns: UDFToInteger(_col0) (type: int)
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int), _col2 (type: bigint)
+ expressions: _col4 (type: int), CASE WHEN (_col1 is null) THEN (_col5) WHEN (_col5 is null) THEN (_col1) ELSE ((_col5 + _col1)) END (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n5
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: bigint)
+ outputColumnNames: a, _c1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(_c1), max(_c1), count(_c1), compute_bit_vector_hll(_c1)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary)
Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n5
+ Write Type: DELETE
+ Reducer 4
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -262,23 +311,26 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reducer 5
Execution mode: llap
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), VALUE._col1 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: default.cmv_mat_view_n5
- Write Type: UPDATE
- Reducer 6
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -303,7 +355,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
- Reducer 7
+ Reducer 8
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -320,7 +372,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
- Stage: Stage-3
+ Stage: Stage-4
Dependency Collection
Stage: Stage-0
@@ -332,13 +384,13 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.cmv_mat_view_n5
- Write Type: INSERT
+ Write Type: DELETE
- Stage: Stage-4
+ Stage: Stage-5
Stats Work
Basic Stats Work:
- Stage: Stage-6
+ Stage: Stage-8
Materialized View Update
name: default.cmv_mat_view_n5
update creation metadata: true
@@ -352,9 +404,24 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.cmv_mat_view_n5
- Write Type: UPDATE
+ Write Type: INSERT
- Stage: Stage-5
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n5
+ Write Type: INSERT
+
+ Stage: Stage-7
Stats Work
Basic Stats Work:
Column Stats Desc: