You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/04/10 09:30:01 UTC
[23/24] hive git commit: HIVE-18839: Implement incremental rebuild
for materialized views (only insert operations in source tables) (Jesus
Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java
index 75eb50c..e5af95b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java
@@ -18,33 +18,19 @@
package org.apache.hadoop.hive.ql.parse;
-import org.apache.hadoop.hive.common.HiveStatsUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.conf.HiveVariableSource;
-import org.apache.hadoop.hive.conf.VariableSubstitution;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.LockState;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryState;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
/**
* MaterializedViewRebuildSemanticAnalyzer.
* Rewrites ALTER MATERIALIZED VIEW _mv_name_ REBUILD statement into
@@ -54,7 +40,6 @@ public class MaterializedViewRebuildSemanticAnalyzer extends CalcitePlanner {
private static final Logger LOG =
LoggerFactory.getLogger(MaterializedViewRebuildSemanticAnalyzer.class);
- static final private LogHelper console = new LogHelper(LOG);
public MaterializedViewRebuildSemanticAnalyzer(QueryState queryState) throws SemanticException {
@@ -64,7 +49,7 @@ public class MaterializedViewRebuildSemanticAnalyzer extends CalcitePlanner {
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
- if (rewrittenRebuild) {
+ if (mvRebuildMode != MaterializationRebuildMode.NONE) {
super.analyzeInternal(ast);
return;
}
@@ -86,14 +71,35 @@ public class MaterializedViewRebuildSemanticAnalyzer extends CalcitePlanner {
throw new SemanticException(ErrorMsg.MATERIALIZED_VIEW_DEF_EMPTY);
}
Context ctx = new Context(queryState.getConf());
- rewrittenAST = ParseUtils.parse("insert overwrite table `" +
- dbDotTable + "` " + viewText, ctx);
+ rewrittenAST = ParseUtils.parse("insert overwrite table " +
+ "`" + qualifiedTableName[0] + "`.`" + qualifiedTableName[1] + "` " +
+ viewText, ctx);
this.ctx.addRewrittenStatementContext(ctx);
+
+ if (!this.ctx.isExplainPlan() && AcidUtils.isTransactionalTable(tab)) {
+ // Acquire lock for the given materialized view. Only one rebuild per materialized
+ // view can be triggered at a given time, as otherwise we might produce incorrect
+ // results if incremental maintenance is triggered.
+ HiveTxnManager txnManager = SessionState.get().getTxnMgr();
+ LockState state;
+ try {
+ state = txnManager.acquireMaterializationRebuildLock(
+ qualifiedTableName[0], qualifiedTableName[1], txnManager.getCurrentTxnId()).getState();
+ } catch (LockException e) {
+ throw new SemanticException("Exception acquiring lock for rebuilding the materialized view", e);
+ }
+ if (state != LockState.ACQUIRED) {
+ throw new SemanticException("Another process is rebuilding the materialized view " + dbDotTable);
+ }
+ }
} catch (Exception e) {
throw new SemanticException(e);
}
- rewrittenRebuild = true;
- LOG.info("Rebuilding view " + dbDotTable);
+ mvRebuildMode = MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD;
+ mvRebuildDbName = qualifiedTableName[0];
+ mvRebuildName = qualifiedTableName[1];
+
+ LOG.debug("Rebuilding materialized view " + dbDotTable);
super.analyzeInternal(rewrittenAST);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 3b74aba..7f00108 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -346,7 +346,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
protected boolean noscan;
// whether this is a mv rebuild rewritten expression
- protected boolean rewrittenRebuild = false;
+ protected MaterializationRebuildMode mvRebuildMode = MaterializationRebuildMode.NONE;
+ protected String mvRebuildDbName; // Db name for materialization to rebuild
+ protected String mvRebuildName; // Name for materialization to rebuild
protected volatile boolean disableJoinMerge = false;
protected final boolean defaultJoinMerge;
@@ -2210,7 +2212,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
case HiveParser.TOK_TAB: {
TableSpec ts = new TableSpec(db, conf, ast);
if (ts.tableHandle.isView() ||
- (!rewrittenRebuild && ts.tableHandle.isMaterializedView())) {
+ (mvRebuildMode == MaterializationRebuildMode.NONE && ts.tableHandle.isMaterializedView())) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
@@ -14695,4 +14697,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
public boolean isValidQueryMaterialization() {
return (invalidQueryMaterializationReason == null);
}
+
+ protected enum MaterializationRebuildMode { // How the statement under analysis relates to a materialized view rebuild.
+ NONE, // Initial value of mvRebuildMode: the statement is not a rewritten rebuild, so DML against a materialized view is rejected.
+ INSERT_OVERWRITE_REBUILD, // ALTER MATERIALIZED VIEW ... REBUILD rewritten into "insert overwrite table <db>.<mv> <view text>".
+ AGGREGATE_REBUILD, // NOTE(review): presumably an incremental rebuild of a view whose definition aggregates; not assigned in the visible hunks, confirm.
+ NO_AGGREGATE_REBUILD // NOTE(review): presumably an incremental rebuild of a view without aggregation; not assigned in the visible hunks, confirm.
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
index 4c86fb8..fe570f0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
@@ -249,7 +249,7 @@ public class GenericUDTFGetSplits extends GenericUDTF {
DriverCleanup driverCleanup = new DriverCleanup(driver, txnManager, splitsAppId.toString());
boolean needsCleanup = true;
try {
- CommandProcessorResponse cpr = driver.compileAndRespond(query);
+ CommandProcessorResponse cpr = driver.compileAndRespond(query, true);
if (cpr.getResponseCode() != 0) {
throw new HiveException("Failed to compile query: " + cpr.getException());
}
@@ -280,7 +280,7 @@ public class GenericUDTFGetSplits extends GenericUDTF {
HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
query = "select * from " + tableName;
- cpr = driver.compileAndRespond(query);
+ cpr = driver.compileAndRespond(query, true);
if(cpr.getResponseCode() != 0) {
throw new HiveException("Failed to create temp table: "+cpr.getException());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q
index c7f050b..dbea060 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q
@@ -24,28 +24,30 @@ analyze table cmv_basetable_2 compute statistics for columns;
-- CREATE VIEW WITH REWRITE DISABLED
EXPLAIN
-CREATE MATERIALIZED VIEW cmv_mat_view AS
- SELECT cmv_basetable.a, cmv_basetable_2.c
+CREATE MATERIALIZED VIEW cmv_mat_view TBLPROPERTIES ('transactional'='true') AS
+ SELECT cmv_basetable.a, cmv_basetable_2.c, sum(cmv_basetable_2.d)
FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
WHERE cmv_basetable_2.c > 10.0
GROUP BY cmv_basetable.a, cmv_basetable_2.c;
-CREATE MATERIALIZED VIEW cmv_mat_view AS
- SELECT cmv_basetable.a, cmv_basetable_2.c
+CREATE MATERIALIZED VIEW cmv_mat_view TBLPROPERTIES ('transactional'='true') AS
+ SELECT cmv_basetable.a, cmv_basetable_2.c, sum(cmv_basetable_2.d)
FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
WHERE cmv_basetable_2.c > 10.0
GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+analyze table cmv_mat_view compute statistics for columns;
+
DESCRIBE FORMATTED cmv_mat_view;
-- CANNOT USE THE VIEW, IT IS DISABLED FOR REWRITE
EXPLAIN
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
WHERE cmv_basetable_2.c > 10.10
GROUP BY cmv_basetable.a, cmv_basetable_2.c;
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
WHERE cmv_basetable_2.c > 10.10
GROUP BY cmv_basetable.a, cmv_basetable_2.c;
@@ -65,12 +67,12 @@ DESCRIBE FORMATTED cmv_mat_view;
-- CANNOT USE THE VIEW, IT IS OUTDATED
EXPLAIN
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
WHERE cmv_basetable_2.c > 10.10
GROUP BY cmv_basetable.a, cmv_basetable_2.c;
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
WHERE cmv_basetable_2.c > 10.10
GROUP BY cmv_basetable.a, cmv_basetable_2.c;
@@ -85,12 +87,76 @@ DESCRIBE FORMATTED cmv_mat_view;
-- NOW IT CAN BE USED AGAIN
EXPLAIN
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+-- NOW AN UPDATE
+UPDATE cmv_basetable_2 SET a=2 WHERE a=1;
+
+-- INCREMENTAL REBUILD CANNOT BE TRIGGERED
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+-- NOW A DELETE
+DELETE FROM cmv_basetable_2 WHERE a=2;
+
+-- INCREMENTAL REBUILD CANNOT BE TRIGGERED
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+-- NOW AN INSERT
+insert into cmv_basetable_2 values
+ (1, 'charlie', 15.8, 1);
+
+-- INCREMENTAL REBUILD CAN BE TRIGGERED AGAIN
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
WHERE cmv_basetable_2.c > 10.10
GROUP BY cmv_basetable.a, cmv_basetable_2.c;
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
WHERE cmv_basetable_2.c > 10.10
GROUP BY cmv_basetable.a, cmv_basetable_2.c;
http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_5.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_5.q
new file mode 100644
index 0000000..112b44c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_5.q
@@ -0,0 +1,123 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.strict.checks.cartesian.product=false;
+set hive.materializedview.rewriting=true;
+
+create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into cmv_basetable values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1);
+
+analyze table cmv_basetable compute statistics for columns;
+
+create table cmv_basetable_2 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into cmv_basetable_2 values
+ (1, 'alfred', 10.30, 2),
+ (3, 'calvin', 978.76, 3);
+
+analyze table cmv_basetable_2 compute statistics for columns;
+
+CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE
+ TBLPROPERTIES ('transactional'='true') AS
+ SELECT cmv_basetable.a, cmv_basetable_2.c
+ FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+ WHERE cmv_basetable_2.c > 10.0;
+analyze table cmv_mat_view compute statistics for columns;
+
+insert into cmv_basetable_2 values
+ (3, 'charlie', 15.8, 1);
+
+analyze table cmv_basetable_2 compute statistics for columns;
+
+-- CANNOT USE THE VIEW, IT IS OUTDATED
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+-- REBUILD
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+DESCRIBE FORMATTED cmv_mat_view;
+
+-- NOW IT CAN BE USED AGAIN
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+-- NOW AN UPDATE
+UPDATE cmv_basetable_2 SET a=2 WHERE a=1;
+
+-- INCREMENTAL REBUILD CANNOT BE TRIGGERED
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+-- NOW A DELETE
+DELETE FROM cmv_basetable_2 WHERE a=2;
+
+-- INCREMENTAL REBUILD CANNOT BE TRIGGERED
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+-- NOW AN INSERT
+insert into cmv_basetable_2 values
+ (1, 'charlie', 15.8, 1);
+
+-- INCREMENTAL REBUILD CAN BE TRIGGERED AGAIN
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+drop materialized view cmv_mat_view;
http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out
index 1ef7b87..bd0f903 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out
@@ -452,9 +452,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Map 8 <- Union 4 (CONTAINS)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+ Reducer 5 <- Union 4 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -476,26 +478,47 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 5
+ Map 7
Map Operator Tree:
TableScan
alias: cmv_basetable_2
Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((c > 10) and a is not null) (type: boolean)
+ predicate: ((ROW__ID.writeid > 1) and (c > 10) and a is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: a (type: int), c (type: decimal(10,2))
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: decimal(10,2))
Execution mode: llap
LLAP IO: may be used (ACID table)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: default.cmv_mat_view
+ Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: a (type: int), c (type: decimal(10,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: decimal(10,2))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: decimal(10,2))
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: decimal(10,2))
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
+ Execution mode: llap
+ LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -525,9 +548,27 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: decimal(10,2))
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: decimal(10,2))
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: decimal(10,2))
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: decimal(10,2))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -536,31 +577,33 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: decimal(10,2))
outputColumnNames: a, c
- Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
Group By Operator
aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll')
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: PARTIAL
value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:decimal(10,2),max:decimal(10,2),countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 4
+ Vertex: Union 4
Stage: Stage-2
Dependency Collection
@@ -590,14 +633,16 @@ PREHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view REBUILD
PREHOOK: type: QUERY
PREHOOK: Input: default@cmv_basetable
PREHOOK: Input: default@cmv_basetable_2
+PREHOOK: Input: default@cmv_mat_view
PREHOOK: Output: default@cmv_mat_view
POSTHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view REBUILD
POSTHOOK: type: QUERY
POSTHOOK: Input: default@cmv_basetable
POSTHOOK: Input: default@cmv_basetable_2
+POSTHOOK: Input: default@cmv_mat_view
POSTHOOK: Output: default@cmv_mat_view
-POSTHOOK: Lineage: cmv_mat_view.a SIMPLE [(cmv_basetable)cmv_basetable.FieldSchema(name:a, type:int, comment:null), ]
-POSTHOOK: Lineage: cmv_mat_view.c SIMPLE [(cmv_basetable_2)cmv_basetable_2.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view.a EXPRESSION [(cmv_basetable)cmv_basetable.FieldSchema(name:a, type:int, comment:null), (cmv_mat_view)default.cmv_mat_view.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view.c EXPRESSION [(cmv_basetable_2)cmv_basetable_2.FieldSchema(name:c, type:decimal(10,2), comment:null), (cmv_mat_view)default.cmv_mat_view.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
PREHOOK: query: EXPLAIN
SELECT cmv_basetable.a
FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)