You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/04/10 09:30:01 UTC

[23/24] hive git commit: HIVE-18839: Implement incremental rebuild for materialized views (only insert operations in source tables) (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java
index 75eb50c..e5af95b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MaterializedViewRebuildSemanticAnalyzer.java
@@ -18,33 +18,19 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
-import org.apache.hadoop.hive.common.HiveStatsUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.conf.HiveVariableSource;
-import org.apache.hadoop.hive.conf.VariableSubstitution;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.LockState;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.QueryState;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
 /**
  * MaterializedViewRebuildSemanticAnalyzer.
  * Rewrites ALTER MATERIALIZED VIEW _mv_name_ REBUILD statement into
@@ -54,7 +40,6 @@ public class MaterializedViewRebuildSemanticAnalyzer extends CalcitePlanner {
 
   private static final Logger LOG =
       LoggerFactory.getLogger(MaterializedViewRebuildSemanticAnalyzer.class);
-  static final private LogHelper console = new LogHelper(LOG);
 
 
   public MaterializedViewRebuildSemanticAnalyzer(QueryState queryState) throws SemanticException {
@@ -64,7 +49,7 @@ public class MaterializedViewRebuildSemanticAnalyzer extends CalcitePlanner {
 
   @Override
   public void analyzeInternal(ASTNode ast) throws SemanticException {
-    if (rewrittenRebuild) {
+    if (mvRebuildMode != MaterializationRebuildMode.NONE) {
       super.analyzeInternal(ast);
       return;
     }
@@ -86,14 +71,35 @@ public class MaterializedViewRebuildSemanticAnalyzer extends CalcitePlanner {
         throw new SemanticException(ErrorMsg.MATERIALIZED_VIEW_DEF_EMPTY);
       }
       Context ctx = new Context(queryState.getConf());
-      rewrittenAST = ParseUtils.parse("insert overwrite table `" +
-          dbDotTable + "` " + viewText, ctx);
+      rewrittenAST = ParseUtils.parse("insert overwrite table " +
+          "`" + qualifiedTableName[0] + "`.`" + qualifiedTableName[1] + "` " +
+          viewText, ctx);
       this.ctx.addRewrittenStatementContext(ctx);
+
+      if (!this.ctx.isExplainPlan() && AcidUtils.isTransactionalTable(tab)) {
+        // Acquire lock for the given materialized view. Only one rebuild per materialized
+        // view can be triggered at a given time, as otherwise we might produce incorrect
+        // results if incremental maintenance is triggered.
+        HiveTxnManager txnManager = SessionState.get().getTxnMgr();
+        LockState state;
+        try {
+          state = txnManager.acquireMaterializationRebuildLock(
+              qualifiedTableName[0], qualifiedTableName[1], txnManager.getCurrentTxnId()).getState();
+        } catch (LockException e) {
+          throw new SemanticException("Exception acquiring lock for rebuilding the materialized view", e);
+        }
+        if (state != LockState.ACQUIRED) {
+          throw new SemanticException("Another process is rebuilding the materialized view " + dbDotTable);
+        }
+      }
     } catch (Exception e) {
       throw new SemanticException(e);
     }
-    rewrittenRebuild = true;
-    LOG.info("Rebuilding view " + dbDotTable);
+    mvRebuildMode = MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD;
+    mvRebuildDbName = qualifiedTableName[0];
+    mvRebuildName = qualifiedTableName[1];
+
+    LOG.debug("Rebuilding materialized view " + dbDotTable);
     super.analyzeInternal(rewrittenAST);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 3b74aba..7f00108 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -346,7 +346,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   protected boolean noscan;
 
   // whether this is a mv rebuild rewritten expression
-  protected boolean rewrittenRebuild = false;
+  protected MaterializationRebuildMode mvRebuildMode = MaterializationRebuildMode.NONE;
+  protected String mvRebuildDbName; // Db name for materialization to rebuild
+  protected String mvRebuildName; // Name for materialization to rebuild
 
   protected volatile boolean disableJoinMerge = false;
   protected final boolean defaultJoinMerge;
@@ -2210,7 +2212,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       case HiveParser.TOK_TAB: {
         TableSpec ts = new TableSpec(db, conf, ast);
         if (ts.tableHandle.isView() ||
-            (!rewrittenRebuild && ts.tableHandle.isMaterializedView())) {
+            (mvRebuildMode == MaterializationRebuildMode.NONE && ts.tableHandle.isMaterializedView())) {
           throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
         }
 
@@ -14695,4 +14697,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   public boolean isValidQueryMaterialization() {
     return (invalidQueryMaterializationReason == null);
   }
+
+  protected enum MaterializationRebuildMode {
+    NONE,
+    INSERT_OVERWRITE_REBUILD,
+    AGGREGATE_REBUILD,
+    NO_AGGREGATE_REBUILD
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
index 4c86fb8..fe570f0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
@@ -249,7 +249,7 @@ public class GenericUDTFGetSplits extends GenericUDTF {
     DriverCleanup driverCleanup = new DriverCleanup(driver, txnManager, splitsAppId.toString());
     boolean needsCleanup = true;
     try {
-      CommandProcessorResponse cpr = driver.compileAndRespond(query);
+      CommandProcessorResponse cpr = driver.compileAndRespond(query, true);
       if (cpr.getResponseCode() != 0) {
         throw new HiveException("Failed to compile query: " + cpr.getException());
       }
@@ -280,7 +280,7 @@ public class GenericUDTFGetSplits extends GenericUDTF {
 
         HiveConf.setVar(conf, ConfVars.HIVE_EXECUTION_MODE, "llap");
         query = "select * from " + tableName;
-        cpr = driver.compileAndRespond(query);
+        cpr = driver.compileAndRespond(query, true);
         if(cpr.getResponseCode() != 0) {
           throw new HiveException("Failed to create temp table: "+cpr.getException());
         }

http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q
index c7f050b..dbea060 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_4.q
@@ -24,28 +24,30 @@ analyze table cmv_basetable_2 compute statistics for columns;
 
 -- CREATE VIEW WITH REWRITE DISABLED
 EXPLAIN
-CREATE MATERIALIZED VIEW cmv_mat_view AS
-  SELECT cmv_basetable.a, cmv_basetable_2.c
+CREATE MATERIALIZED VIEW cmv_mat_view TBLPROPERTIES ('transactional'='true') AS
+  SELECT cmv_basetable.a, cmv_basetable_2.c, sum(cmv_basetable_2.d)
   FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
   WHERE cmv_basetable_2.c > 10.0
   GROUP BY cmv_basetable.a, cmv_basetable_2.c;
 
-CREATE MATERIALIZED VIEW cmv_mat_view AS
-  SELECT cmv_basetable.a, cmv_basetable_2.c
+CREATE MATERIALIZED VIEW cmv_mat_view TBLPROPERTIES ('transactional'='true') AS
+  SELECT cmv_basetable.a, cmv_basetable_2.c, sum(cmv_basetable_2.d)
   FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
   WHERE cmv_basetable_2.c > 10.0
   GROUP BY cmv_basetable.a, cmv_basetable_2.c;
 
+analyze table cmv_mat_view compute statistics for columns;
+
 DESCRIBE FORMATTED cmv_mat_view;
 
 -- CANNOT USE THE VIEW, IT IS DISABLED FOR REWRITE
 EXPLAIN
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
 FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
 WHERE cmv_basetable_2.c > 10.10
 GROUP BY cmv_basetable.a, cmv_basetable_2.c;
 
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
 FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
 WHERE cmv_basetable_2.c > 10.10
 GROUP BY cmv_basetable.a, cmv_basetable_2.c;
@@ -65,12 +67,12 @@ DESCRIBE FORMATTED cmv_mat_view;
 
 -- CANNOT USE THE VIEW, IT IS OUTDATED
 EXPLAIN
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
 FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
 WHERE cmv_basetable_2.c > 10.10
 GROUP BY cmv_basetable.a, cmv_basetable_2.c;
 
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
 FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
 WHERE cmv_basetable_2.c > 10.10
 GROUP BY cmv_basetable.a, cmv_basetable_2.c;
@@ -85,12 +87,76 @@ DESCRIBE FORMATTED cmv_mat_view;
 
 -- NOW IT CAN BE USED AGAIN
 EXPLAIN
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+-- NOW AN UPDATE
+UPDATE cmv_basetable_2 SET a=2 WHERE a=1;
+
+-- INCREMENTAL REBUILD CANNOT BE TRIGGERED
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+-- NOW A DELETE
+DELETE FROM cmv_basetable_2 WHERE a=2;
+
+-- INCREMENTAL REBUILD CANNOT BE TRIGGERED
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10
+GROUP BY cmv_basetable.a, cmv_basetable_2.c;
+
+-- NOW AN INSERT
+insert into cmv_basetable_2 values
+ (1, 'charlie', 15.8, 1);
+
+-- INCREMENTAL REBUILD CAN BE TRIGGERED AGAIN
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
 FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
 WHERE cmv_basetable_2.c > 10.10
 GROUP BY cmv_basetable.a, cmv_basetable_2.c;
 
-SELECT cmv_basetable.a
+SELECT cmv_basetable.a, sum(cmv_basetable_2.d)
 FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
 WHERE cmv_basetable_2.c > 10.10
 GROUP BY cmv_basetable.a, cmv_basetable_2.c;

http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_5.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_5.q
new file mode 100644
index 0000000..112b44c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_5.q
@@ -0,0 +1,123 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.strict.checks.cartesian.product=false;
+set hive.materializedview.rewriting=true;
+
+create table cmv_basetable (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into cmv_basetable values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1);
+
+analyze table cmv_basetable compute statistics for columns;
+
+create table cmv_basetable_2 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into cmv_basetable_2 values
+ (1, 'alfred', 10.30, 2),
+ (3, 'calvin', 978.76, 3);
+
+analyze table cmv_basetable_2 compute statistics for columns;
+
+CREATE MATERIALIZED VIEW cmv_mat_view ENABLE REWRITE
+  TBLPROPERTIES ('transactional'='true') AS
+  SELECT cmv_basetable.a, cmv_basetable_2.c
+  FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+  WHERE cmv_basetable_2.c > 10.0;
+analyze table cmv_mat_view compute statistics for columns;
+
+insert into cmv_basetable_2 values
+ (3, 'charlie', 15.8, 1);
+
+analyze table cmv_basetable_2 compute statistics for columns;
+
+-- CANNOT USE THE VIEW, IT IS OUTDATED
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+-- REBUILD
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+DESCRIBE FORMATTED cmv_mat_view;
+
+-- NOW IT CAN BE USED AGAIN
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+-- NOW AN UPDATE
+UPDATE cmv_basetable_2 SET a=2 WHERE a=1;
+
+-- INCREMENTAL REBUILD CANNOT BE TRIGGERED
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+-- NOW A DELETE
+DELETE FROM cmv_basetable_2 WHERE a=2;
+
+-- INCREMENTAL REBUILD CANNOT BE TRIGGERED
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+-- NOW AN INSERT
+insert into cmv_basetable_2 values
+ (1, 'charlie', 15.8, 1);
+
+-- INCREMENTAL REBUILD CAN BE TRIGGERED AGAIN
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN
+SELECT cmv_basetable.a
+FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+SELECT cmv_basetable.a
+FROM cmv_basetable JOIN cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)
+WHERE cmv_basetable_2.c > 10.10;
+
+drop materialized view cmv_mat_view;

http://git-wip-us.apache.org/repos/asf/hive/blob/be420098/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out
index 1ef7b87..bd0f903 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_3.q.out
@@ -452,9 +452,11 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Map 8 <- Union 4 (CONTAINS)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+        Reducer 5 <- Union 4 (SIMPLE_EDGE)
+        Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -476,26 +478,47 @@ STAGE PLANS:
                         Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: may be used (ACID table)
-        Map 5 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: cmv_basetable_2
                   Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: ((c > 10) and a is not null) (type: boolean)
+                    predicate: ((ROW__ID.writeid > 1) and (c > 10) and a is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: a (type: int), c (type: decimal(10,2))
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: decimal(10,2))
             Execution mode: llap
             LLAP IO: may be used (ACID table)
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: default.cmv_mat_view
+                  Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), c (type: decimal(10,2))
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: int), _col1 (type: decimal(10,2))
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: decimal(10,2))
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: decimal(10,2))
+                        Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
+            Execution mode: llap
+            LLAP IO: all inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -525,9 +548,27 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  keys: _col0 (type: int), _col1 (type: decimal(10,2))
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: decimal(10,2))
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: decimal(10,2))
+                    Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: decimal(10,2))
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -536,31 +577,33 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: decimal(10,2))
                   outputColumnNames: a, c
-                  Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL
                   Group By Operator
                     aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll')
                     mode: hash
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: PARTIAL
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: PARTIAL
                       value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:decimal(10,2),max:decimal(10,2),countnulls:bigint,bitvector:binary>)
-        Reducer 4 
+        Reducer 6 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Union 4 
+            Vertex: Union 4
 
   Stage: Stage-2
     Dependency Collection
@@ -590,14 +633,16 @@ PREHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view REBUILD
 PREHOOK: type: QUERY
 PREHOOK: Input: default@cmv_basetable
 PREHOOK: Input: default@cmv_basetable_2
+PREHOOK: Input: default@cmv_mat_view
 PREHOOK: Output: default@cmv_mat_view
 POSTHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view REBUILD
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@cmv_basetable
 POSTHOOK: Input: default@cmv_basetable_2
+POSTHOOK: Input: default@cmv_mat_view
 POSTHOOK: Output: default@cmv_mat_view
-POSTHOOK: Lineage: cmv_mat_view.a SIMPLE [(cmv_basetable)cmv_basetable.FieldSchema(name:a, type:int, comment:null), ]
-POSTHOOK: Lineage: cmv_mat_view.c SIMPLE [(cmv_basetable_2)cmv_basetable_2.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view.a EXPRESSION [(cmv_basetable)cmv_basetable.FieldSchema(name:a, type:int, comment:null), (cmv_mat_view)default.cmv_mat_view.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view.c EXPRESSION [(cmv_basetable_2)cmv_basetable_2.FieldSchema(name:c, type:decimal(10,2), comment:null), (cmv_mat_view)default.cmv_mat_view.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
 PREHOOK: query: EXPLAIN
 SELECT cmv_basetable.a
 FROM cmv_basetable join cmv_basetable_2 ON (cmv_basetable.a = cmv_basetable_2.a)