You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2021/10/05 03:28:46 UTC
[hive] branch master updated: HIVE-25546: Enable incremental
rebuild of Materialized views with insert only source tables (Krisztian
Kasa, reviewed by Stamatis Zampetakis, Alessandro Solimando)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 8cfb1c9 HIVE-25546: Enable incremental rebuild of Materialized views with insert only source tables (Krisztian Kasa, reviewed by Stamatis Zampetakis, Alessandro Solimando)
8cfb1c9 is described below
commit 8cfb1c958e82b36d141fd7398110ce8927cf9953
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Tue Oct 5 05:28:36 2021 +0200
HIVE-25546: Enable incremental rebuild of Materialized views with insert only source tables (Krisztian Kasa, reviewed by Stamatis Zampetakis, Alessandro Solimando)
---
.../AlterMaterializedViewRebuildAnalyzer.java | 21 +-
.../rules/views/HiveInsertOnlyScanWriteIdRule.java | 61 ++
.../materialized_view_create_rewrite_7.q | 3 +-
.../materialized_view_create_rewrite_8.q | 80 +++
.../materialized_view_create_rewrite_9.q | 85 +++
.../clientpositive/materialized_view_parquet.q | 6 +-
...erialized_view_partitioned_create_rewrite_agg.q | 2 +-
...ialized_view_partitioned_create_rewrite_agg_2.q | 26 +-
...alized_view_partitioned_create_rewrite_agg_3.q} | 6 +-
.../llap/materialized_view_create_rewrite_8.q.out | 701 +++++++++++++++++++++
.../llap/materialized_view_create_rewrite_9.q.out | 686 ++++++++++++++++++++
.../llap/materialized_view_parquet.q.out | 32 +-
...lized_view_partitioned_create_rewrite_agg.q.out | 4 +-
...zed_view_partitioned_create_rewrite_agg_2.q.out | 48 +-
...ed_view_partitioned_create_rewrite_agg_3.q.out} | 14 +-
15 files changed, 1703 insertions(+), 72 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
index 7e9df5b..d6ff263 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rebuild/AlterMaterializedViewRebuildAnalyzer.java
@@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.ColumnPropagation
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAggregateInsertDeleteIncrementalRewritingRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAggregateInsertIncrementalRewritingRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAggregatePartitionIncrementalRewritingRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveInsertOnlyScanWriteIdRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveJoinInsertDeleteIncrementalRewritingRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveJoinInsertIncrementalRewritingRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializationRelMetadataProvider;
@@ -75,6 +76,7 @@ import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import static java.util.Collections.singletonList;
@@ -266,6 +268,7 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
RelNode incrementalRebuildPlan = applyRecordIncrementalRebuildPlan(
basePlan, mdProvider, executorProvider, optCluster, calcitePreMVRewritingPlan, materialization);
+
if (mvRebuildMode != MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD) {
return incrementalRebuildPlan;
}
@@ -331,8 +334,8 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider, RelOptCluster optCluster,
RelNode calcitePreMVRewritingPlan) {
mvRebuildMode = MaterializationRebuildMode.AGGREGATE_INSERT_REBUILD;
- basePlan = applyIncrementalRebuild(
- basePlan, mdProvider, executorProvider, HiveAggregateInsertIncrementalRewritingRule.INSTANCE);
+ basePlan = applyIncrementalRebuild(basePlan, mdProvider, executorProvider,
+ HiveInsertOnlyScanWriteIdRule.INSTANCE, HiveAggregateInsertIncrementalRewritingRule.INSTANCE);
// Make a cost-based decision factoring the configuration property
optCluster.invalidateMetadataQuery();
@@ -373,8 +376,8 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
private RelNode applyJoinInsertIncremental(
RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) {
mvRebuildMode = MaterializationRebuildMode.JOIN_INSERT_REBUILD;
- return applyIncrementalRebuild(
- basePlan, mdProvider, executorProvider, HiveJoinInsertIncrementalRewritingRule.INSTANCE);
+ return applyIncrementalRebuild(basePlan, mdProvider, executorProvider,
+ HiveInsertOnlyScanWriteIdRule.INSTANCE, HiveJoinInsertIncrementalRewritingRule.INSTANCE);
}
private RelNode applyPartitionIncrementalRebuildPlan(
@@ -396,14 +399,14 @@ public class AlterMaterializedViewRebuildAnalyzer extends CalcitePlanner {
return applyPreJoinOrderingTransforms(basePlan, mdProvider, executorProvider);
}
- return applyIncrementalRebuild(
- basePlan, mdProvider, executorProvider, HiveAggregatePartitionIncrementalRewritingRule.INSTANCE);
+ return applyIncrementalRebuild(basePlan, mdProvider, executorProvider,
+ HiveInsertOnlyScanWriteIdRule.INSTANCE, HiveAggregatePartitionIncrementalRewritingRule.INSTANCE);
}
- private RelNode applyIncrementalRebuild(
- RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider, RelOptRule rebuildRule) {
+ private RelNode applyIncrementalRebuild(RelNode basePlan, RelMetadataProvider mdProvider,
+ RexExecutor executorProvider, RelOptRule... rebuildRules) {
HepProgramBuilder program = new HepProgramBuilder();
- generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, rebuildRule);
+ generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, rebuildRules);
basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider);
return applyPreJoinOrderingTransforms(basePlan, mdProvider, executorProvider);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveInsertOnlyScanWriteIdRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveInsertOnlyScanWriteIdRule.java
new file mode 100644
index 0000000..8fa318b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveInsertOnlyScanWriteIdRule.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules.views;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+/**
+ * This rule turns on populating the writeId of insert-only table scans.
+ * Currently fetching the writeId from insert-only tables is not turned on automatically:
+ * 1. only non-compacted records have a valid writeId.
+ * 2. the writeId and bucketId are populated into the ROW_ID struct; however, the third
+ * field of the struct (rowId) is always 0.
+ *
+ * This feature is only used when incrementally rebuilding a materialized view that has
+ * insert-only source tables.
+ */
+public class HiveInsertOnlyScanWriteIdRule extends RelOptRule {
+
+ public static final HiveInsertOnlyScanWriteIdRule INSTANCE = new HiveInsertOnlyScanWriteIdRule();
+
+ private HiveInsertOnlyScanWriteIdRule() {
+ super(operand(HiveTableScan.class, none()));
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ HiveTableScan tableScan = call.rel(0);
+ Table tableMD = ((RelOptHiveTable) tableScan.getTable()).getHiveTableMD();
+ return !tableMD.isMaterializedView() && AcidUtils.isInsertOnlyTable(tableMD);
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ HiveTableScan tableScan = call.rel(0);
+ RelNode newTableScan = call.builder()
+ .push(tableScan.setTableScanTrait(HiveTableScan.HiveTableScanTrait.FetchInsertOnlyBucketIds))
+ .build();
+ call.transformTo(newTableScan);
+ }
+}
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_7.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_7.q
index 63d25d6..2886a64 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_7.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_7.q
@@ -1,10 +1,9 @@
--- Test habndling Sum0 aggregate function when rewriting insert overwrite MV rebuild plan to incremental
+-- Test handling Sum0 aggregate function when rewriting insert overwrite MV rebuild plan to incremental
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.materializedview.rewriting.sql=false;
---create table t1(a char(15), b int, c int) stored as parquet TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only');
create table t1(a char(15), b int, c int) stored as orc TBLPROPERTIES ('transactional'='true');
insert into t1(a, b, c) values
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_8.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_8.q
new file mode 100644
index 0000000..651bcb4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_8.q
@@ -0,0 +1,80 @@
+-- Test Incremental rebuild of materialized view without aggregate when a source table is insert only.
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.materializedview.rewriting=true;
+
+create table cmv_basetable_n6 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into cmv_basetable_n6 values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1);
+
+create table cmv_basetable_2_n3 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only');
+
+insert into cmv_basetable_2_n3 values
+ (1, 'alfred', 10.30, 2),
+ (3, 'calvin', 978.76, 3);
+
+CREATE MATERIALIZED VIEW cmv_mat_view_n6
+ TBLPROPERTIES ('transactional'='true') AS
+ SELECT cmv_basetable_n6.a, cmv_basetable_2_n3.c
+ FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+ WHERE cmv_basetable_2_n3.c > 10.0;
+
+insert into cmv_basetable_2_n3 values
+ (3, 'charlie', 15.8, 1);
+
+-- CANNOT USE THE VIEW, IT IS OUTDATED
+EXPLAIN CBO
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 join cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10;
+
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10;
+
+-- REBUILD
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD;
+
+-- NOW IT CAN BE USED AGAIN
+EXPLAIN CBO
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 join cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10;
+
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10;
+
+-- NOW AN UPDATE
+UPDATE cmv_basetable_n6 SET a=2 WHERE a=1;
+
+-- INCREMENTAL REBUILD CAN BE TRIGGERED
+EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD;
+
+ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD;
+
+-- MV CAN BE USED
+EXPLAIN CBO
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 join cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10;
+
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10;
+
+drop materialized view cmv_mat_view_n6;
+
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10;
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_9.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_9.q
new file mode 100644
index 0000000..884bff3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_9.q
@@ -0,0 +1,85 @@
+-- Test Incremental rebuild of materialized view with aggregate and count(*) when
+-- 1) source tables have delete operations since last rebuild.
+-- 2) a source table is insert only.
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.materializedview.rewriting.sql=false;
+
+create table t1(a char(15), b int, c int) stored as orc TBLPROPERTIES ('transactional'='true');
+create table t2(a char(15), b int) stored as orc TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only');
+
+insert into t1(a, b, c) values
+('update', 1, 1), ('update', 2, 1),
+('null_update', null, 1), ('null_update', null, 2);
+insert into t1(a, b, c) values ('remove', 3, 1), ('null_remove', null, 1);
+insert into t1(a, b, c) values ('sum0', 0, 1), ('sum0', 0, 2);
+
+insert into t2(a, b) values
+('update', 10),
+('null_update', null);
+insert into t2(a, b) values ('remove', 30), ('null_remove', null);
+insert into t2(a, b) values ('sum0', 0);
+
+-- Aggregate with count(*): incremental rebuild should be triggered even if there were deletes from source table
+create materialized view mat1 stored as orc TBLPROPERTIES ('transactional'='true') as
+select t1.a, sum(t1.b), count(*) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a;
+
+
+explain cbo
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a;
+
+-- do some changes on source table data
+delete from t1 where b = 1;
+delete from t1 where a like '%remove';
+delete from t1 where c = 2;
+
+insert into t1(a,b,c) values
+('update', 5, 1),
+('add', 5, 1),
+('add/remove', 0, 0),
+('null_update', null, 0),
+('null_add', null, 0),
+('null_add/remove', null, 0);
+
+insert into t2(a,b) values
+('add', 15),
+('add/remove', 0),
+('null_add', null),
+('null_add/remove', null);
+
+delete from t1 where a like '%add/remove';
+
+-- view can not be used
+explain cbo
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a;
+
+
+-- rebuild the view (incrementally)
+explain cbo
+alter materialized view mat1 rebuild;
+explain
+alter materialized view mat1 rebuild;
+alter materialized view mat1 rebuild;
+
+-- the view should be up to date and used
+explain cbo
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a;
+
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a;
+
+drop materialized view mat1;
+
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a;
diff --git a/ql/src/test/queries/clientpositive/materialized_view_parquet.q b/ql/src/test/queries/clientpositive/materialized_view_parquet.q
index 712fd8e..fd24a07 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_parquet.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_parquet.q
@@ -189,8 +189,10 @@ alter materialized view mv1_parquet_n2 rebuild;
alter materialized view mv1_parquet_n2 rebuild;
explain cbo
-select name from emps_parquet_n3 group by name;
+select name, sum(empid) from emps_parquet_n3 group by name;
-select name from emps_parquet_n3 group by name;
+select name, sum(empid) from emps_parquet_n3 group by name;
drop materialized view mv1_parquet_n2;
+
+select name, sum(empid) from emps_parquet_n3 group by name;
diff --git a/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg.q b/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg.q
index 89c4660..38b8bc6 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg.q
@@ -12,7 +12,7 @@ INSERT INTO t1(a, b, c) VALUES
(1, 3, 100),
(null, 4, 200);
-CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") AS
+CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') AS
SELECT a, b, sum(c) sumc FROM t1 GROUP BY b, a;
INSERT INTO t1(a, b, c) VALUES
diff --git a/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg_2.q b/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg_2.q
index 85c6502..d87f116 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg_2.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg_2.q
@@ -4,22 +4,22 @@ set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
CREATE TABLE t1(a int, b int, c int, d string, e float) STORED AS ORC TBLPROPERTIES ('transactional' = 'true');
INSERT INTO t1(a, b, c, d, e) VALUES
-(1, 1, 1, "one", 1.1),
-(1, 4, 1, "one", 4.2),
-(2, 2, 2, "two", 2.2),
-(1, 10, 1, "one", 10.1),
-(2, 2, 2, "two", 2.2),
-(1, 3, 1, "one", 3.1),
-(null, 4, null, "unknown", 4.6),
-(null, 4, 2, "unknown", 4.7);
-
-CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a, c, d) STORED AS ORC TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") AS
+(1, 1, 1, 'one', 1.1),
+(1, 4, 1, 'one', 4.2),
+(2, 2, 2, 'two', 2.2),
+(1, 10, 1, 'one', 10.1),
+(2, 2, 2, 'two', 2.2),
+(1, 3, 1, 'one', 3.1),
+(null, 4, null, 'unknown', 4.6),
+(null, 4, 2, 'unknown', 4.7);
+
+CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a, c, d) STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') AS
SELECT a, sum(b) sumb, c, d, sum(e) sume FROM t1 GROUP BY a, c, d;
INSERT INTO t1(a, b, c, d, e) VALUES
-(1, 3, 1, "one", 3.3),
-(1, 110, 1, "one", 110.11),
-(null, 20, null, "unknown", 20.22);
+(1, 3, 1, 'one', 3.3),
+(1, 110, 1, 'one', 110.11),
+(null, 20, null, 'unknown', 20.22);
EXPLAIN CBO
ALTER MATERIALIZED VIEW mat1 REBUILD;
diff --git a/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg.q b/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg_3.q
similarity index 82%
copy from ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg.q
copy to ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg_3.q
index 89c4660..e9687bf 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_partitioned_create_rewrite_agg_3.q
@@ -1,7 +1,9 @@
+-- Test partition based MV rebuild when source table is insert only
+
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-CREATE TABLE t1(a int, b int,c int) STORED AS ORC TBLPROPERTIES ('transactional' = 'true');
+CREATE TABLE t1(a int, b int,c int) STORED AS ORC TBLPROPERTIES ('transactional' = 'true', 'transactional_properties'='insert_only');
INSERT INTO t1(a, b, c) VALUES
(1, 1, 1),
@@ -12,7 +14,7 @@ INSERT INTO t1(a, b, c) VALUES
(1, 3, 100),
(null, 4, 200);
-CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") AS
+CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') AS
SELECT a, b, sum(c) sumc FROM t1 GROUP BY b, a;
INSERT INTO t1(a, b, c) VALUES
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_8.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_8.q.out
new file mode 100644
index 0000000..218f70b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_8.q.out
@@ -0,0 +1,701 @@
+PREHOOK: query: create table cmv_basetable_n6 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_basetable_n6
+POSTHOOK: query: create table cmv_basetable_n6 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_basetable_n6
+PREHOOK: query: insert into cmv_basetable_n6 values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cmv_basetable_n6
+POSTHOOK: query: insert into cmv_basetable_n6 values
+ (1, 'alfred', 10.30, 2),
+ (2, 'bob', 3.14, 3),
+ (2, 'bonnie', 172342.2, 3),
+ (3, 'calvin', 978.76, 3),
+ (3, 'charlie', 9.8, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cmv_basetable_n6
+POSTHOOK: Lineage: cmv_basetable_n6.a SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_n6.b SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_n6.c SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_n6.d SCRIPT []
+PREHOOK: query: create table cmv_basetable_2_n3 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_basetable_2_n3
+POSTHOOK: query: create table cmv_basetable_2_n3 (a int, b varchar(256), c decimal(10,2), d int) stored as orc TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_basetable_2_n3
+PREHOOK: query: insert into cmv_basetable_2_n3 values
+ (1, 'alfred', 10.30, 2),
+ (3, 'calvin', 978.76, 3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cmv_basetable_2_n3
+POSTHOOK: query: insert into cmv_basetable_2_n3 values
+ (1, 'alfred', 10.30, 2),
+ (3, 'calvin', 978.76, 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cmv_basetable_2_n3
+POSTHOOK: Lineage: cmv_basetable_2_n3.a SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_2_n3.b SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_2_n3.c SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_2_n3.d SCRIPT []
+PREHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view_n6
+ TBLPROPERTIES ('transactional'='true') AS
+ SELECT cmv_basetable_n6.a, cmv_basetable_2_n3.c
+ FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+ WHERE cmv_basetable_2_n3.c > 10.0
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view_n6
+ TBLPROPERTIES ('transactional'='true') AS
+ SELECT cmv_basetable_n6.a, cmv_basetable_2_n3.c
+ FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+ WHERE cmv_basetable_2_n3.c > 10.0
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: Lineage: cmv_mat_view_n6.a SIMPLE [(cmv_basetable_n6)cmv_basetable_n6.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view_n6.c SIMPLE [(cmv_basetable_2_n3)cmv_basetable_2_n3.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
+PREHOOK: query: insert into cmv_basetable_2_n3 values
+ (3, 'charlie', 15.8, 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cmv_basetable_2_n3
+POSTHOOK: query: insert into cmv_basetable_2_n3 values
+ (3, 'charlie', 15.8, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cmv_basetable_2_n3
+POSTHOOK: Lineage: cmv_basetable_2_n3.a SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_2_n3.b SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_2_n3.c SCRIPT []
+POSTHOOK: Lineage: cmv_basetable_2_n3.d SCRIPT []
+PREHOOK: query: EXPLAIN CBO
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 join cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 join cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0])
+ HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, cmv_basetable_n6]], table:alias=[cmv_basetable_n6])
+ HiveProject(a=[$0])
+ HiveFilter(condition=[AND(>($2, 10.1:DECIMAL(3, 1)), IS NOT NULL($0))])
+ HiveTableScan(table=[[default, cmv_basetable_2_n3]], table:alias=[cmv_basetable_2_n3])
+
+PREHOOK: query: SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+#### A masked pattern was here ####
+1
+3
+3
+3
+3
+PREHOOK: query: EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: query: EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Output: default@cmv_mat_view_n6
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: cmv_basetable_n6
+ filterExpr: a is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: cmv_basetable_2_n3
+ filterExpr: ((ROW__ID.writeid > 1L) and (c > 10) and a is not null) (type: boolean)
+ properties:
+ insertonly.fetch.bucketid TRUE
+ Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((ROW__ID.writeid > 1L) and (c > 10) and a is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int), c (type: decimal(10,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: decimal(10,2))
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: decimal(10,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n6
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: decimal(10,2))
+ outputColumnNames: a, c
+ Statistics: Num rows: 2 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(c), max(c), count(c), compute_bit_vector_hll(c)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n6
+ Write Type: INSERT
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, c
+ Column Types: int, decimal(10,2)
+ Table: default.cmv_mat_view_n6
+
+ Stage: Stage-4
+ Materialized View Update
+ name: default.cmv_mat_view_n6
+ update creation metadata: true
+
+PREHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: Lineage: cmv_mat_view_n6.a SIMPLE [(cmv_basetable_n6)cmv_basetable_n6.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view_n6.c SIMPLE [(cmv_basetable_2_n3)cmv_basetable_2_n3.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
+PREHOOK: query: EXPLAIN CBO
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 join cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Input: default@cmv_mat_view_n6
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 join cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Input: default@cmv_mat_view_n6
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0])
+ HiveFilter(condition=[<(10.1:DECIMAL(3, 1), $1)])
+ HiveTableScan(table=[[default, cmv_mat_view_n6]], table:alias=[default.cmv_mat_view_n6])
+
+PREHOOK: query: SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Input: default@cmv_mat_view_n6
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Input: default@cmv_mat_view_n6
+#### A masked pattern was here ####
+1
+3
+3
+3
+3
+PREHOOK: query: UPDATE cmv_basetable_n6 SET a=2 WHERE a=1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Output: default@cmv_basetable_n6
+POSTHOOK: query: UPDATE cmv_basetable_n6 SET a=2 WHERE a=1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Output: default@cmv_basetable_n6
+PREHOOK: query: EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Input: default@cmv_mat_view_n6
+PREHOOK: Output: default@cmv_mat_view_n6
+PREHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: query: EXPLAIN
+ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Input: default@cmv_mat_view_n6
+POSTHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: Output: default@cmv_mat_view_n6
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-0
+ Stage-6 depends on stages: Stage-4, Stage-5
+ Stage-1 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: default.cmv_mat_view_n6
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: cmv_basetable_n6
+ filterExpr: ((ROW__ID.writeid > 1L) and a is not null) (type: boolean)
+ properties:
+ acid.fetch.deleted.rows TRUE
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((ROW__ID.writeid > 1L) and a is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int), ROW__IS__DELETED (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: cmv_basetable_2_n3
+ filterExpr: ((c > 10) and a is not null) (type: boolean)
+ properties:
+ acid.fetch.deleted.rows TRUE
+ Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((c > 10) and a is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: int), c (type: decimal(10,2)), ROW__IS__DELETED (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: decimal(10,2)), _col2 (type: boolean)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ nullSafes: [true]
+ outputColumnNames: _col1, _col2, _col3, _col4
+ Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col4 (type: boolean)
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (not _col4) (type: boolean)
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: int), _col3 (type: decimal(10,2))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n6
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: decimal(10,2))
+ outputColumnNames: a, c
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(c), max(c), count(c), compute_bit_vector_hll(c)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), _col7 (type: bigint), _col8 (type: binary)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n6
+ Write Type: DELETE
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 1 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'DECIMAL' (type: string), _col5 (type: decimal(10,2)), _col6 (type: decimal(10,2)), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 739 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: decimal(10,2)), (_col1 or _col4) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: decimal(10,2)), _col2 (type: boolean)
+
+ Stage: Stage-3
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n6
+ Write Type: DELETE
+
+ Stage: Stage-4
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-6
+ Materialized View Update
+ name: default.cmv_mat_view_n6
+ update creation metadata: true
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.cmv_mat_view_n6
+ Write Type: INSERT
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, c
+ Column Types: int, decimal(10,2)
+ Table: default.cmv_mat_view_n6
+
+PREHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Input: default@cmv_mat_view_n6
+PREHOOK: Output: default@cmv_mat_view_n6
+PREHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: query: ALTER MATERIALIZED VIEW cmv_mat_view_n6 REBUILD
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Input: default@cmv_mat_view_n6
+POSTHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: Lineage: cmv_mat_view_n6.a SIMPLE [(cmv_basetable_n6)cmv_basetable_n6.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: cmv_mat_view_n6.c SIMPLE [(cmv_basetable_2_n3)cmv_basetable_2_n3.FieldSchema(name:c, type:decimal(10,2), comment:null), ]
+PREHOOK: query: EXPLAIN CBO
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 join cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Input: default@cmv_mat_view_n6
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 join cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Input: default@cmv_mat_view_n6
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0])
+ HiveFilter(condition=[<(10.1:DECIMAL(3, 1), $1)])
+ HiveTableScan(table=[[default, cmv_mat_view_n6]], table:alias=[default.cmv_mat_view_n6])
+
+PREHOOK: query: SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+PREHOOK: Input: default@cmv_mat_view_n6
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+POSTHOOK: Input: default@cmv_mat_view_n6
+#### A masked pattern was here ####
+3
+3
+3
+3
+PREHOOK: query: drop materialized view cmv_mat_view_n6
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@cmv_mat_view_n6
+PREHOOK: Output: default@cmv_mat_view_n6
+POSTHOOK: query: drop materialized view cmv_mat_view_n6
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@cmv_mat_view_n6
+POSTHOOK: Output: default@cmv_mat_view_n6
+PREHOOK: query: SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_basetable_2_n3
+PREHOOK: Input: default@cmv_basetable_n6
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cmv_basetable_n6.a
+FROM cmv_basetable_n6 JOIN cmv_basetable_2_n3 ON (cmv_basetable_n6.a = cmv_basetable_2_n3.a)
+WHERE cmv_basetable_2_n3.c > 10.10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_basetable_2_n3
+POSTHOOK: Input: default@cmv_basetable_n6
+#### A masked pattern was here ####
+3
+3
+3
+3
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_9.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_9.q.out
new file mode 100644
index 0000000..ddb68c9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_9.q.out
@@ -0,0 +1,686 @@
+PREHOOK: query: create table t1(a char(15), b int, c int) stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1(a char(15), b int, c int) stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2(a char(15), b int) stored as orc TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2(a char(15), b int) stored as orc TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: insert into t1(a, b, c) values
+('update', 1, 1), ('update', 2, 1),
+('null_update', null, 1), ('null_update', null, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(a, b, c) values
+('update', 1, 1), ('update', 2, 1),
+('null_update', null, 1), ('null_update', null, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.c SCRIPT []
+PREHOOK: query: insert into t1(a, b, c) values ('remove', 3, 1), ('null_remove', null, 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(a, b, c) values ('remove', 3, 1), ('null_remove', null, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.c SCRIPT []
+PREHOOK: query: insert into t1(a, b, c) values ('sum0', 0, 1), ('sum0', 0, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(a, b, c) values ('sum0', 0, 1), ('sum0', 0, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.c SCRIPT []
+PREHOOK: query: insert into t2(a, b) values
+('update', 10),
+('null_update', null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t2
+POSTHOOK: query: insert into t2(a, b) values
+('update', 10),
+('null_update', null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a SCRIPT []
+POSTHOOK: Lineage: t2.b SCRIPT []
+PREHOOK: query: insert into t2(a, b) values ('remove', 30), ('null_remove', null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t2
+POSTHOOK: query: insert into t2(a, b) values ('remove', 30), ('null_remove', null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a SCRIPT []
+POSTHOOK: Lineage: t2.b SCRIPT []
+PREHOOK: query: insert into t2(a, b) values ('sum0', 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t2
+POSTHOOK: query: insert into t2(a, b) values ('sum0', 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a SCRIPT []
+POSTHOOK: Lineage: t2.b SCRIPT []
+PREHOOK: query: create materialized view mat1 stored as orc TBLPROPERTIES ('transactional'='true') as
+select t1.a, sum(t1.b), count(*) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat1
+POSTHOOK: query: create materialized view mat1 stored as orc TBLPROPERTIES ('transactional'='true') as
+select t1.a, sum(t1.b), count(*) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat1
+POSTHOOK: Lineage: mat1._c1 EXPRESSION [(t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c2 EXPRESSION [(t1)t1.null, (t2)t2.null, ]
+POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
+PREHOOK: query: explain cbo
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], _c1=[$1])
+ HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+
+PREHOOK: query: delete from t1 where b = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: delete from t1 where b = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: delete from t1 where a like '%remove'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: delete from t1 where a like '%remove'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: delete from t1 where c = 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: delete from t1 where c = 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1(a,b,c) values
+('update', 5, 1),
+('add', 5, 1),
+('add/remove', 0, 0),
+('null_update', null, 0),
+('null_add', null, 0),
+('null_add/remove', null, 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(a,b,c) values
+('update', 5, 1),
+('add', 5, 1),
+('add/remove', 0, 0),
+('null_update', null, 0),
+('null_add', null, 0),
+('null_add/remove', null, 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.c SCRIPT []
+PREHOOK: query: insert into t2(a,b) values
+('add', 15),
+('add/remove', 0),
+('null_add', null),
+('null_add/remove', null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t2
+POSTHOOK: query: insert into t2(a,b) values
+('add', 15),
+('add/remove', 0),
+('null_add', null),
+('null_add/remove', null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a SCRIPT []
+POSTHOOK: Lineage: t2.b SCRIPT []
+PREHOOK: query: delete from t1 where a like '%add/remove'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: delete from t1 where a like '%add/remove'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain cbo
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{0}], agg#0=[sum($1)])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0], b=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+ HiveProject(a=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t2]], table:alias=[t2])
+
+PREHOOK: query: explain cbo
+alter materialized view mat1 rebuild
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
+POSTHOOK: query: explain cbo
+alter materialized view mat1 rebuild
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: default@mat1
+CBO PLAN:
+HiveProject(a0=[$4], $f1=[CASE(IS NULL($1), $5, IS NULL($5), $1, +($5, $1))], $f2=[CASE(IS NULL($2), $6, +($6, $2))])
+ HiveFilter(condition=[OR(AND($3, OR(AND(IS NULL($2), =($6, 0)), AND(=(+($6, $2), 0), IS NOT NULL($2)))), AND(IS NULL($3), OR(AND(IS NULL($2), >($6, 0)), AND(>(+($6, $2), 0), IS NOT NULL($2)))), AND($3, OR(AND(IS NULL($2), >($6, 0)), AND(>(+($6, $2), 0), IS NOT NULL($2)))))])
+ HiveJoin(condition=[IS NOT DISTINCT FROM($0, $4)], joinType=[right], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0], _c1=[$1], _c2=[$2], $f3=[true])
+ HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+ HiveProject(a=[$0], $f1=[$1], $f2=[$2])
+ HiveAggregate(group=[{0}], agg#0=[SUM($1)], agg#1=[SUM($2)])
+ HiveProject(a=[$0], $f3=[CASE(OR($2, $5), *(-1, $1), $1)], $f4=[CASE(OR($2, $5), -1, 1)])
+ HiveJoin(condition=[AND(=($0, $4), OR($3, $6))], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0], b=[$1], ROW__IS__DELETED=[$6], <=[<(3, $5.writeid)])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+ HiveProject(a=[$0], ROW__IS__DELETED=[$5], <=[<(3, $4.writeid)])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t2]], table:alias=[t2])
+
+PREHOOK: query: explain
+alter materialized view mat1 rebuild
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
+POSTHOOK: query: explain
+alter materialized view mat1 rebuild
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: default@mat1
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
+ Stage-1 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-3
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: default.mat1
+ Statistics: Num rows: 5 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: char(15)), _c1 (type: bigint), _c2 (type: bigint), true (type: boolean), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5 Data size: 932 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(15))
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(15))
+ Statistics: Num rows: 5 Data size: 932 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean), _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: a is not null (type: boolean)
+ properties:
+ acid.fetch.deleted.rows TRUE
+ Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 7 Data size: 671 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: char(15)), b (type: int), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(15))
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(15))
+ Statistics: Num rows: 7 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: a is not null (type: boolean)
+ properties:
+ acid.fetch.deleted.rows TRUE
+ Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 837 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: a (type: char(15)), ROW__IS__DELETED (type: boolean), (ROW__ID.writeid > 3L) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(15))
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(15))
+ Statistics: Num rows: 9 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean), _col2 (type: boolean)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: char(15))
+ 1 _col0 (type: char(15))
+ nullSafes: [true]
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 5 Data size: 1017 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col3 and ((_col2 is null and (_col7 = 0L)) or (((_col7 + _col2) = 0) and _col2 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col3 is null and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col6) WHEN (_col6 is null) THEN (_col1) ELSE ((_col6 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col7) ELSE ((_col7 + _col2)) END (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: char(15)), _col1 (type: bigint), _col2 (type: bigint)
+ outputColumnNames: a, _c1, _c2
+ Statistics: Num rows: 1 Data size: 109 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: max(length(a)), avg(COALESCE(length(a),0)), count(1), count(a), compute_bit_vector_hll(a), min(_c1), max(_c1), count(_c1), compute_bit_vector_hll(_c1), min(_c2), max(_c2), count(_c2), compute_bit_vector_hll(_c2)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: binary), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: binary)
+ Filter Operator
+ predicate: (_col3 and ((_col2 is null and (_col7 > 0L)) or (((_col7 + _col2) > 0) and _col2 is not null))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col5 (type: char(15)), CASE WHEN (_col1 is null) THEN (_col6) WHEN (_col6 is null) THEN (_col1) ELSE ((_col6 + _col1)) END (type: bigint), CASE WHEN (_col2 is null) THEN (_col7) ELSE ((_col7 + _col2)) END (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: char(15)), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: DELETE
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1 Data size: 508 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col0,0)) (type: bigint), COALESCE(_col1,0) (type: double), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), _col5 (type: bigint), _col6 (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), _col9 (type: bigint), _col10 (type: bigint) [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+ Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: char(15)), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 185 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: UPDATE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: char(15))
+ 1 _col0 (type: char(15))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6
+ residual filter predicates: {(_col3 or _col6)}
+ Statistics: Num rows: 6 Data size: 670 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: char(15)), CASE WHEN ((_col2 or _col5)) THEN ((-1 * _col1)) ELSE (_col1) END (type: int), CASE WHEN ((_col2 or _col5)) THEN (-1) ELSE (1) END (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 670 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2)
+ keys: _col0 (type: char(15))
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(15))
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(15))
+ Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 8
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ keys: KEY._col0 (type: char(15))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(15))
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(15))
+ Statistics: Num rows: 5 Data size: 545 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+
+ Stage: Stage-4
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: DELETE
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-8
+ Materialized View Update
+ name: default.mat1
+ update creation metadata: true
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: INSERT
+
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mat1
+ Write Type: UPDATE
+
+ Stage: Stage-7
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, _c1, _c2
+ Column Types: char(15), bigint, bigint
+ Table: default.mat1
+
+PREHOOK: query: alter materialized view mat1 rebuild
+PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
+POSTHOOK: query: alter materialized view mat1 rebuild
+POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: default@mat1
+POSTHOOK: Lineage: mat1._c1 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c1, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t1)t1.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: mat1._c2 EXPRESSION [(mat1)default.mat1.FieldSchema(name:_c2, type:bigint, comment:null), (t1)t1.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), (t2)t2.FieldSchema(name:ROW__IS__DELETED, type:boolean, comment:), ]
+POSTHOOK: Lineage: mat1.a SIMPLE [(t1)t1.FieldSchema(name:a, type:char(15), comment:null), ]
+PREHOOK: query: explain cbo
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], _c1=[$1])
+ HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+
+PREHOOK: query: select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+add 5
+null_add NULL
+null_update NULL
+sum0 0
+update 7
+PREHOOK: query: drop materialized view mat1
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mat1
+PREHOOK: Output: default@mat1
+POSTHOOK: query: drop materialized view mat1
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mat1
+POSTHOOK: Output: default@mat1
+PREHOOK: query: select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.a, sum(t1.b) from t1
+join t2 on (t1.a = t2.a)
+group by t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+add 5
+null_add NULL
+null_update NULL
+sum0 0
+update 7
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_parquet.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_parquet.q.out
index 4886c6c..fb800e9 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_parquet.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_parquet.q.out
@@ -817,6 +817,8 @@ STAGE PLANS:
TableScan
alias: emps_parquet_n3
filterExpr: (ROW__ID.writeid > 3L) (type: boolean)
+ properties:
+ insertonly.fetch.bucketid TRUE
Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (ROW__ID.writeid > 3L) (type: boolean)
@@ -991,35 +993,35 @@ POSTHOOK: Lineage: mv1_parquet_n2.name EXPRESSION [(emps_parquet_n3)emps_parquet
POSTHOOK: Lineage: mv1_parquet_n2.s EXPRESSION [(emps_parquet_n3)emps_parquet_n3.FieldSchema(name:empid, type:int, comment:null), (mv1_parquet_n2)default.mv1_parquet_n2.FieldSchema(name:s, type:bigint, comment:null), ]
POSTHOOK: Lineage: mv1_parquet_n2.salary EXPRESSION [(emps_parquet_n3)emps_parquet_n3.FieldSchema(name:salary, type:float, comment:null), (mv1_parquet_n2)default.mv1_parquet_n2.FieldSchema(name:salary, type:float, comment:null), ]
PREHOOK: query: explain cbo
-select name from emps_parquet_n3 group by name
+select name, sum(empid) from emps_parquet_n3 group by name
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_parquet_n3
PREHOOK: Input: default@mv1_parquet_n2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
-select name from emps_parquet_n3 group by name
+select name, sum(empid) from emps_parquet_n3 group by name
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_parquet_n3
POSTHOOK: Input: default@mv1_parquet_n2
#### A masked pattern was here ####
CBO PLAN:
-HiveAggregate(group=[{0}])
+HiveAggregate(group=[{0}], agg#0=[sum($3)])
HiveTableScan(table=[[default, mv1_parquet_n2]], table:alias=[default.mv1_parquet_n2])
-PREHOOK: query: select name from emps_parquet_n3 group by name
+PREHOOK: query: select name, sum(empid) from emps_parquet_n3 group by name
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_parquet_n3
PREHOOK: Input: default@mv1_parquet_n2
#### A masked pattern was here ####
-POSTHOOK: query: select name from emps_parquet_n3 group by name
+POSTHOOK: query: select name, sum(empid) from emps_parquet_n3 group by name
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_parquet_n3
POSTHOOK: Input: default@mv1_parquet_n2
#### A masked pattern was here ####
-Bill
-Eric
-Sebastian
-Theodore
+Bill 620
+Eric 200
+Sebastian 150
+Theodore 110
PREHOOK: query: drop materialized view mv1_parquet_n2
PREHOOK: type: DROP_MATERIALIZED_VIEW
PREHOOK: Input: default@mv1_parquet_n2
@@ -1028,3 +1030,15 @@ POSTHOOK: query: drop materialized view mv1_parquet_n2
POSTHOOK: type: DROP_MATERIALIZED_VIEW
POSTHOOK: Input: default@mv1_parquet_n2
POSTHOOK: Output: default@mv1_parquet_n2
+PREHOOK: query: select name, sum(empid) from emps_parquet_n3 group by name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps_parquet_n3
+#### A masked pattern was here ####
+POSTHOOK: query: select name, sum(empid) from emps_parquet_n3 group by name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps_parquet_n3
+#### A masked pattern was here ####
+Bill 620
+Eric 200
+Sebastian 150
+Theodore 110
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg.q.out
index 7411fb3..54aa122 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg.q.out
@@ -31,14 +31,14 @@ POSTHOOK: Output: default@t1
POSTHOOK: Lineage: t1.a SCRIPT []
POSTHOOK: Lineage: t1.b SCRIPT []
POSTHOOK: Lineage: t1.c SCRIPT []
-PREHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") AS
+PREHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') AS
SELECT a, b, sum(c) sumc FROM t1 GROUP BY b, a
PREHOOK: type: CREATE_MATERIALIZED_VIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@mat1
PREHOOK: Output: default@mat1
-POSTHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") AS
+POSTHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') AS
SELECT a, b, sum(c) sumc FROM t1 GROUP BY b, a
POSTHOOK: type: CREATE_MATERIALIZED_VIEW
POSTHOOK: Input: default@t1
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg_2.q.out
index 8f48380..4e02cb3 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg_2.q.out
@@ -7,26 +7,26 @@ POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t1
PREHOOK: query: INSERT INTO t1(a, b, c, d, e) VALUES
-(1, 1, 1, "one", 1.1),
-(1, 4, 1, "one", 4.2),
-(2, 2, 2, "two", 2.2),
-(1, 10, 1, "one", 10.1),
-(2, 2, 2, "two", 2.2),
-(1, 3, 1, "one", 3.1),
-(null, 4, null, "unknown", 4.6),
-(null, 4, 2, "unknown", 4.7)
+(1, 1, 1, 'one', 1.1),
+(1, 4, 1, 'one', 4.2),
+(2, 2, 2, 'two', 2.2),
+(1, 10, 1, 'one', 10.1),
+(2, 2, 2, 'two', 2.2),
+(1, 3, 1, 'one', 3.1),
+(null, 4, null, 'unknown', 4.6),
+(null, 4, 2, 'unknown', 4.7)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t1
POSTHOOK: query: INSERT INTO t1(a, b, c, d, e) VALUES
-(1, 1, 1, "one", 1.1),
-(1, 4, 1, "one", 4.2),
-(2, 2, 2, "two", 2.2),
-(1, 10, 1, "one", 10.1),
-(2, 2, 2, "two", 2.2),
-(1, 3, 1, "one", 3.1),
-(null, 4, null, "unknown", 4.6),
-(null, 4, 2, "unknown", 4.7)
+(1, 1, 1, 'one', 1.1),
+(1, 4, 1, 'one', 4.2),
+(2, 2, 2, 'two', 2.2),
+(1, 10, 1, 'one', 10.1),
+(2, 2, 2, 'two', 2.2),
+(1, 3, 1, 'one', 3.1),
+(null, 4, null, 'unknown', 4.6),
+(null, 4, 2, 'unknown', 4.7)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t1
@@ -35,14 +35,14 @@ POSTHOOK: Lineage: t1.b SCRIPT []
POSTHOOK: Lineage: t1.c SCRIPT []
POSTHOOK: Lineage: t1.d SCRIPT []
POSTHOOK: Lineage: t1.e SCRIPT []
-PREHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a, c, d) STORED AS ORC TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") AS
+PREHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a, c, d) STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') AS
SELECT a, sum(b) sumb, c, d, sum(e) sume FROM t1 GROUP BY a, c, d
PREHOOK: type: CREATE_MATERIALIZED_VIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@mat1
PREHOOK: Output: default@mat1
-POSTHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a, c, d) STORED AS ORC TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") AS
+POSTHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a, c, d) STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') AS
SELECT a, sum(b) sumb, c, d, sum(e) sume FROM t1 GROUP BY a, c, d
POSTHOOK: type: CREATE_MATERIALIZED_VIEW
POSTHOOK: Input: default@t1
@@ -64,16 +64,16 @@ POSTHOOK: Lineage: mat1 PARTITION(a=__HIVE_DEFAULT_PARTITION__,c=2,d=unknown).su
POSTHOOK: Lineage: mat1 PARTITION(a=__HIVE_DEFAULT_PARTITION__,c=__HIVE_DEFAULT_PARTITION__,d=unknown).sumb EXPRESSION [(t1)t1.FieldSchema(name:b, type:int, comment:null), ]
POSTHOOK: Lineage: mat1 PARTITION(a=__HIVE_DEFAULT_PARTITION__,c=__HIVE_DEFAULT_PARTITION__,d=unknown).sume EXPRESSION [(t1)t1.FieldSchema(name:e, type:float, comment:null), ]
PREHOOK: query: INSERT INTO t1(a, b, c, d, e) VALUES
-(1, 3, 1, "one", 3.3),
-(1, 110, 1, "one", 110.11),
-(null, 20, null, "unknown", 20.22)
+(1, 3, 1, 'one', 3.3),
+(1, 110, 1, 'one', 110.11),
+(null, 20, null, 'unknown', 20.22)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t1
POSTHOOK: query: INSERT INTO t1(a, b, c, d, e) VALUES
-(1, 3, 1, "one", 3.3),
-(1, 110, 1, "one", 110.11),
-(null, 20, null, "unknown", 20.22)
+(1, 3, 1, 'one', 3.3),
+(1, 110, 1, 'one', 110.11),
+(null, 20, null, 'unknown', 20.22)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t1
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg_3.q.out
similarity index 98%
copy from ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg.q.out
copy to ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg_3.q.out
index 7411fb3..9c3897d 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_partitioned_create_rewrite_agg_3.q.out
@@ -1,8 +1,8 @@
-PREHOOK: query: CREATE TABLE t1(a int, b int,c int) STORED AS ORC TBLPROPERTIES ('transactional' = 'true')
+PREHOOK: query: CREATE TABLE t1(a int, b int,c int) STORED AS ORC TBLPROPERTIES ('transactional' = 'true', 'transactional_properties'='insert_only')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t1
-POSTHOOK: query: CREATE TABLE t1(a int, b int,c int) STORED AS ORC TBLPROPERTIES ('transactional' = 'true')
+POSTHOOK: query: CREATE TABLE t1(a int, b int,c int) STORED AS ORC TBLPROPERTIES ('transactional' = 'true', 'transactional_properties'='insert_only')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t1
@@ -31,14 +31,14 @@ POSTHOOK: Output: default@t1
POSTHOOK: Lineage: t1.a SCRIPT []
POSTHOOK: Lineage: t1.b SCRIPT []
POSTHOOK: Lineage: t1.c SCRIPT []
-PREHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") AS
+PREHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') AS
SELECT a, b, sum(c) sumc FROM t1 GROUP BY b, a
PREHOOK: type: CREATE_MATERIALIZED_VIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@mat1
PREHOOK: Output: default@mat1
-POSTHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ("transactional"="true", "transactional_properties"="insert_only") AS
+POSTHOOK: query: CREATE MATERIALIZED VIEW mat1 PARTITIONED ON (a) STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') AS
SELECT a, b, sum(c) sumc FROM t1 GROUP BY b, a
POSTHOOK: type: CREATE_MATERIALIZED_VIEW
POSTHOOK: Input: default@t1
@@ -103,10 +103,6 @@ POSTHOOK: Input: default@mat1@a=2
POSTHOOK: Input: default@mat1@a=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
-1 8 1
-2 10 1
-3 210 1
-4 220 NULL
PREHOOK: query: EXPLAIN CBO
ALTER MATERIALIZED VIEW mat1 REBUILD
PREHOOK: type: ALTER_MATERIALIZED_VIEW_REBUILD
@@ -203,6 +199,8 @@ STAGE PLANS:
TableScan
alias: t1
filterExpr: (ROW__ID.writeid > 1L) (type: boolean)
+ properties:
+ insertonly.fetch.bucketid TRUE
Statistics: Num rows: 10 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (ROW__ID.writeid > 1L) (type: boolean)