Posted to commits@hive.apache.org by kg...@apache.org on 2020/05/09 09:31:00 UTC

[hive] branch master updated (134f3b2 -> 39faad1)

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


    from 134f3b2  HIVE-23359 'show tables like' support for SQL wildcard characters (% and _) ADDENDUM - remove unused imports (Miklos Gergely, reviewed by Zoltan Haindrich)
     new 220351f  HIVE-23369: schq_ingest may run twice during a test execution (Zoltan Haindrich reviewed by Miklos Gergely)
     new 39faad1  HIVE-23368: MV rebuild should produce the same view as the one configured at creation time (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../test/resources/testconfiguration.properties    |   1 +
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  12 +-
 ql/src/test/queries/clientpositive/schq_analyze.q  |   4 +-
 ql/src/test/queries/clientpositive/schq_ingest.q   |   4 +-
 .../queries/clientpositive/schq_materialized.q     |   4 +-
 .../sketches_materialized_view_safety.q            |  38 ++
 .../results/clientpositive/llap/schq_analyze.q.out |   4 +-
 .../results/clientpositive/llap/schq_ingest.q.out  |   4 +-
 .../clientpositive/llap/schq_materialized.q.out    |   6 +-
 .../llap/sketches_materialized_view_safety.q.out   | 519 +++++++++++++++++++++
 10 files changed, 582 insertions(+), 14 deletions(-)
 create mode 100644 ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q
 create mode 100644 ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out


[hive] 01/02: HIVE-23369: schq_ingest may run twice during a test execution (Zoltan Haindrich reviewed by Miklos Gergely)

Posted by kg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 220351f23c79bae8849f8c00cc59bb8ebb18b6ca
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Sat May 9 08:50:07 2020 +0000

    HIVE-23369: schq_ingest may run twice during a test execution (Zoltan Haindrich reviewed by Miklos Gergely)
    
    Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
 ql/src/test/queries/clientpositive/schq_analyze.q               | 4 ++--
 ql/src/test/queries/clientpositive/schq_ingest.q                | 4 ++--
 ql/src/test/queries/clientpositive/schq_materialized.q          | 4 ++--
 ql/src/test/results/clientpositive/llap/schq_analyze.q.out      | 4 ++--
 ql/src/test/results/clientpositive/llap/schq_ingest.q.out       | 4 ++--
 ql/src/test/results/clientpositive/llap/schq_materialized.q.out | 6 +++---
 6 files changed, 13 insertions(+), 13 deletions(-)
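
The new cron expressions use the seven-field Quartz format (second, minute, hour, day-of-month, month, day-of-week, year); pinning the year to 2030 pushes the schedule's first valid fire time far into the future, so during a test only the explicit "alter scheduled query ... execute" can run the query, never the schedule itself. A minimal sketch of checking such an expression, assuming the Quartz library (org.quartz.CronExpression), whose syntax these expressions follow:

    import java.text.ParseException;
    import java.util.Date;
    import org.quartz.CronExpression;

    public class FarFutureCronCheck {
      public static void main(String[] args) throws ParseException {
        // fields: second minute hour day-of-month month day-of-week year
        CronExpression cron = new CronExpression("0 0 0 1 * ? 2030");
        // the first valid fire time after "now" is 2030-01-01 00:00:00,
        // so a test running today never sees an automatic trigger
        System.out.println(cron.getNextValidTimeAfter(new Date()));
      }
    }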

diff --git a/ql/src/test/queries/clientpositive/schq_analyze.q b/ql/src/test/queries/clientpositive/schq_analyze.q
index 246a215..7d8fa8b 100644
--- a/ql/src/test/queries/clientpositive/schq_analyze.q
+++ b/ql/src/test/queries/clientpositive/schq_analyze.q
@@ -16,8 +16,8 @@ insert into t values (1),(2),(3);
 -- basic stats show that the table has "0" rows
 desc formatted t;
 
--- create a schedule to compute stats
-create scheduled query t_analyze cron '0 */1 * * * ? *' as analyze table t compute statistics for columns;
+-- create a schedule to compute stats in the far future
+create scheduled query t_analyze cron '0 0 0 1 * ? 2030' as analyze table t compute statistics for columns;
 
 alter scheduled query t_analyze execute;
 
diff --git a/ql/src/test/queries/clientpositive/schq_ingest.q b/ql/src/test/queries/clientpositive/schq_ingest.q
index 8ffc722..2357e7e 100644
--- a/ql/src/test/queries/clientpositive/schq_ingest.q
+++ b/ql/src/test/queries/clientpositive/schq_ingest.q
@@ -26,8 +26,8 @@ join t_offset on id>=offset) s1
 insert into t select id,cnt where not first
 insert overwrite table t_offset select max(s1.id);
  
--- configure to run ingestion every 10 minutes
-create scheduled query ingest every 10 minutes defined as
+-- configure ingestion to run in the far future
+create scheduled query ingest cron '0 0 0 1 * ? 2030' defined as
 from (select id==offset as first,* from s
 join t_offset on id>=offset) s1
 insert into t select id,cnt where not first
diff --git a/ql/src/test/queries/clientpositive/schq_materialized.q b/ql/src/test/queries/clientpositive/schq_materialized.q
index 46b725e..f629bdf 100644
--- a/ql/src/test/queries/clientpositive/schq_materialized.q
+++ b/ql/src/test/queries/clientpositive/schq_materialized.q
@@ -59,8 +59,8 @@ SELECT empid, deptname FROM emps
 JOIN depts ON (emps.deptno = depts.deptno)
 WHERE hire_date >= '2018-01-01';
 
--- create a schedule to rebuild mv
-create scheduled query d cron '0 0 * * * ? *' defined as 
+-- create a schedule to rebuild mv (in the far future)
+create scheduled query d cron '0 0 0 1 * ? 2030' defined as 
   alter materialized view mv1 rebuild;
 
 set hive.support.quoted.identifiers=none;
diff --git a/ql/src/test/results/clientpositive/llap/schq_analyze.q.out b/ql/src/test/results/clientpositive/llap/schq_analyze.q.out
index a083479..4824557 100644
--- a/ql/src/test/results/clientpositive/llap/schq_analyze.q.out
+++ b/ql/src/test/results/clientpositive/llap/schq_analyze.q.out
@@ -53,9 +53,9 @@ Bucket Columns:     	[]
 Sort Columns:       	[]                  	 
 Storage Desc Params:	 	 
 	serialization.format	1                   
-PREHOOK: query: create scheduled query t_analyze cron '0 */1 * * * ? *' as analyze table t compute statistics for columns
+PREHOOK: query: create scheduled query t_analyze cron '0 0 0 1 * ? 2030' as analyze table t compute statistics for columns
 PREHOOK: type: CREATE SCHEDULED QUERY
-POSTHOOK: query: create scheduled query t_analyze cron '0 */1 * * * ? *' as analyze table t compute statistics for columns
+POSTHOOK: query: create scheduled query t_analyze cron '0 0 0 1 * ? 2030' as analyze table t compute statistics for columns
 POSTHOOK: type: CREATE SCHEDULED QUERY
 PREHOOK: query: alter scheduled query t_analyze execute
 PREHOOK: type: ALTER SCHEDULED QUERY
diff --git a/ql/src/test/results/clientpositive/llap/schq_ingest.q.out b/ql/src/test/results/clientpositive/llap/schq_ingest.q.out
index 19d2b11..8e5c123 100644
--- a/ql/src/test/results/clientpositive/llap/schq_ingest.q.out
+++ b/ql/src/test/results/clientpositive/llap/schq_ingest.q.out
@@ -76,13 +76,13 @@ POSTHOOK: Lineage: t.cnt SIMPLE [(s)s.FieldSchema(name:cnt, type:int, comment:nu
 POSTHOOK: Lineage: t.id SIMPLE [(s)s.FieldSchema(name:id, type:int, comment:null), ]
 POSTHOOK: Lineage: t_offset.offset EXPRESSION [(s)s.FieldSchema(name:id, type:int, comment:null), ]
 Warning: Shuffle Join MERGEJOIN[34][tables = [s, t_offset]] in Stage 'Reducer 2' is a cross product
-PREHOOK: query: create scheduled query ingest every 10 minutes defined as
+PREHOOK: query: create scheduled query ingest cron '0 0 0 1 * ? 2030' defined as
 from (select id==offset as first,* from s
 join t_offset on id>=offset) s1
 insert into t select id,cnt where not first
 insert overwrite table t_offset select max(s1.id)
 PREHOOK: type: CREATE SCHEDULED QUERY
-POSTHOOK: query: create scheduled query ingest every 10 minutes defined as
+POSTHOOK: query: create scheduled query ingest cron '0 0 0 1 * ? 2030' defined as
 from (select id==offset as first,* from s
 join t_offset on id>=offset) s1
 insert into t select id,cnt where not first
diff --git a/ql/src/test/results/clientpositive/llap/schq_materialized.q.out b/ql/src/test/results/clientpositive/llap/schq_materialized.q.out
index ff86bb5..43631b5 100644
--- a/ql/src/test/results/clientpositive/llap/schq_materialized.q.out
+++ b/ql/src/test/results/clientpositive/llap/schq_materialized.q.out
@@ -263,10 +263,10 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: create scheduled query d cron '0 0 * * * ? *' defined as 
+PREHOOK: query: create scheduled query d cron '0 0 0 1 * ? 2030' defined as 
   alter materialized view mv1 rebuild
 PREHOOK: type: CREATE SCHEDULED QUERY
-POSTHOOK: query: create scheduled query d cron '0 0 * * * ? *' defined as 
+POSTHOOK: query: create scheduled query d cron '0 0 0 1 * ? 2030' defined as 
   alter materialized view mv1 rebuild
 POSTHOOK: type: CREATE SCHEDULED QUERY
 PREHOOK: query: select `(NEXT_EXECUTION|SCHEDULED_QUERY_ID)?+.+` from sys.scheduled_queries
@@ -277,7 +277,7 @@ POSTHOOK: query: select `(NEXT_EXECUTION|SCHEDULED_QUERY_ID)?+.+` from sys.sched
 POSTHOOK: type: QUERY
 POSTHOOK: Input: sys@scheduled_queries
 #### A masked pattern was here ####
-d	true	hive	0 0 * * * ? *	hive_admin_user	alter materialized view `default`.`mv1` rebuild	NULL
+d	true	hive	0 0 0 1 * ? 2030	hive_admin_user	alter materialized view `default`.`mv1` rebuild	NULL
 PREHOOK: query: alter scheduled query d execute
 PREHOOK: type: ALTER SCHEDULED QUERY
 POSTHOOK: query: alter scheduled query d execute


[hive] 02/02: HIVE-23368: MV rebuild should produce the same view as the one configured at creation time (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)

Posted by kg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 39faad1dae7316b1f29b6c5589a3b8edc54092f7
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Sat May 9 08:50:22 2020 +0000

    HIVE-23368: MV rebuild should produce the same view as the one configured at creation time (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
    
    Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
 .../test/resources/testconfiguration.properties    |   1 +
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  12 +-
 .../sketches_materialized_view_safety.q            |  38 ++
 .../llap/sketches_materialized_view_safety.q.out   | 519 +++++++++++++++++++++
 4 files changed, 569 insertions(+), 1 deletion(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 2036f29..cf3bc5c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -830,6 +830,7 @@ minillaplocal.query.files=\
   sketches_rewrite.q,\
   sketches_materialized_view_rollup.q,\
   sketches_materialized_view_rollup2.q,\
+  sketches_materialized_view_safety.q,\
   table_access_keys_stats.q,\
   temp_table_llap_partitioned.q,\
   tez_bmj_schema_evolution.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index bf08306..085de48 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1970,7 +1970,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
           HiveExceptRewriteRule.INSTANCE);
 
       //1. Distinct aggregate rewrite
-      if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) {
+
+      if (!isMaterializedViewMaintenance() && conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) {
         // Rewrite to datasketches if enabled
         if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNTDISTINCT_ENABLED)) {
           String sketchClass = conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNT_DISTINCT_SKETCH);
@@ -2106,6 +2107,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
       return basePlan;
     }
 
+    /**
+     * Returns true if the MV is being loaded, constructed, or rebuilt.
+     */
+    private boolean isMaterializedViewMaintenance() {
+      return mvRebuildMode != MaterializationRebuildMode.NONE
+          || ctx.isLoadingMaterializedView()
+          || getQB().isMaterializedView();
+    }
+
     private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode basePlan,
         RelMetadataProvider mdProvider, RexExecutor executorProvider) {
       final RelOptCluster optCluster = basePlan.getCluster();
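
The guard above keeps the datasketches rewrite out of materialized-view creation, load, and rebuild plans: with hive.optimize.bi.enabled=true, count(distinct x) is normally rewritten into an HLL-sketch estimate, and letting that rewrite fire during a rebuild would materialize an approximate view different from the exact one recorded at creation time. A rough illustration of what the suppressed rewrite computes, assuming the Apache DataSketches Java library (org.apache.datasketches.hll.HllSketch); the lgConfigK value here is an arbitrary choice, not Hive's configured default:

    import org.apache.datasketches.hll.HllSketch;

    public class HllCountDistinctDemo {
      public static void main(String[] args) {
        // what the BI rewrite substitutes for count(distinct id):
        // stream values into an HLL sketch and read off an estimate
        HllSketch sketch = new HllSketch(12); // lgConfigK controls precision
        for (long id = 1; id <= 15; id++) {
          sketch.update(id);
        }
        sketch.update(6L); // duplicates leave the estimate unchanged
        // prints an approximation of the 15 distinct ids
        System.out.printf("approx distinct ids: %.2f%n", sketch.getEstimate());
      }
    }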
diff --git a/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q b/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q
new file mode 100644
index 0000000..620cbb7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q
@@ -0,0 +1,38 @@
+--! qt:transactional
+set hive.fetch.task.conversion=none;
+set hive.optimize.bi.enabled=true;
+
+create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true');
+
+insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+; 
+
+explain
+create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category;
+create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category;
+
+insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+;
+
+explain
+alter materialized view mv_1 rebuild;
+alter materialized view mv_1 rebuild;
+
+-- see if we use the mv
+explain
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category;
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category;
+
+set hive.optimize.bi.enabled=false;
+
+explain
+select 'mv used',category, count(distinct id) from sketch_input group by category;
+select 'mv used',category, count(distinct id) from sketch_input group by category;
diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out
new file mode 100644
index 0000000..959edfc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out
@@ -0,0 +1,519 @@
+PREHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_input
+PREHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: explain
+create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: explain
+create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_1
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-2
+  Stage-3 depends on stages: Stage-4
+  Stage-5 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: sketch_input
+                  Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), category (type: char(1))
+                    outputColumnNames: id, category
+                    Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: category (type: char(1)), id (type: int)
+                      minReductionHashAggr: 0.3181818
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(1)), _col1 (type: int)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: char(1))
+                        Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: char(1)), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: int), _col0 (type: char(1))
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count(_col0)
+                    keys: _col1 (type: char(1))
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 'no-rewrite-may-happen' (type: string), _col0 (type: char(1)), _col1 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                            name: default.mv_1
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: char(1)), _col2 (type: bigint)
+                        outputColumnNames: col1, col2, col3
+                        Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll')
+                          minReductionHashAggr: 0.5
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            null sort order: 
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+    Create View
+      columns: _c0 string, category char(1), _c2 bigint
+      expanded text: select 'no-rewrite-may-happen',`sketch_input`.`category`, count(distinct `sketch_input`.`id`) from `default`.`sketch_input` group by `sketch_input`.`category`
+      name: default.mv_1
+      original text: select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+      rewrite enabled: true
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: _c0, category, _c2
+          Column Types: string, char(1), bigint
+          Table: default.mv_1
+
+  Stage: Stage-5
+    Materialized View Update
+      name: default.mv_1
+      retrieve and include: true
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: create  materialized view mv_1 as
+  select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_1
+PREHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: explain
+alter materialized view mv_1 rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: explain
+alter materialized view mv_1 rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@mv_1
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: sketch_input
+                  Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), category (type: char(1))
+                    outputColumnNames: id, category
+                    Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: category (type: char(1)), id (type: int)
+                      minReductionHashAggr: 0.52272725
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(1)), _col1 (type: int)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: char(1))
+                        Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: char(1)), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: int), _col0 (type: char(1))
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count(_col0)
+                    keys: _col1 (type: char(1))
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 'no-rewrite-may-happen' (type: string), _col0 (type: char(1)), _col1 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                            name: default.mv_1
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: char(1)), _col2 (type: bigint)
+                        outputColumnNames: _c0, category, _c2
+                        Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: compute_stats(_c0, 'hll'), compute_stats(category, 'hll'), compute_stats(_c2, 'hll')
+                          minReductionHashAggr: 0.5
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            null sort order: 
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.mv_1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: _c0, category, _c2
+          Column Types: string, char(1), bigint
+          Table: default.mv_1
+
+  Stage: Stage-4
+    Materialized View Update
+      name: default.mv_1
+      update creation metadata: true
+
+PREHOOK: query: alter materialized view mv_1 rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: alter materialized view mv_1 rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@mv_1
+POSTHOOK: Lineage: mv_1._c0 SIMPLE []
+POSTHOOK: Lineage: mv_1._c2 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: mv_1.category SIMPLE [(sketch_input)sketch_input.FieldSchema(name:category, type:char(1), comment:null), ]
+PREHOOK: query: explain
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: sketch_input
+                  Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), category (type: char(1))
+                    outputColumnNames: id, category
+                    Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: ds_hll_sketch(id)
+                      keys: category (type: char(1))
+                      minReductionHashAggr: 0.95454544
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(1))
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: char(1))
+                        Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: struct<lgk:int,type:string,sketch:binary>)
+            Execution mode: llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: ds_hll_sketch(VALUE._col0)
+                keys: KEY._col0 (type: char(1))
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: 'rewritten;mv not used' (type: string), _col0 (type: char(1)), UDFToLong(ds_hll_estimate(_col1)) (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+rewritten;mv not used	a	10
+rewritten;mv not used	b	10
+PREHOOK: query: explain
+select 'mv used',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select 'mv used',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: sketch_input
+                  Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: id (type: int), category (type: char(1))
+                    outputColumnNames: id, category
+                    Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: category (type: char(1)), id (type: int)
+                      minReductionHashAggr: 0.52272725
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(1)), _col1 (type: int)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: char(1))
+                        Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: char(1)), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: int), _col0 (type: char(1))
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count(_col0)
+                    keys: _col1 (type: char(1))
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 'mv used' (type: string), _col0 (type: char(1)), _col1 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select 'mv used',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select 'mv used',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+mv used	a	10
+mv used	b	10