You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2020/05/09 09:31:02 UTC
[hive] 02/02: HIVE-23368: MV rebuild should produce the same view
as the one configured at creation time (Zoltan Haindrich reviewed by Jesus
Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
commit 39faad1dae7316b1f29b6c5589a3b8edc54092f7
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Sat May 9 08:50:22 2020 +0000
HIVE-23368: MV rebuild should produce the same view as the one configured at creation time (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/parse/CalcitePlanner.java | 12 +-
.../sketches_materialized_view_safety.q | 38 ++
.../llap/sketches_materialized_view_safety.q.out | 519 +++++++++++++++++++++
4 files changed, 569 insertions(+), 1 deletion(-)
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 2036f29..cf3bc5c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -830,6 +830,7 @@ minillaplocal.query.files=\
sketches_rewrite.q,\
sketches_materialized_view_rollup.q,\
sketches_materialized_view_rollup2.q,\
+ sketches_materialized_view_safety.q,\
table_access_keys_stats.q,\
temp_table_llap_partitioned.q,\
tez_bmj_schema_evolution.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index bf08306..085de48 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1970,7 +1970,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
HiveExceptRewriteRule.INSTANCE);
//1. Distinct aggregate rewrite
- if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) {
+
+ if (!isMaterializedViewMaintenance() && conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_ENABLED)) {
// Rewrite to datasketches if enabled
if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNTDISTINCT_ENABLED)) {
String sketchClass = conf.getVar(ConfVars.HIVE_OPTIMIZE_BI_REWRITE_COUNT_DISTINCT_SKETCH);
@@ -2106,6 +2107,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
return basePlan;
}
+ /**
+ * Returns true if MV is being loaded, constructed or being rebuilt.
+ */
+ private boolean isMaterializedViewMaintenance() {
+ return mvRebuildMode != MaterializationRebuildMode.NONE
+ || ctx.isLoadingMaterializedView()
+ || getQB().isMaterializedView();
+ }
+
private RelNode applyMaterializedViewRewriting(RelOptPlanner planner, RelNode basePlan,
RelMetadataProvider mdProvider, RexExecutor executorProvider) {
final RelOptCluster optCluster = basePlan.getCluster();
diff --git a/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q b/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q
new file mode 100644
index 0000000..620cbb7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sketches_materialized_view_safety.q
@@ -0,0 +1,38 @@
+--! qt:transactional
+set hive.fetch.task.conversion=none;
+set hive.optimize.bi.enabled=true;
+
+create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true');
+
+insert into table sketch_input values
+ (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+;
+
+explain
+create materialized view mv_1 as
+ select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category;
+create materialized view mv_1 as
+ select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category;
+
+insert into table sketch_input values
+ (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+;
+
+explain
+alter materialized view mv_1 rebuild;
+alter materialized view mv_1 rebuild;
+
+-- see if we use the mv
+explain
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category;
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category;
+
+set hive.optimize.bi.enabled=false;
+
+explain
+select 'mv used',category, count(distinct id) from sketch_input group by category;
+select 'mv used',category, count(distinct id) from sketch_input group by category;
diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out
new file mode 100644
index 0000000..959edfc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_safety.q.out
@@ -0,0 +1,519 @@
+PREHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_input
+PREHOOK: query: insert into table sketch_input values
+ (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+ (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: explain
+create materialized view mv_1 as
+ select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: explain
+create materialized view mv_1 as
+ select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_1
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-0, Stage-2
+ Stage-3 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: sketch_input
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: int), category (type: char(1))
+ outputColumnNames: id, category
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: category (type: char(1)), id (type: int)
+ minReductionHashAggr: 0.3181818
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(1)), _col1 (type: int)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: char(1))
+ Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: char(1)), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: char(1))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 979 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col0)
+ keys: _col1 (type: char(1))
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'no-rewrite-may-happen' (type: string), _col0 (type: char(1)), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mv_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: char(1)), _col2 (type: bigint)
+ outputColumnNames: col1, col2, col3
+ Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll')
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-4
+ Create View
+ columns: _c0 string, category char(1), _c2 bigint
+ expanded text: select 'no-rewrite-may-happen',`sketch_input`.`category`, count(distinct `sketch_input`.`id`) from `default`.`sketch_input` group by `sketch_input`.`category`
+ name: default.mv_1
+ original text: select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+ rewrite enabled: true
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: _c0, category, _c2
+ Column Types: string, char(1), bigint
+ Table: default.mv_1
+
+ Stage: Stage-5
+ Materialized View Update
+ name: default.mv_1
+ retrieve and include: true
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create materialized view mv_1 as
+ select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: create materialized view mv_1 as
+ select 'no-rewrite-may-happen',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_1
+PREHOOK: query: insert into table sketch_input values
+ (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+ (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: explain
+alter materialized view mv_1 rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: explain
+alter materialized view mv_1 rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@mv_1
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: sketch_input
+ Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: int), category (type: char(1))
+ outputColumnNames: id, category
+ Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: category (type: char(1)), id (type: int)
+ minReductionHashAggr: 0.52272725
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(1)), _col1 (type: int)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: char(1))
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: char(1)), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: char(1))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col0)
+ keys: _col1 (type: char(1))
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'no-rewrite-may-happen' (type: string), _col0 (type: char(1)), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mv_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: char(1)), _col2 (type: bigint)
+ outputColumnNames: _c0, category, _c2
+ Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(_c0, 'hll'), compute_stats(category, 'hll'), compute_stats(_c2, 'hll')
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.mv_1
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: _c0, category, _c2
+ Column Types: string, char(1), bigint
+ Table: default.mv_1
+
+ Stage: Stage-4
+ Materialized View Update
+ name: default.mv_1
+ update creation metadata: true
+
+PREHOOK: query: alter materialized view mv_1 rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: alter materialized view mv_1 rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@mv_1
+POSTHOOK: Lineage: mv_1._c0 SIMPLE []
+POSTHOOK: Lineage: mv_1._c2 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: mv_1.category SIMPLE [(sketch_input)sketch_input.FieldSchema(name:category, type:char(1), comment:null), ]
+PREHOOK: query: explain
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: sketch_input
+ Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: int), category (type: char(1))
+ outputColumnNames: id, category
+ Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: ds_hll_sketch(id)
+ keys: category (type: char(1))
+ minReductionHashAggr: 0.95454544
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(1))
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(1))
+ Statistics: Num rows: 2 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: struct<lgk:int,type:string,sketch:binary>)
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: ds_hll_sketch(VALUE._col0)
+ keys: KEY._col0 (type: char(1))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'rewritten;mv not used' (type: string), _col0 (type: char(1)), UDFToLong(ds_hll_estimate(_col1)) (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select 'rewritten;mv not used',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+rewritten;mv not used a 10
+rewritten;mv not used b 10
+PREHOOK: query: explain
+select 'mv used',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select 'mv used',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: sketch_input
+ Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: int), category (type: char(1))
+ outputColumnNames: id, category
+ Statistics: Num rows: 44 Data size: 3916 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: category (type: char(1)), id (type: int)
+ minReductionHashAggr: 0.52272725
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(1)), _col1 (type: int)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: char(1))
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: char(1)), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: char(1))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 22 Data size: 1958 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col0)
+ keys: _col1 (type: char(1))
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'mv used' (type: string), _col0 (type: char(1)), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select 'mv used',category, count(distinct id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select 'mv used',category, count(distinct id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+mv used a 10
+mv used b 10