Posted to commits@hive.apache.org by vg...@apache.org on 2018/10/15 03:33:49 UTC
[1/3] hive git commit: HIVE-17043: Remove non unique columns from group by keys if not referenced later (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
Repository: hive
Updated Branches:
refs/heads/master 259db56e3 -> 1db3debcb
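
A note on what the golden-file changes below reflect: the patch teaches the optimizer that when the group-by keys contain a column set declared unique (for example a PRIMARY KEY ... DISABLE RELY constraint) and the remaining keys are not referenced downstream, the non-unique keys can be dropped; if no aggregate call is left either, the Group By is removed altogether. A minimal sketch of the effect, using the dest_g21 table from the new constraints_optimization.q.out file in part 2 of this commit (an illustration of the rewrite, not the optimizer code itself):

    -- key1 is unique via the RELY primary key; value1 is never referenced
    -- after the GROUP BY, so it is dropped from the keys, and the now
    -- key1-only, aggregate-free Group By is eliminated entirely.
    CREATE TABLE dest_g21 (key1 int, value1 double,
                           primary key (key1) disable rely);
    EXPLAIN SELECT key1 FROM dest_g21 GROUP BY key1, value1;
    -- resulting plan: TableScan -> Select -> ListSink, with no Group By
    -- Operator, as in the Stage-0 output further down.

In the materialized_view_rewrite_1.q.out hunk below, the rewritten plans now scan the materialized view default.mv1_n2 instead of the base table emps_n3, which is why mv1_n2 is added to the PREHOOK/POSTHOOK inputs and appears as the TableScan alias.
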
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
index 735a4db..a7a1a0b 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
@@ -641,11 +641,13 @@ PREHOOK: query: explain
select empid, deptno from emps_n3 group by empid, deptno
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain
select empid, deptno from emps_n3 group by empid, deptno
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -656,7 +658,7 @@ STAGE PLANS:
limit: -1
Processor Tree:
TableScan
- alias: emps_n3
+ alias: default.mv1_n2
Select Operator
expressions: empid (type: int), deptno (type: int)
outputColumnNames: _col0, _col1
@@ -665,10 +667,12 @@ STAGE PLANS:
PREHOOK: query: select empid, deptno from emps_n3 group by empid, deptno
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
POSTHOOK: query: select empid, deptno from emps_n3 group by empid, deptno
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
100 10
110 10
@@ -709,11 +713,13 @@ PREHOOK: query: explain
select empid, name from emps_n3 group by empid, name
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain
select empid, name from emps_n3 group by empid, name
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -724,7 +730,7 @@ STAGE PLANS:
limit: -1
Processor Tree:
TableScan
- alias: emps_n3
+ alias: default.mv1_n2
Select Operator
expressions: empid (type: int), name (type: varchar(256))
outputColumnNames: _col0, _col1
@@ -733,10 +739,12 @@ STAGE PLANS:
PREHOOK: query: select empid, name from emps_n3 group by empid, name
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
POSTHOOK: query: select empid, name from emps_n3 group by empid, name
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
100 Bill
110 Theodore
[2/3] hive git commit: HIVE-17043: Remove non unique columns from group by keys if not referenced later (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
Posted by vg...@apache.org.
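
constraints_optimization.q.out is a new test file covering the reduction for single and composite RELY primary keys, key/non-key mixes, joins, aggregate calls (the negative cases), and NOT NULL constraints. For a composite key, all of its columns must appear among the group-by keys before the non-key columns can be dropped (compare the "group by keys are not primary keys" case below, which keeps its Group By Operator). A sketch using the dates_removal_n0 table from the file, with the column list abridged:

    -- (d_datekey, d_id) is a composite RELY primary key, so the two
    -- columns together are unique and d_sellingseason can be dropped
    -- from the group-by keys without changing the result.
    CREATE TABLE dates_removal_n0 (
      d_datekey bigint,
      d_id bigint,
      d_sellingseason string,   -- abridged: the file declares 18 columns
      primary key (d_datekey, d_id) disable rely);
    EXPLAIN SELECT d_datekey FROM dates_removal_n0
    GROUP BY d_id, d_datekey, d_sellingseason;
    -- the plan groups on nothing: the aggregate-free Group By vanishes.
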
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
new file mode 100644
index 0000000..b45b7c4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
@@ -0,0 +1,2010 @@
+PREHOOK: query: CREATE TABLE `customer_removal_n0`(
+ `c_custkey` bigint,
+ `c_name` string,
+ `c_address` string,
+ `c_city` string,
+ `c_nation` string,
+ `c_region` string,
+ `c_phone` string,
+ `c_mktsegment` string,
+ primary key (`c_custkey`) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@customer_removal_n0
+POSTHOOK: query: CREATE TABLE `customer_removal_n0`(
+ `c_custkey` bigint,
+ `c_name` string,
+ `c_address` string,
+ `c_city` string,
+ `c_nation` string,
+ `c_region` string,
+ `c_phone` string,
+ `c_mktsegment` string,
+ primary key (`c_custkey`) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@customer_removal_n0
+PREHOOK: query: CREATE TABLE `dates_removal_n0`(
+ `d_datekey` bigint,
+ `d_id` bigint,
+ `d_date` string,
+ `d_dayofweek` string,
+ `d_month` string,
+ `d_year` int,
+ `d_yearmonthnum` int,
+ `d_yearmonth` string,
+ `d_daynuminweek` int,
+ `d_daynuminmonth` int,
+ `d_daynuminyear` int,
+ `d_monthnuminyear` int,
+ `d_weeknuminyear` int,
+ `d_sellingseason` string,
+ `d_lastdayinweekfl` int,
+ `d_lastdayinmonthfl` int,
+ `d_holidayfl` int ,
+ `d_weekdayfl`int,
+ primary key (`d_datekey`, `d_id`) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: CREATE TABLE `dates_removal_n0`(
+ `d_datekey` bigint,
+ `d_id` bigint,
+ `d_date` string,
+ `d_dayofweek` string,
+ `d_month` string,
+ `d_year` int,
+ `d_yearmonthnum` int,
+ `d_yearmonth` string,
+ `d_daynuminweek` int,
+ `d_daynuminmonth` int,
+ `d_daynuminyear` int,
+ `d_monthnuminyear` int,
+ `d_weeknuminyear` int,
+ `d_sellingseason` string,
+ `d_lastdayinweekfl` int,
+ `d_lastdayinmonthfl` int,
+ `d_holidayfl` int ,
+ `d_weekdayfl`int,
+ primary key (`d_datekey`, `d_id`) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dates_removal_n0
+PREHOOK: query: -- group by key has single primary key
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- group by key has single primary key
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: customer_removal_n0
+ filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Filter Operator
+ predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Select Operator
+ expressions: c_custkey (type: bigint)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: -- mix of primary + non-primary keys
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- mix of primary + non-primary keys
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: customer_removal_n0
+ filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Filter Operator
+ predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Select Operator
+ expressions: c_custkey (type: bigint)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: -- multiple keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- multiple keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Select Operator
+ expressions: d_datekey (type: bigint)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: -- multiple keys + non-keys + different order
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason
+ order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- multiple keys + non-keys + different order
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason
+ order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_datekey (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- multiple keys in different order and mixed with non-keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- multiple keys in different order and mixed with non-keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_datekey (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- same as above but with aggregate
+ EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- same as above but with aggregate
+ EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_datekey (type: bigint), d_id (type: bigint)
+ outputColumnNames: d_datekey, d_id
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Top N Key Operator
+ sort order: ++
+ keys: d_datekey (type: bigint), d_id (type: bigint)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ top n: 10
+ Group By Operator
+ aggregations: count()
+ keys: d_datekey (type: bigint), d_id (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: bigint), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- join
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: -- join
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates_removal_n0
+POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE []
+PREHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates_removal_n0
+POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE []
+PREHOOK: query: insert into customer_removal_n0 (c_custkey) values(3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@customer_removal_n0
+POSTHOOK: query: insert into customer_removal_n0 (c_custkey) values(3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@customer_removal_n0
+POSTHOOK: Lineage: customer_removal_n0.c_address SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_city SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_custkey SCRIPT []
+POSTHOOK: Lineage: customer_removal_n0.c_mktsegment SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_name SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_nation SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_phone SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_region SIMPLE []
+PREHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: d_datekey (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: customer_removal_n0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: c_custkey (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+3
+3
+PREHOOK: query: -- group by keys are not primary keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason
+ order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- group by keys are not primary keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason
+ order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: d_datekey (type: bigint), d_sellingseason (type: string)
+ outputColumnNames: d_datekey, d_sellingseason
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: ++
+ keys: d_datekey (type: bigint), d_sellingseason (type: string)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 10
+ Group By Operator
+ keys: d_datekey (type: bigint), d_sellingseason (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- negative
+ -- with aggregate function
+ EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA')
+ group by c_custkey, c_nation
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- negative
+ -- with aggregate function
+ EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA')
+ group by c_custkey, c_nation
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer_removal_n0
+ filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: c_custkey (type: bigint)
+ outputColumnNames: c_custkey
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: c_custkey (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: DROP TABLE customer_removal_n0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Output: default@customer_removal_n0
+POSTHOOK: query: DROP TABLE customer_removal_n0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Output: default@customer_removal_n0
+PREHOOK: query: DROP TABLE dates_removal_n0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dates_removal_n0
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: DROP TABLE dates_removal_n0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dates_removal_n0
+POSTHOOK: Output: default@dates_removal_n0
+PREHOOK: query: -- group by reduction optimization
+ create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: -- group by reduction optimization
+ create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dest_g21
+POSTHOOK: Lineage: dest_g21.key1 SCRIPT []
+POSTHOOK: Lineage: dest_g21.value1 SCRIPT []
+PREHOOK: query: -- value1 will be removed because it is unused, then the whole group by will be removed because key1 is unique
+ explain select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- value1 will be removed because it is unused, then the whole group by will be removed because key1 is unique
+ explain select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+2
+3
+4
+5
+6
+PREHOOK: query: -- same query but with filter
+ explain select key1 from dest_g21 where value1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- same query but with filter
+ explain select key1 from dest_g21 where value1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (value1 > 1.0D) (type: boolean)
+ Filter Operator
+ predicate: (value1 > 1.0D) (type: boolean)
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+2
+4
+PREHOOK: query: explain select key1 from dest_g21 where key1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g21 where key1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (key1 > 1) (type: boolean)
+ Filter Operator
+ predicate: (key1 > 1) (type: boolean)
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: select key1 from dest_g21 where key1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from dest_g21 where key1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+2
+3
+4
+5
+6
+PREHOOK: query: -- only value1 will be removed because there is aggregate call
+ explain select count(key1) from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- only value1 will be removed because there is aggregate call
+ explain select count(key1) from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dest_g21
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: key1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(key1) from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select count(key1) from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+1
+1
+1
+1
+1
+PREHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (value1 > 1.0D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value1 > 1.0D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: key1
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+1
+1
+PREHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key)
+ explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key)
+ explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: (value1 > 2.0D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value1 > 2.0D) (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+4
+PREHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: (value1 > 2.0D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value1 > 2.0D) (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+PREHOOK: query: -- both aggregate and one of the key1 should be removed
+ explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- both aggregate and one of the key1 should be removed
+ explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (value1 < 4.5D) (type: boolean)
+ Filter Operator
+ predicate: (value1 < 4.5D) (type: boolean)
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+2
+3
+4
+PREHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed
+ explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed
+ explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (value1 < 4.5D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value1 < 4.5D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: key1
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(key1)
+ keys: key1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1 1
+3 3
+4 4
+2 2
+PREHOOK: query: DROP table dest_g21
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g21
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: DROP table dest_g21
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g21
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tconst
+POSTHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tconst
+PREHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tconst
+POSTHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tconst
+POSTHOOK: Lineage: tconst.d_year SCRIPT []
+POSTHOOK: Lineage: tconst.i SCRIPT []
+POSTHOOK: Lineage: tconst.j SCRIPT []
+PREHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tconst
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), j (type: int), d_year (type: string)
+ outputColumnNames: i, j, d_year
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: i (type: int), j (type: int), d_year (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select i, j from tconst where i is not null group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i, j from tconst where i is not null group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3 3
+2 NULL
+1 1
+PREHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tconst
+ filterExpr: j is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: j is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: i (type: int), j (type: int), d_year (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3 3
+1 1
+PREHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tconst
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), j (type: int), d_year (type: string)
+ outputColumnNames: i, j, d_year
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: i (type: int), j (type: int), d_year (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3 3
+2 NULL
+1 1
+PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), d_year (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: j is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: j is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: j (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+1
+3
+PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), d_year (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+1
+2
+3
+PREHOOK: query: DROP TABLE tconst
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tconst
+PREHOOK: Output: default@tconst
+POSTHOOK: query: DROP TABLE tconst
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tconst
+POSTHOOK: Output: default@tconst
+PREHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: explain select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g24
+POSTHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g24
+PREHOOK: query: explain select key1 from dest_g24 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g24
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g24 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g24
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dest_g24
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key1 (type: int), value1 (type: double)
+ outputColumnNames: key1, value1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key1 (type: int), value1 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: double)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: double)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: DROP TABLE dest_g21
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g21
+PREHOOK: Output:
<TRUNCATED>
[3/3] hive git commit: HIVE-17043: Remove non unique columns from
group by keys if not referenced later (Vineet Garg,
reviewed by Jesus Camacho Rodriguez)
Posted by vg...@apache.org.
HIVE-17043: Remove non unique columns from group by keys if not referenced later (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1db3debc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1db3debc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1db3debc
Branch: refs/heads/master
Commit: 1db3debcb564c60db872022761b3ac4862509694
Parents: 259db56
Author: Vineet Garg <vg...@apache.org>
Authored: Sun Oct 14 20:32:40 2018 -0700
Committer: Vineet Garg <vg...@apache.org>
Committed: Sun Oct 14 20:32:40 2018 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../calcite/HiveDefaultRelMetadataProvider.java | 2 +
.../ql/optimizer/calcite/RelOptHiveTable.java | 49 +-
.../rules/HiveAggregateJoinTransposeRule.java | 2 +-
.../calcite/rules/HiveRelFieldTrimmer.java | 269 ++-
.../calcite/stats/EstimateUniqueKeys.java | 333 +++
.../stats/HiveRelMdColumnUniqueness.java | 63 +
.../calcite/stats/HiveRelMdRowCount.java | 7 +-
.../calcite/stats/HiveRelMdUniqueKeys.java | 152 +-
.../clientpositive/constraints_optimization.q | 149 ++
.../llap/constraints_optimization.q.out | 2010 ++++++++++++++++++
.../llap/materialized_view_rewrite_1.q.out | 12 +-
12 files changed, 2848 insertions(+), 201 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9a87464..b6d42c6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -489,6 +489,7 @@ minillaplocal.query.files=\
colstats_date_min_max.q,\
compare_double_bigint_2.q,\
constprog_dpp.q,\
+ constraints_optimization.q,\
current_date_timestamp.q,\
correlationoptimizer1.q,\
correlationoptimizer2.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index 635d27e..41c2f9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution;
@@ -71,6 +72,7 @@ public class HiveDefaultRelMetadataProvider {
HiveRelMdSelectivity.SOURCE,
HiveRelMdRowCount.SOURCE,
HiveRelMdUniqueKeys.SOURCE,
+ HiveRelMdColumnUniqueness.SOURCE,
HiveRelMdSize.SOURCE,
HiveRelMdMemory.SOURCE,
new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(),
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 42e60de..fc9178f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.optimizer.calcite;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -35,7 +34,6 @@ import org.apache.calcite.prepare.RelOptTableImpl;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollationTraitDef;
import org.apache.calcite.rel.RelDistribution;
-import org.apache.calcite.rel.RelDistributions;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelFieldCollation.Direction;
import org.apache.calcite.rel.RelFieldCollation.NullDirection;
@@ -49,6 +47,7 @@ import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.schema.ColumnStrategy;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
import org.apache.calcite.util.mapping.IntPair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -100,6 +99,7 @@ public class RelOptHiveTable implements RelOptTable {
private final ImmutableList<VirtualColumn> hiveVirtualCols;
private final int noOfNonVirtualCols;
private final List<ImmutableBitSet> keys;
+ private final List<ImmutableBitSet> nonNullablekeys;
private final List<RelReferentialConstraint> referentialConstraints;
final HiveConf hiveConf;
@@ -133,7 +133,9 @@ public class RelOptHiveTable implements RelOptTable {
this.partitionCache = partitionCache;
this.colStatsCache = colStatsCache;
this.noColsMissingStats = noColsMissingStats;
- this.keys = generateKeys();
+ Pair<List<ImmutableBitSet>, List<ImmutableBitSet>> constraintKeys = generateKeys();
+ this.keys = constraintKeys.left;
+ this.nonNullablekeys = constraintKeys.right;
this.referentialConstraints = generateReferentialConstraints();
}
@@ -167,6 +169,10 @@ public class RelOptHiveTable implements RelOptTable {
throw new UnsupportedOperationException();
}
+ public List<ImmutableBitSet> getNonNullableKeys() {
+ return nonNullablekeys;
+ }
+
@Override
public RelOptTable extend(List<RelDataTypeField> extendedFields) {
throw new UnsupportedOperationException();
@@ -213,6 +219,16 @@ public class RelOptHiveTable implements RelOptTable {
this.hiveConf, this.partitionCache, this.colStatsCache, this.noColsMissingStats);
}
+ // Given a set of columns, returns true if they correspond to a declared key whose columns are all non-nullable
+ public boolean isNonNullableKey(ImmutableBitSet columns) {
+ for (ImmutableBitSet key : nonNullablekeys) {
+ if (key.contains(columns)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
@Override
public boolean isKey(ImmutableBitSet columns) {
for (ImmutableBitSet key : keys) {
@@ -228,8 +244,9 @@ public class RelOptHiveTable implements RelOptTable {
return referentialConstraints;
}
- private List<ImmutableBitSet> generateKeys() {
+ private Pair<List<ImmutableBitSet>, List<ImmutableBitSet>> generateKeys() {
ImmutableList.Builder<ImmutableBitSet> builder = ImmutableList.builder();
+ ImmutableList.Builder<ImmutableBitSet> nonNullbuilder = ImmutableList.builder();
// First PK
final PrimaryKeyInfo pki;
try {
@@ -250,11 +267,12 @@ public class RelOptHiveTable implements RelOptTable {
}
if (pkPos == rowType.getFieldNames().size()) {
LOG.error("Column for primary key definition " + pkColName + " not found");
- return ImmutableList.of();
}
keys.set(pkPos);
}
- builder.add(keys.build());
+ ImmutableBitSet key = keys.build();
+ builder.add(key);
+ nonNullbuilder.add(key);
}
// Then UKs
final UniqueConstraint uki;
@@ -266,23 +284,31 @@ public class RelOptHiveTable implements RelOptTable {
}
for (List<UniqueConstraintCol> ukCols : uki.getUniqueConstraints().values()) {
ImmutableBitSet.Builder keys = ImmutableBitSet.builder();
+ boolean isNonNullable = true;
for (UniqueConstraintCol ukCol : ukCols) {
int ukPos;
for (ukPos = 0; ukPos < rowType.getFieldNames().size(); ukPos++) {
String colName = rowType.getFieldNames().get(ukPos);
if (ukCol.colName.equals(colName)) {
+ if(rowType.getFieldList().get(ukPos).getType().isNullable()) {
+ // all columns must be non-nullable for the key to count as non-nullable
+ isNonNullable = false;
+ }
break;
}
}
if (ukPos == rowType.getFieldNames().size()) {
LOG.error("Column for unique constraint definition " + ukCol.colName + " not found");
- return ImmutableList.of();
}
keys.set(ukPos);
}
- builder.add(keys.build());
+ ImmutableBitSet key = keys.build();
+ builder.add(key);
+ if(isNonNullable) {
+ nonNullbuilder.add(key);
+ }
}
- return builder.build();
+ return new Pair<>(builder.build(), nonNullbuilder.build());
}
private List<RelReferentialConstraint> generateReferentialConstraints() {
@@ -368,12 +394,11 @@ public class RelOptHiveTable implements RelOptTable {
if (sortColumn.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC) {
direction = Direction.ASCENDING;
nullDirection = NullDirection.FIRST;
- }
- else {
+ } else {
direction = Direction.DESCENDING;
nullDirection = NullDirection.LAST;
}
- collationList.add(new RelFieldCollation(i,direction,nullDirection));
+ collationList.add(new RelFieldCollation(i, direction, nullDirection));
break;
}
}
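
The core of this file's change is the split between keys and the new nonNullablekeys list: a UNIQUE constraint over nullable columns still admits duplicate NULLs under SQL semantics, so only a constraint whose columns are all declared NOT NULL can vouch for row distinctness when NULLs are not ignored. A minimal sketch of that per-constraint test, restated outside the class (the class and method names here are illustrative, not from the patch):

    import java.util.List;

    import org.apache.calcite.rel.type.RelDataTypeField;
    import org.apache.calcite.util.ImmutableBitSet;

    final class UniqueKeyNullabilitySketch {
      private UniqueKeyNullabilitySketch() {}

      // A key is non-nullable only when every column in it is declared
      // NOT NULL; a single nullable column is enough to allow repeated
      // NULLs without violating the unique constraint.
      static boolean isNonNullable(ImmutableBitSet key, List<RelDataTypeField> fields) {
        for (int pos : key) {
          if (fields.get(pos).getType().isNullable()) {
            return false;
          }
        }
        return true;
      }
    }
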
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
index f43ef01..ed6659c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
@@ -171,7 +171,7 @@ public class HiveAggregateJoinTransposeRule extends AggregateJoinTransposeRule {
unique = true;
} else {
final Boolean unique0 =
- mq.areColumnsUnique(joinInput, belowAggregateKey);
+ mq.areColumnsUnique(joinInput, belowAggregateKey, true);
unique = unique0 != null && unique0;
}
if (unique) {
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 5857f73..b7c31bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -24,15 +24,19 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
@@ -300,18 +304,58 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
private boolean isRexLiteral(final RexNode rexNode) {
if(rexNode instanceof RexLiteral) {
return true;
- }
- else if(rexNode instanceof RexCall
+ } else if(rexNode instanceof RexCall
&& ((RexCall)rexNode).getOperator().getKind() == SqlKind.CAST){
return isRexLiteral(((RexCall)(rexNode)).getOperands().get(0));
- }
- else {
+ } else {
return false;
}
}
+
+ // if the group by keys contain a pk/uk, the non-pk/non-uk columns are removed when they are not referenced later
+ private ImmutableBitSet generateNewGroupset(Aggregate aggregate, ImmutableBitSet fieldsUsed) {
+
+ ImmutableBitSet originalGroupSet = aggregate.getGroupSet();
+
+ if (aggregate.getGroupSets().size() > 1 || aggregate.getIndicatorCount() > 0
+ || fieldsUsed.contains(originalGroupSet)) {
+ // if there are grouping sets, indicators, or all the group keys are being used, there is no need to proceed further
+ return originalGroupSet;
+ }
+
+ final RelNode input = aggregate.getInput();
+ RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery();
+
+ final Set<ImmutableBitSet> uniqueKeys = mq.getUniqueKeys(input, false);
+ if (uniqueKeys == null || uniqueKeys.isEmpty()) {
+ return originalGroupSet;
+ }
+
+ // we have a set of unique keys; find a key that is contained in the group by keys
+ ImmutableBitSet groupByUniqueKey = null;
+
+ for (ImmutableBitSet key : uniqueKeys) {
+ if (aggregate.getGroupSet().contains(key)) {
+ groupByUniqueKey = key;
+ break;
+ }
+ }
+
+ if (groupByUniqueKey == null) {
+ // group by keys do not represent unique keys
+ return originalGroupSet;
+ }
+
+ // we know the group by keys contain a unique key and at least one group by column is not being used;
+ // any such column that is not part of the key can be removed
+ ImmutableBitSet nonKeyColumns = aggregate.getGroupSet().except(groupByUniqueKey);
+ ImmutableBitSet columnsToRemove = nonKeyColumns.except(fieldsUsed);
+ ImmutableBitSet newGroupSet = aggregate.getGroupSet().except(columnsToRemove);
+
+ return newGroupSet;
+ }
+
/**
- * Variant of {@link #trimFields(Aggregate, ImmutableBitSet, Set)} for
- * {@link org.apache.calcite.rel.logical.LogicalAggregate}.
* This method replaces group by 'constant key' with group by true (boolean)
* if and only if
* group by doesn't have grouping sets
@@ -322,50 +366,193 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
* underneath aggregate
*
* This is mainly done so that hive is able to push down queries with
- * group by 'constant key with type not supported by druid' into druid
+ * group by 'constant key with type not supported by druid' into druid.
+ *
*/
- public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed,
- Set<RelDataTypeField> extraFields) {
-
- Aggregate newAggregate = aggregate;
- if (!(aggregate.getIndicatorCount() > 0)
- && !(aggregate.getGroupSet().isEmpty())
- && !fieldsUsed.contains(aggregate.getGroupSet())) {
- final RelNode input = aggregate.getInput();
- final RelDataType rowType = input.getRowType();
- RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
- final List<RexNode> newProjects = new ArrayList<>();
-
- final List<RexNode> inputExprs = input.getChildExps();
- if(inputExprs == null || inputExprs.isEmpty()) {
- return super.trimFields(newAggregate, fieldsUsed, extraFields);
+ private Aggregate rewriteGBConstantKeys(Aggregate aggregate, ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ if ((aggregate.getIndicatorCount() > 0)
+ || (aggregate.getGroupSet().isEmpty())
+ || fieldsUsed.contains(aggregate.getGroupSet())) {
+ return aggregate;
+ }
+
+ final RelNode input = aggregate.getInput();
+
+
+ final RelDataType rowType = input.getRowType();
+ RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
+ final List<RexNode> newProjects = new ArrayList<>();
+
+ final List<RexNode> inputExprs = input.getChildExps();
+ if (inputExprs == null || inputExprs.isEmpty()) {
+ return aggregate;
+ }
+
+ boolean allConstants = true;
+ for (int key : aggregate.getGroupSet()) {
+ // getChildExprs on Join could return fewer expressions than the join actually produces
+ if (inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))) {
+ allConstants = false;
+ break;
}
+ }
- boolean allConstants = true;
- for(int key : aggregate.getGroupSet()) {
- // getChildExprs on Join could return less number of expressions than there are coming out of join
- if(inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))){
- allConstants = false;
- break;
+ if (allConstants) {
+ for (int i = 0; i < rowType.getFieldCount(); i++) {
+ if (aggregate.getGroupSet().get(i)) {
+ newProjects.add(rexBuilder.makeLiteral(true));
+ } else {
+ newProjects.add(rexBuilder.makeInputRef(input, i));
}
}
+ relBuilder.push(input);
+ relBuilder.project(newProjects);
+ Aggregate newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(),
+ aggregate.getGroupSet(), null, aggregate.getAggCallList());
+ return newAggregate;
+ }
+ return aggregate;
+ }
- if (allConstants) {
- for (int i = 0; i < rowType.getFieldCount(); i++) {
- if (aggregate.getGroupSet().get(i)) {
- newProjects.add(rexBuilder.makeLiteral(true));
- } else {
- newProjects.add(rexBuilder.makeInputRef(input, i));
- }
- }
- relBuilder.push(input);
- relBuilder.project(newProjects);
- newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(),
- aggregate.getGroupSet(), null, aggregate.getAggCallList());
+ @Override
+ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
+ // Fields:
+ //
+ // | sys fields | group fields | indicator fields | agg functions |
+ //
+ // Two kinds of trimming:
+ //
+ // 1. If agg rel has system fields but none of these are used, create an
+ // agg rel with no system fields.
+ //
+ // 2. If aggregate functions are not used, remove them.
+ //
+ // But group and indicator fields stay, even if they are not used.
+
+ aggregate = rewriteGBConstantKeys(aggregate, fieldsUsed, extraFields);
+
+ final RelDataType rowType = aggregate.getRowType();
+
+ // Compute which input fields are used.
+ // 1. group fields are always used
+ final ImmutableBitSet.Builder inputFieldsUsed =
+ aggregate.getGroupSet().rebuild();
+ // 2. agg functions
+ for (AggregateCall aggCall : aggregate.getAggCallList()) {
+ for (int i : aggCall.getArgList()) {
+ inputFieldsUsed.set(i);
+ }
+ if (aggCall.filterArg >= 0) {
+ inputFieldsUsed.set(aggCall.filterArg);
+ }
+ }
+
+ // Create input with trimmed columns.
+ final RelNode input = aggregate.getInput();
+ final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+ final TrimResult trimResult =
+ trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields);
+ final RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ ImmutableBitSet originalGroupSet = aggregate.getGroupSet();
+ ImmutableBitSet updatedGroupSet = generateNewGroupset(aggregate, fieldsUsed);
+ ImmutableBitSet gbKeysDeleted = originalGroupSet.except(updatedGroupSet);
+ ImmutableBitSet updatedGroupFields = ImmutableBitSet.range(originalGroupSet.cardinality());
+ final int updatedGroupCount = updatedGroupSet.cardinality();
+
+ // we need to clear the bits corresponding to deleted gb keys
+ int setIdx = 0;
+ while(setIdx != -1) {
+ setIdx = gbKeysDeleted.nextSetBit(setIdx);
+ if(setIdx != -1) {
+ updatedGroupFields = updatedGroupFields.clear(setIdx);
+ setIdx++;
}
}
- return super.trimFields(newAggregate, fieldsUsed, extraFields);
+ fieldsUsed =
+ fieldsUsed.union(updatedGroupFields);
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing to do.
+ if (input == newInput
+ && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) {
+ return result(aggregate,
+ Mappings.createIdentity(rowType.getFieldCount()));
+ }
+
+ // update the group by keys based on inputMapping
+ ImmutableBitSet newGroupSet =
+ Mappings.apply(inputMapping, updatedGroupSet);
+
+ // Which agg calls are used by our consumer?
+ int originalGroupCount = aggregate.getGroupSet().cardinality();
+ int j = originalGroupCount;
+ int usedAggCallCount = 0;
+ for (int i = 0; i < aggregate.getAggCallList().size(); i++) {
+ if (fieldsUsed.get(j++)) {
+ ++usedAggCallCount;
+ }
+ }
+
+ // Offset due to the number of system fields having changed.
+ Mapping mapping =
+ Mappings.create(
+ MappingType.INVERSE_SURJECTION,
+ rowType.getFieldCount(),
+ updatedGroupCount + usedAggCallCount);
+
+
+ // if the group keys were reduced, we know there were no grouping sets,
+ // so the group sets do not need to be transformed
+ ImmutableList<ImmutableBitSet> newGroupSets = null;
+ if(!updatedGroupSet.equals(aggregate.getGroupSet())) {
+ newGroupSets = ImmutableList.of(newGroupSet);
+ } else {
+ newGroupSets = ImmutableList.copyOf(
+ Iterables.transform(aggregate.getGroupSets(),
+ input1 -> Mappings.apply(inputMapping, input1)));
+ }
+
+ // Populate mapping of where to find the fields. System, group key and
+ // indicator fields first.
+ int gbKeyIdx = 0;
+ for (j = 0; j < originalGroupCount; j++) {
+ if(fieldsUsed.get(j)) {
+ mapping.set(j, gbKeyIdx);
+ gbKeyIdx++;
+ }
+ }
+
+ // Now create new agg calls, and populate mapping for them.
+ relBuilder.push(newInput);
+ final List<RelBuilder.AggCall> newAggCallList = new ArrayList<>();
+ j = originalGroupCount; // because lookup in fieldsUsed is done using original group count
+ for (AggregateCall aggCall : aggregate.getAggCallList()) {
+ if (fieldsUsed.get(j)) {
+ final ImmutableList<RexNode> args =
+ relBuilder.fields(
+ Mappings.apply2(inputMapping, aggCall.getArgList()));
+ final RexNode filterArg = aggCall.filterArg < 0 ? null
+ : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg));
+ RelBuilder.AggCall newAggCall =
+ relBuilder.aggregateCall(aggCall.getAggregation(),
+ aggCall.isDistinct(), aggCall.isApproximate(),
+ filterArg, aggCall.name, args);
+ mapping.set(j, updatedGroupCount + newAggCallList.size());
+ newAggCallList.add(newAggCall);
+ }
+ ++j;
+ }
+
+ final RelBuilder.GroupKey groupKey =
+ relBuilder.groupKey(newGroupSet, newGroupSets);
+ relBuilder.aggregate(groupKey, newAggCallList);
+
+ return result(relBuilder.build(), mapping);
}
+
/**
* Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
* {@link org.apache.calcite.rel.logical.LogicalProject}.
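
Stripped of the mapping bookkeeping, the pruning in generateNewGroupset reduces to three bitset operations: keep the contained unique key, keep anything referenced downstream, drop the rest. A standalone restatement (the class and method names are illustrative; ImmutableBitSet.except is the same Calcite call the patch uses):

    import org.apache.calcite.util.ImmutableBitSet;

    final class GroupKeyPruningSketch {
      private GroupKeyPruningSketch() {}

      // Group by columns that are neither part of a contained unique key nor
      // referenced by any downstream operator cannot change which groups
      // exist, so they can be dropped from the grouping key.
      static ImmutableBitSet prune(ImmutableBitSet groupSet,
          ImmutableBitSet uniqueKey, ImmutableBitSet fieldsUsed) {
        ImmutableBitSet nonKeyColumns = groupSet.except(uniqueKey);
        ImmutableBitSet removable = nonKeyColumns.except(fieldsUsed);
        return groupSet.except(removable);
      }
    }

For the dest_g21 test shown earlier — select key1 from dest_g21 group by key1, value1, where key1 is declared NOT NULL and UNIQUE with RELY — this gives prune({0, 1}, {0}, {0}) = {0}, which is why that explain output degenerates to a bare TableScan with no Group By Operator.
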
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java
new file mode 100644
index 0000000..5ef945c
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Correlate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.JoinInfo;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BitSets;
+import org.apache.calcite.util.ImmutableBitSet;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.calcite.util.Util;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * EstimateUniqueKeys provides the ability to estimate unique keys based on statistics.
+ */
+//TODO: Ideally RelMdUniqueKeys should be modified (on the Calcite side) to accept a parameter based on which
+// this logic would be applied.
+public final class EstimateUniqueKeys {
+ //~ Constructors -----------------------------------------------------------
+ private EstimateUniqueKeys() {
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveFilter rel) {
+ return getUniqueKeys(rel.getInput());
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveSortLimit rel) {
+ return getUniqueKeys(rel.getInput());
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(Correlate rel) {
+ return getUniqueKeys(rel.getLeft());
+ }
+
+
+ // Infer uniqueness if:
+ //   - ndv(col) = rowCount(col)
+ //   - TBD for numerics: max(col) - min(col) + 1 = rowCount(col)
+ private static Set<ImmutableBitSet> generateKeysUsingStatsEstimation(Project rel,
+ HiveTableScan tScan) {
+ Map<Integer, Integer> posMap = new HashMap<Integer, Integer>();
+ int projectPos = 0;
+ int colStatsPos = 0;
+
+ BitSet projectedCols = new BitSet();
+ for (RexNode r : rel.getProjects()) {
+ if (r instanceof RexInputRef) {
+ projectedCols.set(((RexInputRef) r).getIndex());
+ posMap.put(colStatsPos, projectPos);
+ colStatsPos++;
+ }
+ projectPos++;
+ }
+
+ RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+ double numRows = mq.getRowCount(tScan);
+ List<ColStatistics> colStats = tScan.getColStat(BitSets
+ .toList(projectedCols));
+ Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>();
+
+ colStatsPos = 0;
+ for (ColStatistics cStat : colStats) {
+ boolean isKey = false;
+ if (cStat.getCountDistint() >= numRows) {
+ isKey = true;
+ }
+ if (!isKey && cStat.getRange() != null &&
+ cStat.getRange().maxValue != null &&
+ cStat.getRange().minValue != null) {
+ double r = cStat.getRange().maxValue.doubleValue() -
+ cStat.getRange().minValue.doubleValue() + 1;
+ isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON);
+ }
+ if (isKey) {
+ ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos));
+ keys.add(key);
+ }
+ colStatsPos++;
+ }
+
+ return keys;
+
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveProject rel) {
+
+ HiveTableScan tScan = getTableScan(rel.getInput(), false);
+ if (tScan != null) {
+ return generateKeysUsingStatsEstimation(rel, tScan);
+ }
+
+ // LogicalProject maps a set of rows to a different set;
+ // Without knowledge of the mapping function (whether it
+ // preserves uniqueness), it is only safe to derive uniqueness
+ // info from the child of a project when the mapping is f(a) => a.
+ //
+ // Furthermore, the unique bitset coming from the child needs
+ // to be mapped to match the output of the project.
+ final Map<Integer, Integer> mapInToOutPos = new HashMap<>();
+ final List<RexNode> projExprs = rel.getProjects();
+ final Set<ImmutableBitSet> projUniqueKeySet = new HashSet<>();
+
+ // Build an input to output position map.
+ for (int i = 0; i < projExprs.size(); i++) {
+ RexNode projExpr = projExprs.get(i);
+ if (projExpr instanceof RexInputRef) {
+ mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i);
+ }
+ }
+
+ if (mapInToOutPos.isEmpty()) {
+ // if there's no RexInputRef in the projected expressions
+ // return empty set.
+ return projUniqueKeySet;
+ }
+
+ Set<ImmutableBitSet> childUniqueKeySet =
+ getUniqueKeys(rel.getInput());
+
+ if (childUniqueKeySet != null) {
+ // Now add to the projUniqueKeySet the child keys that are fully
+ // projected.
+ for (ImmutableBitSet colMask : childUniqueKeySet) {
+ ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder();
+ boolean completeKeyProjected = true;
+ for (int bit : colMask) {
+ if (mapInToOutPos.containsKey(bit)) {
+ tmpMask.set(mapInToOutPos.get(bit));
+ } else {
+ // Skip the child unique key if part of it is not
+ // projected.
+ completeKeyProjected = false;
+ break;
+ }
+ }
+ if (completeKeyProjected) {
+ projUniqueKeySet.add(tmpMask.build());
+ }
+ }
+ }
+
+ return projUniqueKeySet;
+
+ }
+
+ private static RelNode getRelNode(RelNode rel) {
+ if (rel != null && rel instanceof HepRelVertex) {
+ rel = ((HepRelVertex) rel).getCurrentRel();
+ } else if (rel != null && rel instanceof RelSubset) {
+ rel = Util.first(((RelSubset)rel).getBest(), ((RelSubset) rel).getOriginal());
+ }
+ return rel;
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveJoin rel) {
+ RelNode left = getRelNode(rel.getLeft());
+ RelNode right = getRelNode(rel.getRight());
+
+ // first add the different combinations of concatenated unique keys
+ // from the left and the right, adjusting the right hand side keys to
+ // reflect the addition of the left hand side
+ //
+ // NOTE zfong 12/18/06 - If the number of tables in a join is large,
+ // the number of combinations of unique key sets will explode. If
+ // that is undesirable, use RelMetadataQuery.areColumnsUnique() as
+ // an alternative way of getting unique key information.
+
+ final Set<ImmutableBitSet> retSet = new HashSet<>();
+ final Set<ImmutableBitSet> leftSet = getUniqueKeys(left);
+ Set<ImmutableBitSet> rightSet = null;
+
+ final Set<ImmutableBitSet> tmpRightSet = getUniqueKeys(right);
+ int nFieldsOnLeft = left.getRowType().getFieldCount();
+
+ if (tmpRightSet != null) {
+ rightSet = new HashSet<>();
+ for (ImmutableBitSet colMask : tmpRightSet) {
+ ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder();
+ for (int bit : colMask) {
+ tmpMask.set(bit + nFieldsOnLeft);
+ }
+ rightSet.add(tmpMask.build());
+ }
+
+ if (leftSet != null) {
+ for (ImmutableBitSet colMaskRight : rightSet) {
+ for (ImmutableBitSet colMaskLeft : leftSet) {
+ retSet.add(colMaskLeft.union(colMaskRight));
+ }
+ }
+ }
+ }
+
+ // locate the columns that participate in equijoins
+ final JoinInfo joinInfo = rel.analyzeCondition();
+ RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+
+ // determine if either or both the LHS and RHS are unique on the
+ // equijoin columns
+ final Boolean leftUnique =
+ mq.areColumnsUnique(left, joinInfo.leftSet());
+ final Boolean rightUnique =
+ mq.areColumnsUnique(right, joinInfo.rightSet());
+
+ // if the right hand side is unique on its equijoin columns, then we can
+ // add the unique keys from left if the left hand side is not null
+ // generating
+ if ((rightUnique != null)
+ && rightUnique
+ && (leftSet != null)
+ && !(rel.getJoinType().generatesNullsOnLeft())) {
+ retSet.addAll(leftSet);
+ }
+
+ // same as above except left and right are reversed
+ if ((leftUnique != null)
+ && leftUnique
+ && (rightSet != null)
+ && !(rel.getJoinType().generatesNullsOnRight())) {
+ retSet.addAll(rightSet);
+ }
+
+ return retSet;
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveSemiJoin rel) {
+ // only return the unique keys from the LHS since a semijoin only
+ // returns the LHS
+ return getUniqueKeys(rel.getLeft());
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveAggregate rel) {
+ // group by keys form a unique key
+ return ImmutableSet.of(rel.getGroupSet());
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(SetOp rel) {
+ if (!rel.all) {
+ return ImmutableSet.of(
+ ImmutableBitSet.range(rel.getRowType().getFieldCount()));
+ }
+ return ImmutableSet.of();
+ }
+
+ // Catch-all rule when none of the others apply.
+ public static Set<ImmutableBitSet> getUniqueKeys(RelNode rel) {
+ rel = getRelNode(rel);
+ if (rel instanceof HiveFilter) {
+ return getUniqueKeys((HiveFilter) rel);
+ } else if (rel instanceof HiveSortLimit) {
+ return getUniqueKeys((HiveSortLimit) rel);
+ } else if (rel instanceof Correlate) {
+ return getUniqueKeys((Correlate) rel);
+ } else if (rel instanceof HiveProject) {
+ return getUniqueKeys((HiveProject) rel);
+ } else if (rel instanceof HiveJoin) {
+ return getUniqueKeys((HiveJoin) rel);
+ } else if (rel instanceof HiveSemiJoin) {
+ return getUniqueKeys((HiveSemiJoin) rel);
+ } else if (rel instanceof HiveAggregate) {
+ return getUniqueKeys((HiveAggregate) rel);
+ } else if (rel instanceof SetOp) {
+ return getUniqueKeys((SetOp) rel);
+ } else {
+ return null;
+ }
+ }
+
+ /*
+ * Traverse a path of Filters and Projects to get to the TableScan.
+ * When deriving unique keys, stop if you reach a Project; it will be handled
+ * by the invocation on the Project itself.
+ * When getting the base rowCount of a path, keep going past a Project.
+ */
+ static HiveTableScan getTableScan(RelNode r, boolean traverseProject) {
+
+ while (r != null && !(r instanceof HiveTableScan)) {
+ if (r instanceof HepRelVertex) {
+ r = ((HepRelVertex) r).getCurrentRel();
+ } else if (r instanceof Filter) {
+ r = ((Filter) r).getInput();
+ } else if (traverseProject && r instanceof Project) {
+ r = ((Project) r).getInput();
+ } else {
+ r = null;
+ }
+ }
+ return r == null ? null : (HiveTableScan) r;
+ }
+}
+
+// End EstimateUniqueKeys.java
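
Stripped of plumbing, the statistics check in generateKeysUsingStatsEstimation marks a projected column as a key in two situations: when its distinct-value count covers every row, or when its numeric range is exactly dense. A sketch with illustrative names; the patch reads the same inputs from ColStatistics and RelMetadataQuery, with RelOptUtil.EPSILON as the tolerance:

    final class StatsUniquenessSketch {
      private StatsUniquenessSketch() {}

      // Estimates only: these tests hold only as well as the underlying
      // column statistics do.
      static boolean looksUnique(double rowCount, double ndv,
          Double min, Double max) {
        if (ndv >= rowCount) {
          return true;            // every row has a distinct value
        }
        if (min != null && max != null) {
          // a gap-free range such as an id column: one value per row
          double rangeWidth = max - min + 1;
          return Math.abs(rowCount - rangeWidth) < 1e-5;  // RelOptUtil.EPSILON
        }
        return false;
      }
    }
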
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java
new file mode 100644
index 0000000..2dede97
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+
+import org.apache.calcite.rel.metadata.BuiltInMetadata;
+import org.apache.calcite.rel.metadata.MetadataDef;
+import org.apache.calcite.rel.metadata.MetadataHandler;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+/**
+ * HiveRelMdColumnUniqueness implements the ability to get unique keys for table scan op.
+ */
+public final class HiveRelMdColumnUniqueness
+ implements MetadataHandler<BuiltInMetadata.ColumnUniqueness> {
+ public static final RelMetadataProvider SOURCE =
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.COLUMN_UNIQUENESS.method, new HiveRelMdColumnUniqueness());
+
+ //~ Constructors -----------------------------------------------------------
+
+ private HiveRelMdColumnUniqueness() {}
+
+ //~ Methods ----------------------------------------------------------------
+
+ public MetadataDef<BuiltInMetadata.ColumnUniqueness> getDef() {
+ return BuiltInMetadata.ColumnUniqueness.DEF;
+ }
+
+ public Boolean areColumnsUnique(HiveTableScan rel, RelMetadataQuery mq,
+ ImmutableBitSet columns, boolean ignoreNulls) {
+ if(ignoreNulls) {
+ return rel.getTable().isKey(columns);
+ } else {
+ RelOptHiveTable tbl = (RelOptHiveTable)rel.getTable();
+ return tbl.isNonNullableKey(columns);
+ }
+ }
+}
+
+// End HiveRelMdColumnUniqueness.java
+
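
The ignoreNulls flag is what ties this handler to the rest of the patch: with it set, any declared key answers true, while without it only keys from the non-nullable list do. A hypothetical call sequence, assuming a planner-supplied RelMetadataQuery mq and a HiveTableScan scan over a table declaring UNIQUE on a nullable column 0:

    // Both calls use Calcite's three-argument areColumnsUnique overload;
    // the last argument is ignoreNulls.
    Boolean lenient = mq.areColumnsUnique(scan, ImmutableBitSet.of(0), true);
    // -> TRUE: the declared unique constraint alone is enough.
    Boolean strict = mq.areColumnsUnique(scan, ImmutableBitSet.of(0), false);
    // -> FALSE: a nullable unique column may repeat NULLs, so rows are not
    // guaranteed pairwise distinct.
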
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
index 1ca1937..576ed34 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
@@ -317,7 +317,7 @@ public class HiveRelMdRowCount extends RelMdRowCount {
(!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) {
return 1.0;
} else {
- HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true);
+ HiveTableScan tScan = EstimateUniqueKeys.getTableScan(child, true);
if (tScan != null) {
double tRowCount = mq.getRowCount(tScan);
return childRowCount / tRowCount;
@@ -329,7 +329,10 @@ public class HiveRelMdRowCount extends RelMdRowCount {
private static boolean isKey(ImmutableBitSet c, RelNode rel, RelMetadataQuery mq) {
boolean isKey = false;
- Set<ImmutableBitSet> keys = mq.getUniqueKeys(rel);
+// EstimateUniqueKeys doesn't go through the metadata providers anymore, which means we will
+// no longer take advantage of metadata caching. This could potentially increase query compile
+// time; leaving a note here in case we see an increase in timings.
+ Set<ImmutableBitSet> keys = EstimateUniqueKeys.getUniqueKeys(rel);
if (keys != null) {
for (ImmutableBitSet key : keys) {
if (key.equals(c)) {
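The caching trade-off called out in the new comment: RelMetadataQuery memoizes handler results for the lifetime of the query object, whereas a plain static call recomputes on every invocation. Roughly, with the EstimateUniqueKeys signature inferred from the hunk above rather than from its source:

    // Dispatched through the provider chain; results memoized by mq.
    Set<ImmutableBitSet> cached = mq.getUniqueKeys(rel);
    // Direct static call; no reflective dispatch, recomputed on each call.
    Set<ImmutableBitSet> direct = EstimateUniqueKeys.getUniqueKeys(rel);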
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
index 3bf62c5..7772335 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
@@ -17,31 +17,20 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
-import java.util.BitSet;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
-import java.util.Map;
import java.util.Set;
-import org.apache.calcite.plan.RelOptUtil;
-import org.apache.calcite.plan.hep.HepRelVertex;
-import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.core.Filter;
-import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.metadata.BuiltInMetadata;
import org.apache.calcite.rel.metadata.MetadataDef;
import org.apache.calcite.rel.metadata.MetadataHandler;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
-import org.apache.calcite.rex.RexInputRef;
-import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.util.BitSets;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
-import org.apache.hadoop.hive.ql.plan.ColStatistics;
public class HiveRelMdUniqueKeys implements MetadataHandler<BuiltInMetadata.UniqueKeys> {
@@ -54,138 +43,15 @@ public class HiveRelMdUniqueKeys implements MetadataHandler<BuiltInMetadata.Uniq
return BuiltInMetadata.UniqueKeys.DEF;
}
- /*
- * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col)
- * - min(col) = rowCount(col)
- *
- * Why are we intercepting Project and not TableScan? Because if we
- * have a method for TableScan, it will not know which columns to check for.
- * Inferring Uniqueness for all columns is very expensive right now. The flip
- * side of doing this is, it only works post Field Trimming.
- */
- public Set<ImmutableBitSet> getUniqueKeys(Project rel, RelMetadataQuery mq, boolean ignoreNulls) {
- HiveTableScan tScan = getTableScan(rel.getInput(), false);
-
- if (tScan == null) {
- // If HiveTableScan is not found, e.g., not sequence of Project and
- // Filter operators, execute the original getUniqueKeys method
-
- // LogicalProject maps a set of rows to a different set;
- // Without knowledge of the mapping function(whether it
- // preserves uniqueness), it is only safe to derive uniqueness
- // info from the child of a project when the mapping is f(a) => a.
- //
- // Further more, the unique bitset coming from the child needs
- // to be mapped to match the output of the project.
- final Map<Integer, Integer> mapInToOutPos = new HashMap<>();
- final List<RexNode> projExprs = rel.getProjects();
- final Set<ImmutableBitSet> projUniqueKeySet = new HashSet<>();
-
- // Build an input to output position map.
- for (int i = 0; i < projExprs.size(); i++) {
- RexNode projExpr = projExprs.get(i);
- if (projExpr instanceof RexInputRef) {
- mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i);
- }
- }
-
- if (mapInToOutPos.isEmpty()) {
- // if there's no RexInputRef in the projected expressions
- // return empty set.
- return projUniqueKeySet;
- }
-
- Set<ImmutableBitSet> childUniqueKeySet =
- mq.getUniqueKeys(rel.getInput(), ignoreNulls);
-
- if (childUniqueKeySet != null) {
- // Now add to the projUniqueKeySet the child keys that are fully
- // projected.
- for (ImmutableBitSet colMask : childUniqueKeySet) {
- ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder();
- boolean completeKeyProjected = true;
- for (int bit : colMask) {
- if (mapInToOutPos.containsKey(bit)) {
- tmpMask.set(mapInToOutPos.get(bit));
- } else {
- // Skip the child unique key if part of it is not
- // projected.
- completeKeyProjected = false;
- break;
- }
- }
- if (completeKeyProjected) {
- projUniqueKeySet.add(tmpMask.build());
- }
- }
- }
-
- return projUniqueKeySet;
- }
-
- Map<Integer, Integer> posMap = new HashMap<Integer, Integer>();
- int projectPos = 0;
- int colStatsPos = 0;
-
- BitSet projectedCols = new BitSet();
- for (RexNode r : rel.getProjects()) {
- if (r instanceof RexInputRef) {
- projectedCols.set(((RexInputRef) r).getIndex());
- posMap.put(colStatsPos, projectPos);
- colStatsPos++;
- }
- projectPos++;
+ public Set<ImmutableBitSet> getUniqueKeys(HiveTableScan rel, RelMetadataQuery mq,
+ boolean ignoreNulls) {
+ RelOptHiveTable tbl = (RelOptHiveTable) rel.getTable();
+ List<ImmutableBitSet> keyList = tbl.getNonNullableKeys();
+ if (keyList != null) {
+ Set<ImmutableBitSet> keySet = new HashSet<>(keyList);
+ return keySet;
}
-
- double numRows = mq.getRowCount(tScan);
- List<ColStatistics> colStats = tScan.getColStat(BitSets
- .toList(projectedCols));
- Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>();
-
- colStatsPos = 0;
- for (ColStatistics cStat : colStats) {
- boolean isKey = false;
- if (cStat.getCountDistint() >= numRows) {
- isKey = true;
- }
- if ( !isKey && cStat.getRange() != null &&
- cStat.getRange().maxValue != null &&
- cStat.getRange().minValue != null) {
- double r = cStat.getRange().maxValue.doubleValue() -
- cStat.getRange().minValue.doubleValue() + 1;
- isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON);
- }
- if ( isKey ) {
- ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos));
- keys.add(key);
- }
- colStatsPos++;
- }
-
- return keys;
+ return null;
}
-
- /*
- * traverse a path of Filter, Projects to get to the TableScan.
- * In case of Unique keys, stop if you reach a Project, it will be handled
- * by the invocation on the Project.
- * In case of getting the base rowCount of a Path, keep going past a Project.
- */
- static HiveTableScan getTableScan(RelNode r, boolean traverseProject) {
-
- while (r != null && !(r instanceof HiveTableScan)) {
- if (r instanceof HepRelVertex) {
- r = ((HepRelVertex) r).getCurrentRel();
- } else if (r instanceof Filter) {
- r = ((Filter) r).getInput();
- } else if (traverseProject && r instanceof Project) {
- r = ((Project) r).getInput();
- } else {
- r = null;
- }
- }
- return r == null ? null : (HiveTableScan) r;
- }
-
}
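After this change the handler simply surfaces the table's declared non-nullable keys (for example a PRIMARY KEY ... DISABLE RELY constraint) instead of inferring uniqueness from column statistics. A toy illustration of how such a key set can be tested against a candidate grouping column set, which is the shape of check the group-by key reduction relies on; the table layout and key positions here are made up:

    // Suppose column 0 carries a RELY primary key; the handler above
    // would then report {0} as a unique key of the scan.
    Set<ImmutableBitSet> keys = new HashSet<>();
    keys.add(ImmutableBitSet.of(0));

    // GROUP BY over columns {0, 2} is unique because it contains the key
    // {0}, so the non-key column 2 is redundant as a grouping column.
    ImmutableBitSet groupCols = ImmutableBitSet.of(0, 2);
    boolean coversKey = false;
    for (ImmutableBitSet key : keys) {
      if (groupCols.contains(key)) {  // superset test on bit sets
        coversKey = true;
        break;
      }
    }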
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/test/queries/clientpositive/constraints_optimization.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/constraints_optimization.q b/ql/src/test/queries/clientpositive/constraints_optimization.q
new file mode 100644
index 0000000..70ab850
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constraints_optimization.q
@@ -0,0 +1,149 @@
+set hive.strict.checks.cartesian.product=false;
+
+CREATE TABLE `customer_removal_n0`(
+ `c_custkey` bigint,
+ `c_name` string,
+ `c_address` string,
+ `c_city` string,
+ `c_nation` string,
+ `c_region` string,
+ `c_phone` string,
+ `c_mktsegment` string,
+ primary key (`c_custkey`) disable rely);
+
+CREATE TABLE `dates_removal_n0`(
+ `d_datekey` bigint,
+ `d_id` bigint,
+ `d_date` string,
+ `d_dayofweek` string,
+ `d_month` string,
+ `d_year` int,
+ `d_yearmonthnum` int,
+ `d_yearmonth` string,
+ `d_daynuminweek` int,
+ `d_daynuminmonth` int,
+ `d_daynuminyear` int,
+ `d_monthnuminyear` int,
+ `d_weeknuminyear` int,
+ `d_sellingseason` string,
+ `d_lastdayinweekfl` int,
+ `d_lastdayinmonthfl` int,
+ `d_holidayfl` int ,
+ `d_weekdayfl` int,
+ primary key (`d_datekey`, `d_id`) disable rely);
+
+ -- group by key is a single primary key column
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey;
+
+ -- mix of primary + non-primary keys
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation;
+
+ -- multiple keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id;
+
+ -- multiple keys + non-keys + different order
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason
+ order by d_datekey limit 10;
+
+ -- multiple keys in different order and mixed with non-keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10;
+
+ -- same as above but with an aggregate
+ EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10;
+
+ -- join
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 0);
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 1);
+ insert into customer_removal_n0 (c_custkey) values(3);
+
+ EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id;
+ SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id;
+
+ -- group by keys are not primary keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason
+ order by d_datekey limit 10;
+
+ -- negative
+ -- with aggregate function
+ EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA')
+ group by c_custkey, c_nation;
+
+ DROP TABLE customer_removal_n0;
+ DROP TABLE dates_removal_n0;
+
+ -- group by reduction optimization
+ create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely);
+ insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null);
+
+ -- value1 will be removed because it is unused, then the whole group by will be removed because key1 is unique
+ explain select key1 from dest_g21 group by key1, value1;
+ select key1 from dest_g21 group by key1, value1;
+ -- same query but with filter
+ explain select key1 from dest_g21 where value1 > 1 group by key1, value1;
+ select key1 from dest_g21 where value1 > 1 group by key1, value1;
+
+ explain select key1 from dest_g21 where key1 > 1 group by key1, value1;
+ select key1 from dest_g21 where key1 > 1 group by key1, value1;
+
+ -- only value1 will be removed because there is an aggregate call
+ explain select count(key1) from dest_g21 group by key1, value1;
+ select count(key1) from dest_g21 group by key1, value1;
+
+ explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1;
+ select count(key1) from dest_g21 where value1 > 1 group by key1, value1;
+
+ -- t1.key1 is unique even after the join, therefore group by (t1.key1, t1.value1) = group by (t1.key1)
+ explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+ select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+
+ explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+ select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+
+ -- both the aggregate and one of the group by keys should be removed
+ explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+ select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+
+ -- one of the aggregates will be removed and one of the group by keys will be removed
+ explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+ select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+
+ DROP table dest_g21;
+
+CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string);
+INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010');
+
+-- explicit NOT NULL filter
+explain select i, j from tconst where i is not null group by i,j, d_year;
+select i, j from tconst where i is not null group by i,j, d_year;
+
+-- filter on i should be removed
+explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year;
+select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year;
+
+-- the WHERE clause will be removed since i is not null is always true
+explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year;
+select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year;
+
+-- should not have implicit filter on join keys
+explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year;
+select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year;
+
+-- both join keys have NOT NULL
+explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year;
+select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year;
+
+DROP TABLE tconst;
+
+
+-- UNIQUE + NOT NULL (same as primary key)
+create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely);
+explain select key1 from dest_g21 group by key1, value1;
+
+-- UNIQUE with nullability
+create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely);
+explain select key1 from dest_g24 group by key1, value1;
+
+DROP TABLE dest_g21;
+DROP TABLE dest_g24;