Posted to commits@hive.apache.org by vg...@apache.org on 2018/10/15 03:33:49 UTC
[1/3] hive git commit: HIVE-17043: Remove non unique columns from group by keys if not referenced later (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
Repository: hive
Updated Branches:
refs/heads/master 259db56e3 -> 1db3debcb
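
A note on what the golden-file changes below reflect: the patch teaches the optimizer that when the group-by keys contain a column set declared unique (for example a PRIMARY KEY ... DISABLE RELY constraint) and the remaining keys are not referenced downstream, the non-unique keys can be dropped; if no aggregate call is left either, the Group By is removed altogether. A minimal sketch of the effect, using the dest_g21 table from the new constraints_optimization.q.out file in part 2 of this commit (an illustration of the rewrite, not the optimizer code itself):

    -- key1 is unique via the RELY primary key; value1 is never referenced
    -- after the GROUP BY, so it is dropped from the keys, and the now
    -- key1-only, aggregate-free Group By is eliminated entirely.
    CREATE TABLE dest_g21 (key1 int, value1 double,
                           primary key (key1) disable rely);
    EXPLAIN SELECT key1 FROM dest_g21 GROUP BY key1, value1;
    -- resulting plan: TableScan -> Select -> ListSink, with no Group By
    -- Operator, as in the Stage-0 output further down.

In the materialized_view_rewrite_1.q.out hunk below, the rewritten plans now scan the materialized view default.mv1_n2 instead of the base table emps_n3, which is why mv1_n2 is added to the PREHOOK/POSTHOOK inputs and appears as the TableScan alias.
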
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
index 735a4db..a7a1a0b 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out
@@ -641,11 +641,13 @@ PREHOOK: query: explain
select empid, deptno from emps_n3 group by empid, deptno
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain
select empid, deptno from emps_n3 group by empid, deptno
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -656,7 +658,7 @@ STAGE PLANS:
limit: -1
Processor Tree:
TableScan
- alias: emps_n3
+ alias: default.mv1_n2
Select Operator
expressions: empid (type: int), deptno (type: int)
outputColumnNames: _col0, _col1
@@ -665,10 +667,12 @@ STAGE PLANS:
PREHOOK: query: select empid, deptno from emps_n3 group by empid, deptno
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
POSTHOOK: query: select empid, deptno from emps_n3 group by empid, deptno
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
100 10
110 10
@@ -709,11 +713,13 @@ PREHOOK: query: explain
select empid, name from emps_n3 group by empid, name
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain
select empid, name from emps_n3 group by empid, name
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-0 is a root stage
@@ -724,7 +730,7 @@ STAGE PLANS:
limit: -1
Processor Tree:
TableScan
- alias: emps_n3
+ alias: default.mv1_n2
Select Operator
expressions: empid (type: int), name (type: varchar(256))
outputColumnNames: _col0, _col1
@@ -733,10 +739,12 @@ STAGE PLANS:
PREHOOK: query: select empid, name from emps_n3 group by empid, name
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_n3
+PREHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
POSTHOOK: query: select empid, name from emps_n3 group by empid, name
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_n3
+POSTHOOK: Input: default@mv1_n2
#### A masked pattern was here ####
100 Bill
110 Theodore
[2/3] hive git commit: HIVE-17043: Remove non unique columns from group by keys if not referenced later (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
Posted by vg...@apache.org.
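
constraints_optimization.q.out is a new test file covering the reduction for single and composite RELY primary keys, key/non-key mixes, joins, aggregate calls (the negative cases), and NOT NULL constraints. For a composite key, all of its columns must appear among the group-by keys before the non-key columns can be dropped (compare the "group by keys are not primary keys" case below, which keeps its Group By Operator). A sketch using the dates_removal_n0 table from the file, with the column list abridged:

    -- (d_datekey, d_id) is a composite RELY primary key, so the two
    -- columns together are unique and d_sellingseason can be dropped
    -- from the group-by keys without changing the result.
    CREATE TABLE dates_removal_n0 (
      d_datekey bigint,
      d_id bigint,
      d_sellingseason string,   -- abridged: the file declares 18 columns
      primary key (d_datekey, d_id) disable rely);
    EXPLAIN SELECT d_datekey FROM dates_removal_n0
    GROUP BY d_id, d_datekey, d_sellingseason;
    -- the plan groups on nothing: the aggregate-free Group By vanishes.
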
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
new file mode 100644
index 0000000..b45b7c4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
@@ -0,0 +1,2010 @@
+PREHOOK: query: CREATE TABLE `customer_removal_n0`(
+ `c_custkey` bigint,
+ `c_name` string,
+ `c_address` string,
+ `c_city` string,
+ `c_nation` string,
+ `c_region` string,
+ `c_phone` string,
+ `c_mktsegment` string,
+ primary key (`c_custkey`) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@customer_removal_n0
+POSTHOOK: query: CREATE TABLE `customer_removal_n0`(
+ `c_custkey` bigint,
+ `c_name` string,
+ `c_address` string,
+ `c_city` string,
+ `c_nation` string,
+ `c_region` string,
+ `c_phone` string,
+ `c_mktsegment` string,
+ primary key (`c_custkey`) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@customer_removal_n0
+PREHOOK: query: CREATE TABLE `dates_removal_n0`(
+ `d_datekey` bigint,
+ `d_id` bigint,
+ `d_date` string,
+ `d_dayofweek` string,
+ `d_month` string,
+ `d_year` int,
+ `d_yearmonthnum` int,
+ `d_yearmonth` string,
+ `d_daynuminweek` int,
+ `d_daynuminmonth` int,
+ `d_daynuminyear` int,
+ `d_monthnuminyear` int,
+ `d_weeknuminyear` int,
+ `d_sellingseason` string,
+ `d_lastdayinweekfl` int,
+ `d_lastdayinmonthfl` int,
+ `d_holidayfl` int ,
+ `d_weekdayfl`int,
+ primary key (`d_datekey`, `d_id`) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: CREATE TABLE `dates_removal_n0`(
+ `d_datekey` bigint,
+ `d_id` bigint,
+ `d_date` string,
+ `d_dayofweek` string,
+ `d_month` string,
+ `d_year` int,
+ `d_yearmonthnum` int,
+ `d_yearmonth` string,
+ `d_daynuminweek` int,
+ `d_daynuminmonth` int,
+ `d_daynuminyear` int,
+ `d_monthnuminyear` int,
+ `d_weeknuminyear` int,
+ `d_sellingseason` string,
+ `d_lastdayinweekfl` int,
+ `d_lastdayinmonthfl` int,
+ `d_holidayfl` int ,
+ `d_weekdayfl`int,
+ primary key (`d_datekey`, `d_id`) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dates_removal_n0
+PREHOOK: query: -- group by key has single primary key
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- group by key has single primary key
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: customer_removal_n0
+ filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Filter Operator
+ predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Select Operator
+ expressions: c_custkey (type: bigint)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: -- mix of primary + non-primary keys
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- mix of primary + non-primary keys
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: customer_removal_n0
+ filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Filter Operator
+ predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Select Operator
+ expressions: c_custkey (type: bigint)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: -- multiple keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- multiple keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Select Operator
+ expressions: d_datekey (type: bigint)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: -- multiple keys + non-keys + different order
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason
+ order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- multiple keys + non-keys + different order
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason
+ order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_datekey (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- multiple keys in different order and mixed with non-keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- multiple keys in different order and mixed with non-keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_datekey (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- same as above but with aggregate
+ EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- same as above but with aggregate
+ EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_datekey (type: bigint), d_id (type: bigint)
+ outputColumnNames: d_datekey, d_id
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Top N Key Operator
+ sort order: ++
+ keys: d_datekey (type: bigint), d_id (type: bigint)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ top n: 10
+ Group By Operator
+ aggregations: count()
+ keys: d_datekey (type: bigint), d_id (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: bigint), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- join
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: -- join
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates_removal_n0
+POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE []
+PREHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: insert into dates_removal_n0(d_datekey, d_id) values(3, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dates_removal_n0
+POSTHOOK: Lineage: dates_removal_n0.d_date SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_datekey SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_daynuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_dayofweek SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_holidayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_id SCRIPT []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinmonthfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_lastdayinweekfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_month SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_monthnuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_sellingseason SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weekdayfl SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_weeknuminyear SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_year SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonth SIMPLE []
+POSTHOOK: Lineage: dates_removal_n0.d_yearmonthnum SIMPLE []
+PREHOOK: query: insert into customer_removal_n0 (c_custkey) values(3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@customer_removal_n0
+POSTHOOK: query: insert into customer_removal_n0 (c_custkey) values(3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@customer_removal_n0
+POSTHOOK: Lineage: customer_removal_n0.c_address SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_city SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_custkey SCRIPT []
+POSTHOOK: Lineage: customer_removal_n0.c_mktsegment SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_name SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_nation SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_phone SIMPLE []
+POSTHOOK: Lineage: customer_removal_n0.c_region SIMPLE []
+PREHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: d_datekey (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: customer_removal_n0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: c_custkey (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+3
+3
+PREHOOK: query: -- group by keys are not primary keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason
+ order by d_datekey limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- group by keys are not primary keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason
+ order by d_datekey limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dates_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dates_removal_n0
+ filterExpr: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (d_year) IN (1985, 2004) (type: boolean)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: d_datekey (type: bigint), d_sellingseason (type: string)
+ outputColumnNames: d_datekey, d_sellingseason
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: ++
+ keys: d_datekey (type: bigint), d_sellingseason (type: string)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 10
+ Group By Operator
+ keys: d_datekey (type: bigint), d_sellingseason (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- negative
+ -- with aggregate function
+ EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA')
+ group by c_custkey, c_nation
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: -- negative
+ -- with aggregate function
+ EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA')
+ group by c_custkey, c_nation
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer_removal_n0
+ filterExpr: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (c_nation) IN ('USA', 'INDIA') (type: boolean)
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: c_custkey (type: bigint)
+ outputColumnNames: c_custkey
+ Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: c_custkey (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: DROP TABLE customer_removal_n0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Output: default@customer_removal_n0
+POSTHOOK: query: DROP TABLE customer_removal_n0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Output: default@customer_removal_n0
+PREHOOK: query: DROP TABLE dates_removal_n0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dates_removal_n0
+PREHOOK: Output: default@dates_removal_n0
+POSTHOOK: query: DROP TABLE dates_removal_n0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dates_removal_n0
+POSTHOOK: Output: default@dates_removal_n0
+PREHOOK: query: -- group by reduction optimization
+ create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: -- group by reduction optimization
+ create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dest_g21
+POSTHOOK: Lineage: dest_g21.key1 SCRIPT []
+POSTHOOK: Lineage: dest_g21.value1 SCRIPT []
+PREHOOK: query: -- value1 will be removed because it is unused, then the whole group by will be removed because key1 is unique
+ explain select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- value1 will be removed because it is unused, then the whole group by will be removed because key1 is unique
+ explain select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+2
+3
+4
+5
+6
+PREHOOK: query: -- same query but with filter
+ explain select key1 from dest_g21 where value1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- same query but with filter
+ explain select key1 from dest_g21 where value1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (value1 > 1.0D) (type: boolean)
+ Filter Operator
+ predicate: (value1 > 1.0D) (type: boolean)
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from dest_g21 where value1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+2
+4
+PREHOOK: query: explain select key1 from dest_g21 where key1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g21 where key1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (key1 > 1) (type: boolean)
+ Filter Operator
+ predicate: (key1 > 1) (type: boolean)
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: select key1 from dest_g21 where key1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from dest_g21 where key1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+2
+3
+4
+5
+6
+PREHOOK: query: -- only value1 will be removed because there is aggregate call
+ explain select count(key1) from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- only value1 will be removed because there is aggregate call
+ explain select count(key1) from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dest_g21
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: key1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(key1) from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select count(key1) from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+1
+1
+1
+1
+1
+PREHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (value1 > 1.0D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value1 > 1.0D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: key1
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select count(key1) from dest_g21 where value1 > 1 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+1
+1
+PREHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key)
+ explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- t1.key is unique even after join therefore group by = group by (t1.key)
+ explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: (value1 > 2.0D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value1 > 2.0D) (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+4
+PREHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: (value1 > 2.0D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value1 > 2.0D) (type: boolean)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+PREHOOK: query: -- both aggregate and one of the key1 should be removed
+ explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- both aggregate and one of the key1 should be removed
+ explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (value1 < 4.5D) (type: boolean)
+ Filter Operator
+ predicate: (value1 < 4.5D) (type: boolean)
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1
+2
+3
+4
+PREHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed
+ explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: -- one of the aggregate will be removed and one of the key1 will be removed
+ explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dest_g21
+ filterExpr: (value1 < 4.5D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value1 < 4.5D) (type: boolean)
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: key1
+ Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(key1)
+ keys: key1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+1 1
+3 3
+4 4
+2 2
+PREHOOK: query: DROP table dest_g21
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g21
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: DROP table dest_g21
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g21
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tconst
+POSTHOOK: query: CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tconst
+PREHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tconst
+POSTHOOK: query: INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tconst
+POSTHOOK: Lineage: tconst.d_year SCRIPT []
+POSTHOOK: Lineage: tconst.i SCRIPT []
+POSTHOOK: Lineage: tconst.j SCRIPT []
+PREHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i, j from tconst where i is not null group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tconst
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), j (type: int), d_year (type: string)
+ outputColumnNames: i, j, d_year
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: i (type: int), j (type: int), d_year (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select i, j from tconst where i is not null group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i, j from tconst where i is not null group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3 3
+2 NULL
+1 1
+PREHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tconst
+ filterExpr: j is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: j is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: i (type: int), j (type: int), d_year (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3 3
+1 1
+PREHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tconst
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), j (type: int), d_year (type: string)
+ outputColumnNames: i, j, d_year
+ Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: i (type: int), j (type: int), d_year (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+3 3
+2 NULL
+1 1
+PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), d_year (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ filterExpr: j is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: j is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: j (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+1
+3
+PREHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), d_year (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tconst
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tconst
+#### A masked pattern was here ####
+1
+2
+3
+PREHOOK: query: DROP TABLE tconst
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tconst
+PREHOOK: Output: default@tconst
+POSTHOOK: query: DROP TABLE tconst
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tconst
+POSTHOOK: Output: default@tconst
+PREHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g21
+POSTHOOK: query: create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g21
+PREHOOK: query: explain select key1 from dest_g21 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g21 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g21
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: dest_g21
+ Select Operator
+ expressions: key1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g24
+POSTHOOK: query: create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g24
+PREHOOK: query: explain select key1 from dest_g24 group by key1, value1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g24
+#### A masked pattern was here ####
+POSTHOOK: query: explain select key1 from dest_g24 group by key1, value1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g24
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dest_g24
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key1 (type: int), value1 (type: double)
+ outputColumnNames: key1, value1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key1 (type: int), value1 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: double)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: double)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: DROP TABLE dest_g21
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g21
+PREHOOK: Output:
<TRUNCATED>
[3/3] hive git commit: HIVE-17043: Remove non unique columns from
group by keys if not referenced later (Vineet Garg,
reviewed by Jesus Camacho Rodriguez)
Posted by vg...@apache.org.
HIVE-17043: Remove non unique columns from group by keys if not referenced later (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1db3debc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1db3debc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1db3debc
Branch: refs/heads/master
Commit: 1db3debcb564c60db872022761b3ac4862509694
Parents: 259db56
Author: Vineet Garg <vg...@apache.org>
Authored: Sun Oct 14 20:32:40 2018 -0700
Committer: Vineet Garg <vg...@apache.org>
Committed: Sun Oct 14 20:32:40 2018 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../calcite/HiveDefaultRelMetadataProvider.java | 2 +
.../ql/optimizer/calcite/RelOptHiveTable.java | 49 +-
.../rules/HiveAggregateJoinTransposeRule.java | 2 +-
.../calcite/rules/HiveRelFieldTrimmer.java | 269 ++-
.../calcite/stats/EstimateUniqueKeys.java | 333 +++
.../stats/HiveRelMdColumnUniqueness.java | 63 +
.../calcite/stats/HiveRelMdRowCount.java | 7 +-
.../calcite/stats/HiveRelMdUniqueKeys.java | 152 +-
.../clientpositive/constraints_optimization.q | 149 ++
.../llap/constraints_optimization.q.out | 2010 ++++++++++++++++++
.../llap/materialized_view_rewrite_1.q.out | 12 +-
12 files changed, 2848 insertions(+), 201 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9a87464..b6d42c6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -489,6 +489,7 @@ minillaplocal.query.files=\
colstats_date_min_max.q,\
compare_double_bigint_2.q,\
constprog_dpp.q,\
+ constraints_optimization.q,\
current_date_timestamp.q,\
correlationoptimizer1.q,\
correlationoptimizer2.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index 635d27e..41c2f9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution;
@@ -71,6 +72,7 @@ public class HiveDefaultRelMetadataProvider {
HiveRelMdSelectivity.SOURCE,
HiveRelMdRowCount.SOURCE,
HiveRelMdUniqueKeys.SOURCE,
+ HiveRelMdColumnUniqueness.SOURCE,
HiveRelMdSize.SOURCE,
HiveRelMdMemory.SOURCE,
new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(),
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 42e60de..fc9178f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.ql.optimizer.calcite;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -35,7 +34,6 @@ import org.apache.calcite.prepare.RelOptTableImpl;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollationTraitDef;
import org.apache.calcite.rel.RelDistribution;
-import org.apache.calcite.rel.RelDistributions;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelFieldCollation.Direction;
import org.apache.calcite.rel.RelFieldCollation.NullDirection;
@@ -49,6 +47,7 @@ import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.schema.ColumnStrategy;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
import org.apache.calcite.util.mapping.IntPair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -100,6 +99,7 @@ public class RelOptHiveTable implements RelOptTable {
private final ImmutableList<VirtualColumn> hiveVirtualCols;
private final int noOfNonVirtualCols;
private final List<ImmutableBitSet> keys;
+ private final List<ImmutableBitSet> nonNullablekeys;
private final List<RelReferentialConstraint> referentialConstraints;
final HiveConf hiveConf;
@@ -133,7 +133,9 @@ public class RelOptHiveTable implements RelOptTable {
this.partitionCache = partitionCache;
this.colStatsCache = colStatsCache;
this.noColsMissingStats = noColsMissingStats;
- this.keys = generateKeys();
+ Pair<List<ImmutableBitSet>, List<ImmutableBitSet>> constraintKeys = generateKeys();
+ this.keys = constraintKeys.left;
+ this.nonNullablekeys = constraintKeys.right;
this.referentialConstraints = generateReferentialConstraints();
}
@@ -167,6 +169,10 @@ public class RelOptHiveTable implements RelOptTable {
throw new UnsupportedOperationException();
}
+ public List<ImmutableBitSet> getNonNullableKeys() {
+ return nonNullablekeys;
+ }
+
@Override
public RelOptTable extend(List<RelDataTypeField> extendedFields) {
throw new UnsupportedOperationException();
@@ -213,6 +219,16 @@ public class RelOptHiveTable implements RelOptTable {
this.hiveConf, this.partitionCache, this.colStatsCache, this.noColsMissingStats);
}
+ // Given a set of columns, returns true if they correspond to a declared key whose columns are all non-nullable
+ public boolean isNonNullableKey(ImmutableBitSet columns) {
+ for (ImmutableBitSet key : nonNullablekeys) {
+ if (key.contains(columns)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
@Override
public boolean isKey(ImmutableBitSet columns) {
for (ImmutableBitSet key : keys) {
@@ -228,8 +244,9 @@ public class RelOptHiveTable implements RelOptTable {
return referentialConstraints;
}
- private List<ImmutableBitSet> generateKeys() {
+ private Pair<List<ImmutableBitSet>, List<ImmutableBitSet>> generateKeys() {
ImmutableList.Builder<ImmutableBitSet> builder = ImmutableList.builder();
+ ImmutableList.Builder<ImmutableBitSet> nonNullbuilder = ImmutableList.builder();
// First PK
final PrimaryKeyInfo pki;
try {
@@ -250,11 +267,12 @@ public class RelOptHiveTable implements RelOptTable {
}
if (pkPos == rowType.getFieldNames().size()) {
LOG.error("Column for primary key definition " + pkColName + " not found");
- return ImmutableList.of();
}
keys.set(pkPos);
}
- builder.add(keys.build());
+ ImmutableBitSet key = keys.build();
+ builder.add(key);
+ nonNullbuilder.add(key);
}
// Then UKs
final UniqueConstraint uki;
@@ -266,23 +284,31 @@ public class RelOptHiveTable implements RelOptTable {
}
for (List<UniqueConstraintCol> ukCols : uki.getUniqueConstraints().values()) {
ImmutableBitSet.Builder keys = ImmutableBitSet.builder();
+ boolean isNonNullable = true;
for (UniqueConstraintCol ukCol : ukCols) {
int ukPos;
for (ukPos = 0; ukPos < rowType.getFieldNames().size(); ukPos++) {
String colName = rowType.getFieldNames().get(ukPos);
if (ukCol.colName.equals(colName)) {
+ if(rowType.getFieldList().get(ukPos).getType().isNullable()) {
+ // all columns must be non-nullable for the key to count as non-nullable
+ isNonNullable = false;
+ }
break;
}
}
if (ukPos == rowType.getFieldNames().size()) {
LOG.error("Column for unique constraint definition " + ukCol.colName + " not found");
- return ImmutableList.of();
}
keys.set(ukPos);
}
- builder.add(keys.build());
+ ImmutableBitSet key = keys.build();
+ builder.add(key);
+ if(isNonNullable) {
+ nonNullbuilder.add(key);
+ }
}
- return builder.build();
+ return new Pair<>(builder.build(), nonNullbuilder.build());
}
private List<RelReferentialConstraint> generateReferentialConstraints() {
@@ -368,12 +394,11 @@ public class RelOptHiveTable implements RelOptTable {
if (sortColumn.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC) {
direction = Direction.ASCENDING;
nullDirection = NullDirection.FIRST;
- }
- else {
+ } else {
direction = Direction.DESCENDING;
nullDirection = NullDirection.LAST;
}
- collationList.add(new RelFieldCollation(i,direction,nullDirection));
+ collationList.add(new RelFieldCollation(i, direction, nullDirection));
break;
}
}
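
The core of this file's change is the split between keys and the new nonNullablekeys list: a UNIQUE constraint over nullable columns still admits duplicate NULLs under SQL semantics, so only a constraint whose columns are all declared NOT NULL can vouch for row distinctness when NULLs are not ignored. A minimal sketch of that per-constraint test, restated outside the class (the class and method names here are illustrative, not from the patch):

    import java.util.List;

    import org.apache.calcite.rel.type.RelDataTypeField;
    import org.apache.calcite.util.ImmutableBitSet;

    final class UniqueKeyNullabilitySketch {
      private UniqueKeyNullabilitySketch() {}

      // A key is non-nullable only when every column in it is declared
      // NOT NULL; a single nullable column is enough to allow repeated
      // NULLs without violating the unique constraint.
      static boolean isNonNullable(ImmutableBitSet key, List<RelDataTypeField> fields) {
        for (int pos : key) {
          if (fields.get(pos).getType().isNullable()) {
            return false;
          }
        }
        return true;
      }
    }
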
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
index f43ef01..ed6659c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
@@ -171,7 +171,7 @@ public class HiveAggregateJoinTransposeRule extends AggregateJoinTransposeRule {
unique = true;
} else {
final Boolean unique0 =
- mq.areColumnsUnique(joinInput, belowAggregateKey);
+ mq.areColumnsUnique(joinInput, belowAggregateKey, true);
unique = unique0 != null && unique0;
}
if (unique) {
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 5857f73..b7c31bd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -24,15 +24,19 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
import org.apache.calcite.adapter.druid.DruidQuery;
import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.CorrelationId;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.TableScan;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
@@ -300,18 +304,58 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
private boolean isRexLiteral(final RexNode rexNode) {
if(rexNode instanceof RexLiteral) {
return true;
- }
- else if(rexNode instanceof RexCall
+ } else if(rexNode instanceof RexCall
&& ((RexCall)rexNode).getOperator().getKind() == SqlKind.CAST){
return isRexLiteral(((RexCall)(rexNode)).getOperands().get(0));
- }
- else {
+ } else {
return false;
}
}
+
+ // if the group by keys contain a pk/uk, the non-pk/non-uk columns are removed when they are not referenced later
+ private ImmutableBitSet generateNewGroupset(Aggregate aggregate, ImmutableBitSet fieldsUsed) {
+
+ ImmutableBitSet originalGroupSet = aggregate.getGroupSet();
+
+ if (aggregate.getGroupSets().size() > 1 || aggregate.getIndicatorCount() > 0
+ || fieldsUsed.contains(originalGroupSet)) {
+ // if there are grouping sets, indicators, or all the group keys are being used, there is no need to proceed further
+ return originalGroupSet;
+ }
+
+ final RelNode input = aggregate.getInput();
+ RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery();
+
+ final Set<ImmutableBitSet> uniqueKeys = mq.getUniqueKeys(input, false);
+ if (uniqueKeys == null || uniqueKeys.isEmpty()) {
+ return originalGroupSet;
+ }
+
+ // we have a set of unique keys; find a key that is contained in the group by keys
+ ImmutableBitSet groupByUniqueKey = null;
+
+ for (ImmutableBitSet key : uniqueKeys) {
+ if (aggregate.getGroupSet().contains(key)) {
+ groupByUniqueKey = key;
+ break;
+ }
+ }
+
+ if (groupByUniqueKey == null) {
+ // group by keys do not represent unique keys
+ return originalGroupSet;
+ }
+
+ // we know the group by keys contain a unique key and at least one group by column is not being used;
+ // any such column that is not part of the key can be removed
+ ImmutableBitSet nonKeyColumns = aggregate.getGroupSet().except(groupByUniqueKey);
+ ImmutableBitSet columnsToRemove = nonKeyColumns.except(fieldsUsed);
+ ImmutableBitSet newGroupSet = aggregate.getGroupSet().except(columnsToRemove);
+
+ return newGroupSet;
+ }
+
/**
- * Variant of {@link #trimFields(Aggregate, ImmutableBitSet, Set)} for
- * {@link org.apache.calcite.rel.logical.LogicalAggregate}.
* This method replaces group by 'constant key' with group by true (boolean)
* if and only if
* group by doesn't have grouping sets
@@ -322,50 +366,193 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
* underneath aggregate
*
* This is mainly done so that hive is able to push down queries with
- * group by 'constant key with type not supported by druid' into druid
+ * group by 'constant key with type not supported by druid' into druid.
+ *
*/
- public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed,
- Set<RelDataTypeField> extraFields) {
-
- Aggregate newAggregate = aggregate;
- if (!(aggregate.getIndicatorCount() > 0)
- && !(aggregate.getGroupSet().isEmpty())
- && !fieldsUsed.contains(aggregate.getGroupSet())) {
- final RelNode input = aggregate.getInput();
- final RelDataType rowType = input.getRowType();
- RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
- final List<RexNode> newProjects = new ArrayList<>();
-
- final List<RexNode> inputExprs = input.getChildExps();
- if(inputExprs == null || inputExprs.isEmpty()) {
- return super.trimFields(newAggregate, fieldsUsed, extraFields);
+ private Aggregate rewriteGBConstantKeys(Aggregate aggregate, ImmutableBitSet fieldsUsed,
+ Set<RelDataTypeField> extraFields) {
+ if ((aggregate.getIndicatorCount() > 0)
+ || (aggregate.getGroupSet().isEmpty())
+ || fieldsUsed.contains(aggregate.getGroupSet())) {
+ return aggregate;
+ }
+
+ final RelNode input = aggregate.getInput();
+
+
+ final RelDataType rowType = input.getRowType();
+ RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
+ final List<RexNode> newProjects = new ArrayList<>();
+
+ final List<RexNode> inputExprs = input.getChildExps();
+ if (inputExprs == null || inputExprs.isEmpty()) {
+ return aggregate;
+ }
+
+ boolean allConstants = true;
+ for (int key : aggregate.getGroupSet()) {
+ // getChildExprs on Join could return fewer expressions than the join actually produces
+ if (inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))) {
+ allConstants = false;
+ break;
}
+ }
- boolean allConstants = true;
- for(int key : aggregate.getGroupSet()) {
- // getChildExprs on Join could return less number of expressions than there are coming out of join
- if(inputExprs.size() <= key || !isRexLiteral(inputExprs.get(key))){
- allConstants = false;
- break;
+ if (allConstants) {
+ for (int i = 0; i < rowType.getFieldCount(); i++) {
+ if (aggregate.getGroupSet().get(i)) {
+ newProjects.add(rexBuilder.makeLiteral(true));
+ } else {
+ newProjects.add(rexBuilder.makeInputRef(input, i));
}
}
+ relBuilder.push(input);
+ relBuilder.project(newProjects);
+ Aggregate newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(),
+ aggregate.getGroupSet(), null, aggregate.getAggCallList());
+ return newAggregate;
+ }
+ return aggregate;
+ }
- if (allConstants) {
- for (int i = 0; i < rowType.getFieldCount(); i++) {
- if (aggregate.getGroupSet().get(i)) {
- newProjects.add(rexBuilder.makeLiteral(true));
- } else {
- newProjects.add(rexBuilder.makeInputRef(input, i));
- }
- }
- relBuilder.push(input);
- relBuilder.project(newProjects);
- newAggregate = new HiveAggregate(aggregate.getCluster(), aggregate.getTraitSet(), relBuilder.build(),
- aggregate.getGroupSet(), null, aggregate.getAggCallList());
+ @Override
+ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
+ // Fields:
+ //
+ // | sys fields | group fields | indicator fields | agg functions |
+ //
+ // Two kinds of trimming:
+ //
+ // 1. If agg rel has system fields but none of these are used, create an
+ // agg rel with no system fields.
+ //
+ // 2. If aggregate functions are not used, remove them.
+ //
+ // But group and indicator fields stay, even if they are not used.
+
+ aggregate = rewriteGBConstantKeys(aggregate, fieldsUsed, extraFields);
+
+ final RelDataType rowType = aggregate.getRowType();
+
+ // Compute which input fields are used.
+ // 1. group fields are always used
+ final ImmutableBitSet.Builder inputFieldsUsed =
+ aggregate.getGroupSet().rebuild();
+ // 2. agg functions
+ for (AggregateCall aggCall : aggregate.getAggCallList()) {
+ for (int i : aggCall.getArgList()) {
+ inputFieldsUsed.set(i);
+ }
+ if (aggCall.filterArg >= 0) {
+ inputFieldsUsed.set(aggCall.filterArg);
+ }
+ }
+
+ // Create input with trimmed columns.
+ final RelNode input = aggregate.getInput();
+ final Set<RelDataTypeField> inputExtraFields = Collections.emptySet();
+ final TrimResult trimResult =
+ trimChild(aggregate, input, inputFieldsUsed.build(), inputExtraFields);
+ final RelNode newInput = trimResult.left;
+ final Mapping inputMapping = trimResult.right;
+
+ ImmutableBitSet originalGroupSet = aggregate.getGroupSet();
+ ImmutableBitSet updatedGroupSet = generateNewGroupset(aggregate, fieldsUsed);
+ ImmutableBitSet gbKeysDeleted = originalGroupSet.except(updatedGroupSet);
+ ImmutableBitSet updatedGroupFields = ImmutableBitSet.range(originalGroupSet.cardinality());
+ final int updatedGroupCount = updatedGroupSet.cardinality();
+
+ // we need to clear the bits corresponding to deleted gb keys
+ int setIdx = 0;
+ while(setIdx != -1) {
+ setIdx = gbKeysDeleted.nextSetBit(setIdx);
+ if(setIdx != -1) {
+ updatedGroupFields = updatedGroupFields.clear(setIdx);
+ setIdx++;
}
}
- return super.trimFields(newAggregate, fieldsUsed, extraFields);
+ fieldsUsed =
+ fieldsUsed.union(updatedGroupFields);
+
+ // If the input is unchanged, and we need to project all columns,
+ // there's nothing to do.
+ if (input == newInput
+ && fieldsUsed.equals(ImmutableBitSet.range(rowType.getFieldCount()))) {
+ return result(aggregate,
+ Mappings.createIdentity(rowType.getFieldCount()));
+ }
+
+ // update the group by keys based on inputMapping
+ ImmutableBitSet newGroupSet =
+ Mappings.apply(inputMapping, updatedGroupSet);
+
+ // Which agg calls are used by our consumer?
+ int originalGroupCount = aggregate.getGroupSet().cardinality();
+ int j = originalGroupCount;
+ int usedAggCallCount = 0;
+ for (int i = 0; i < aggregate.getAggCallList().size(); i++) {
+ if (fieldsUsed.get(j++)) {
+ ++usedAggCallCount;
+ }
+ }
+
+ // Offset due to the number of system fields having changed.
+ Mapping mapping =
+ Mappings.create(
+ MappingType.INVERSE_SURJECTION,
+ rowType.getFieldCount(),
+ updatedGroupCount + usedAggCallCount);
+
+
+ // if the group keys were reduced, we know there were no grouping sets,
+ // so the group sets do not need to be transformed
+ ImmutableList<ImmutableBitSet> newGroupSets = null;
+ if(!updatedGroupSet.equals(aggregate.getGroupSet())) {
+ newGroupSets = ImmutableList.of(newGroupSet);
+ } else {
+ newGroupSets = ImmutableList.copyOf(
+ Iterables.transform(aggregate.getGroupSets(),
+ input1 -> Mappings.apply(inputMapping, input1)));
+ }
+
+ // Populate mapping of where to find the fields. System, group key and
+ // indicator fields first.
+ int gbKeyIdx = 0;
+ for (j = 0; j < originalGroupCount; j++) {
+ if(fieldsUsed.get(j)) {
+ mapping.set(j, gbKeyIdx);
+ gbKeyIdx++;
+ }
+ }
+
+ // Now create new agg calls, and populate mapping for them.
+ relBuilder.push(newInput);
+ final List<RelBuilder.AggCall> newAggCallList = new ArrayList<>();
+ j = originalGroupCount; // because lookup in fieldsUsed is done using original group count
+ for (AggregateCall aggCall : aggregate.getAggCallList()) {
+ if (fieldsUsed.get(j)) {
+ final ImmutableList<RexNode> args =
+ relBuilder.fields(
+ Mappings.apply2(inputMapping, aggCall.getArgList()));
+ final RexNode filterArg = aggCall.filterArg < 0 ? null
+ : relBuilder.field(Mappings.apply(inputMapping, aggCall.filterArg));
+ RelBuilder.AggCall newAggCall =
+ relBuilder.aggregateCall(aggCall.getAggregation(),
+ aggCall.isDistinct(), aggCall.isApproximate(),
+ filterArg, aggCall.name, args);
+ mapping.set(j, updatedGroupCount + newAggCallList.size());
+ newAggCallList.add(newAggCall);
+ }
+ ++j;
+ }
+
+ final RelBuilder.GroupKey groupKey =
+ relBuilder.groupKey(newGroupSet, newGroupSets);
+ relBuilder.aggregate(groupKey, newAggCallList);
+
+ return result(relBuilder.build(), mapping);
}
+
/**
* Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
* {@link org.apache.calcite.rel.logical.LogicalProject}.
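
Stripped of the mapping bookkeeping, the pruning in generateNewGroupset reduces to three bitset operations: keep the contained unique key, keep anything referenced downstream, drop the rest. A standalone restatement (the class and method names are illustrative; ImmutableBitSet.except is the same Calcite call the patch uses):

    import org.apache.calcite.util.ImmutableBitSet;

    final class GroupKeyPruningSketch {
      private GroupKeyPruningSketch() {}

      // Group by columns that are neither part of a contained unique key nor
      // referenced by any downstream operator cannot change which groups
      // exist, so they can be dropped from the grouping key.
      static ImmutableBitSet prune(ImmutableBitSet groupSet,
          ImmutableBitSet uniqueKey, ImmutableBitSet fieldsUsed) {
        ImmutableBitSet nonKeyColumns = groupSet.except(uniqueKey);
        ImmutableBitSet removable = nonKeyColumns.except(fieldsUsed);
        return groupSet.except(removable);
      }
    }

For the dest_g21 test shown earlier — select key1 from dest_g21 group by key1, value1, where key1 is declared NOT NULL and UNIQUE with RELY — this gives prune({0, 1}, {0}, {0}) = {0}, which is why that explain output degenerates to a bare TableScan with no Group By Operator.
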
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java
new file mode 100644
index 0000000..5ef945c
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/EstimateUniqueKeys.java
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Correlate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.JoinInfo;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.BitSets;
+import org.apache.calcite.util.ImmutableBitSet;
+
+import com.google.common.collect.ImmutableSet;
+import org.apache.calcite.util.Util;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * EstimateUniqueKeys provides the ability to estimate unique keys based on statistics.
+ */
+//TODO: Ideally RelMdUniqueKeys should be modified (on the Calcite side) to accept a parameter based on which
+// this logic would be applied.
+public final class EstimateUniqueKeys {
+ //~ Constructors -----------------------------------------------------------
+ private EstimateUniqueKeys() {
+ }
+
+ //~ Methods ----------------------------------------------------------------
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveFilter rel) {
+ return getUniqueKeys(rel.getInput());
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveSortLimit rel) {
+ return getUniqueKeys(rel.getInput());
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(Correlate rel) {
+ return getUniqueKeys(rel.getLeft());
+ }
+
+
+ // Infer uniqueness if:
+ //   - ndv(col) = rowCount(col)
+ //   - TBD for numerics: max(col) - min(col) + 1 = rowCount(col)
+ private static Set<ImmutableBitSet> generateKeysUsingStatsEstimation(Project rel,
+ HiveTableScan tScan) {
+ Map<Integer, Integer> posMap = new HashMap<Integer, Integer>();
+ int projectPos = 0;
+ int colStatsPos = 0;
+
+ BitSet projectedCols = new BitSet();
+ for (RexNode r : rel.getProjects()) {
+ if (r instanceof RexInputRef) {
+ projectedCols.set(((RexInputRef) r).getIndex());
+ posMap.put(colStatsPos, projectPos);
+ colStatsPos++;
+ }
+ projectPos++;
+ }
+
+ RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+ double numRows = mq.getRowCount(tScan);
+ List<ColStatistics> colStats = tScan.getColStat(BitSets
+ .toList(projectedCols));
+ Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>();
+
+ colStatsPos = 0;
+ for (ColStatistics cStat : colStats) {
+ boolean isKey = false;
+ if (cStat.getCountDistint() >= numRows) {
+ isKey = true;
+ }
+ if (!isKey && cStat.getRange() != null &&
+ cStat.getRange().maxValue != null &&
+ cStat.getRange().minValue != null) {
+ double r = cStat.getRange().maxValue.doubleValue() -
+ cStat.getRange().minValue.doubleValue() + 1;
+ isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON);
+ }
+ if (isKey) {
+ ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos));
+ keys.add(key);
+ }
+ colStatsPos++;
+ }
+
+ return keys;
+
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveProject rel) {
+
+ HiveTableScan tScan = getTableScan(rel.getInput(), false);
+ if (tScan != null) {
+ return generateKeysUsingStatsEstimation(rel, tScan);
+ }
+
+ // LogicalProject maps a set of rows to a different set;
+ // Without knowledge of the mapping function (whether it
+ // preserves uniqueness), it is only safe to derive uniqueness
+ // info from the child of a project when the mapping is f(a) => a.
+ //
+ // Furthermore, the unique bitset coming from the child needs
+ // to be mapped to match the output of the project.
+ final Map<Integer, Integer> mapInToOutPos = new HashMap<>();
+ final List<RexNode> projExprs = rel.getProjects();
+ final Set<ImmutableBitSet> projUniqueKeySet = new HashSet<>();
+
+ // Build an input to output position map.
+ for (int i = 0; i < projExprs.size(); i++) {
+ RexNode projExpr = projExprs.get(i);
+ if (projExpr instanceof RexInputRef) {
+ mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i);
+ }
+ }
+
+ if (mapInToOutPos.isEmpty()) {
+ // if there's no RexInputRef in the projected expressions
+ // return empty set.
+ return projUniqueKeySet;
+ }
+
+ Set<ImmutableBitSet> childUniqueKeySet =
+ getUniqueKeys(rel.getInput());
+
+ if (childUniqueKeySet != null) {
+ // Now add to the projUniqueKeySet the child keys that are fully
+ // projected.
+ for (ImmutableBitSet colMask : childUniqueKeySet) {
+ ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder();
+ boolean completeKeyProjected = true;
+ for (int bit : colMask) {
+ if (mapInToOutPos.containsKey(bit)) {
+ tmpMask.set(mapInToOutPos.get(bit));
+ } else {
+ // Skip the child unique key if part of it is not
+ // projected.
+ completeKeyProjected = false;
+ break;
+ }
+ }
+ if (completeKeyProjected) {
+ projUniqueKeySet.add(tmpMask.build());
+ }
+ }
+ }
+
+ return projUniqueKeySet;
+
+ }
+
+ private static RelNode getRelNode(RelNode rel) {
+ if (rel != null && rel instanceof HepRelVertex) {
+ rel = ((HepRelVertex) rel).getCurrentRel();
+ } else if (rel != null && rel instanceof RelSubset) {
+ rel = Util.first(((RelSubset)rel).getBest(), ((RelSubset) rel).getOriginal());
+ }
+ return rel;
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveJoin rel) {
+ RelNode left = getRelNode(rel.getLeft());
+ RelNode right = getRelNode(rel.getRight());
+
+ // first add the different combinations of concatenated unique keys
+ // from the left and the right, adjusting the right hand side keys to
+ // reflect the addition of the left hand side
+ //
+ // NOTE zfong 12/18/06 - If the number of tables in a join is large,
+ // the number of combinations of unique key sets will explode. If
+ // that is undesirable, use RelMetadataQuery.areColumnsUnique() as
+ // an alternative way of getting unique key information.
+
+ final Set<ImmutableBitSet> retSet = new HashSet<>();
+ final Set<ImmutableBitSet> leftSet = getUniqueKeys(left);
+ Set<ImmutableBitSet> rightSet = null;
+
+ final Set<ImmutableBitSet> tmpRightSet = getUniqueKeys(right);
+ int nFieldsOnLeft = left.getRowType().getFieldCount();
+
+ if (tmpRightSet != null) {
+ rightSet = new HashSet<>();
+ for (ImmutableBitSet colMask : tmpRightSet) {
+ ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder();
+ for (int bit : colMask) {
+ tmpMask.set(bit + nFieldsOnLeft);
+ }
+ rightSet.add(tmpMask.build());
+ }
+
+ if (leftSet != null) {
+ for (ImmutableBitSet colMaskRight : rightSet) {
+ for (ImmutableBitSet colMaskLeft : leftSet) {
+ retSet.add(colMaskLeft.union(colMaskRight));
+ }
+ }
+ }
+ }
+
+ // locate the columns that participate in equijoins
+ final JoinInfo joinInfo = rel.analyzeCondition();
+ RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+
+ // determine if either or both the LHS and RHS are unique on the
+ // equijoin columns
+ final Boolean leftUnique =
+ mq.areColumnsUnique(left, joinInfo.leftSet());
+ final Boolean rightUnique =
+ mq.areColumnsUnique(right, joinInfo.rightSet());
+
+ // if the right hand side is unique on its equijoin columns, then we can
+ // add the unique keys from left if the left hand side is not null
+ // generating
+ if ((rightUnique != null)
+ && rightUnique
+ && (leftSet != null)
+ && !(rel.getJoinType().generatesNullsOnLeft())) {
+ retSet.addAll(leftSet);
+ }
+
+ // same as above except left and right are reversed
+ if ((leftUnique != null)
+ && leftUnique
+ && (rightSet != null)
+ && !(rel.getJoinType().generatesNullsOnRight())) {
+ retSet.addAll(rightSet);
+ }
+
+ return retSet;
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveSemiJoin rel) {
+ // only return the unique keys from the LHS since a semijoin only
+ // returns the LHS
+ return getUniqueKeys(rel.getLeft());
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(HiveAggregate rel) {
+ // group by keys form a unique key
+ return ImmutableSet.of(rel.getGroupSet());
+ }
+
+ private static Set<ImmutableBitSet> getUniqueKeys(SetOp rel) {
+ if (!rel.all) {
+ return ImmutableSet.of(
+ ImmutableBitSet.range(rel.getRowType().getFieldCount()));
+ }
+ return ImmutableSet.of();
+ }
+
+ // Catch-all rule when none of the others apply.
+ public static Set<ImmutableBitSet> getUniqueKeys(RelNode rel) {
+ rel = getRelNode(rel);
+ if (rel instanceof HiveFilter) {
+ return getUniqueKeys((HiveFilter) rel);
+ } else if (rel instanceof HiveSortLimit) {
+ return getUniqueKeys((HiveSortLimit) rel);
+ } else if (rel instanceof Correlate) {
+ return getUniqueKeys((Correlate) rel);
+ } else if (rel instanceof HiveProject) {
+ return getUniqueKeys((HiveProject) rel);
+ } else if (rel instanceof HiveJoin) {
+ return getUniqueKeys((HiveJoin) rel);
+ } else if (rel instanceof HiveSemiJoin) {
+ return getUniqueKeys((HiveSemiJoin) rel);
+ } else if (rel instanceof HiveAggregate) {
+ return getUniqueKeys((HiveAggregate) rel);
+ } else if (rel instanceof SetOp) {
+ return getUniqueKeys((SetOp) rel);
+ } else {
+ return null;
+ }
+ }
+
+ /*
+ * Traverse a path of Filters and Projects to get to the TableScan.
+ * When deriving unique keys, stop if you reach a Project; it will be handled
+ * by the invocation on the Project itself.
+ * When getting the base rowCount of a path, keep going past a Project.
+ */
+ static HiveTableScan getTableScan(RelNode r, boolean traverseProject) {
+
+ while (r != null && !(r instanceof HiveTableScan)) {
+ if (r instanceof HepRelVertex) {
+ r = ((HepRelVertex) r).getCurrentRel();
+ } else if (r instanceof Filter) {
+ r = ((Filter) r).getInput();
+ } else if (traverseProject && r instanceof Project) {
+ r = ((Project) r).getInput();
+ } else {
+ r = null;
+ }
+ }
+ return r == null ? null : (HiveTableScan) r;
+ }
+}
+
+// End EstimateUniqueKeys.java
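
Stripped of plumbing, the statistics check in generateKeysUsingStatsEstimation marks a projected column as a key in two situations: when its distinct-value count covers every row, or when its numeric range is exactly dense. A sketch with illustrative names; the patch reads the same inputs from ColStatistics and RelMetadataQuery, with RelOptUtil.EPSILON as the tolerance:

    final class StatsUniquenessSketch {
      private StatsUniquenessSketch() {}

      // Estimates only: these tests hold only as well as the underlying
      // column statistics do.
      static boolean looksUnique(double rowCount, double ndv,
          Double min, Double max) {
        if (ndv >= rowCount) {
          return true;            // every row has a distinct value
        }
        if (min != null && max != null) {
          // a gap-free range such as an id column: one value per row
          double rangeWidth = max - min + 1;
          return Math.abs(rowCount - rangeWidth) < 1e-5;  // RelOptUtil.EPSILON
        }
        return false;
      }
    }
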
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java
new file mode 100644
index 0000000..2dede97
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdColumnUniqueness.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+
+import org.apache.calcite.rel.metadata.BuiltInMetadata;
+import org.apache.calcite.rel.metadata.MetadataDef;
+import org.apache.calcite.rel.metadata.MetadataHandler;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+
+/**
+ * HiveRelMdColumnUniqueness implements the ability to get unique keys for table scan op.
+ */
+public final class HiveRelMdColumnUniqueness
+ implements MetadataHandler<BuiltInMetadata.ColumnUniqueness> {
+ public static final RelMetadataProvider SOURCE =
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.COLUMN_UNIQUENESS.method, new HiveRelMdColumnUniqueness());
+
+ //~ Constructors -----------------------------------------------------------
+
+ private HiveRelMdColumnUniqueness() {}
+
+ //~ Methods ----------------------------------------------------------------
+
+ public MetadataDef<BuiltInMetadata.ColumnUniqueness> getDef() {
+ return BuiltInMetadata.ColumnUniqueness.DEF;
+ }
+
+ public Boolean areColumnsUnique(HiveTableScan rel, RelMetadataQuery mq,
+ ImmutableBitSet columns, boolean ignoreNulls) {
+ if(ignoreNulls) {
+ return rel.getTable().isKey(columns);
+ } else {
+ RelOptHiveTable tbl = (RelOptHiveTable)rel.getTable();
+ return tbl.isNonNullableKey(columns);
+ }
+ }
+}
+
+// End HiveRelMdColumnUniqueness.java
+
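
The ignoreNulls flag is what ties this handler to the rest of the patch: with it set, any declared key answers true, while without it only keys from the non-nullable list do. A hypothetical call sequence, assuming a planner-supplied RelMetadataQuery mq and a HiveTableScan scan over a table declaring UNIQUE on a nullable column 0:

    // Both calls use Calcite's three-argument areColumnsUnique overload;
    // the last argument is ignoreNulls.
    Boolean lenient = mq.areColumnsUnique(scan, ImmutableBitSet.of(0), true);
    // -> TRUE: the declared unique constraint alone is enough.
    Boolean strict = mq.areColumnsUnique(scan, ImmutableBitSet.of(0), false);
    // -> FALSE: a nullable unique column may repeat NULLs, so rows are not
    // guaranteed pairwise distinct.
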
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
index 1ca1937..576ed34 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
@@ -317,7 +317,7 @@ public class HiveRelMdRowCount extends RelMdRowCount {
(!leftChild && joinRel.getJoinType().generatesNullsOnLeft())) {
return 1.0;
} else {
- HiveTableScan tScan = HiveRelMdUniqueKeys.getTableScan(child, true);
+ HiveTableScan tScan = EstimateUniqueKeys.getTableScan(child, true);
if (tScan != null) {
double tRowCount = mq.getRowCount(tScan);
return childRowCount / tRowCount;
@@ -329,7 +329,10 @@ public class HiveRelMdRowCount extends RelMdRowCount {
private static boolean isKey(ImmutableBitSet c, RelNode rel, RelMetadataQuery mq) {
boolean isKey = false;
- Set<ImmutableBitSet> keys = mq.getUniqueKeys(rel);
+// EstimateUniqueKeys doesn't go through the metadata providers anymore, which means we will
+// no longer take advantage of metadata caching. This could potentially increase query compile
+// time; leaving a note here in case we see an increase in timings.
+ Set<ImmutableBitSet> keys = EstimateUniqueKeys.getUniqueKeys(rel);
if (keys != null) {
for (ImmutableBitSet key : keys) {
if (key.equals(c)) {
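The caching trade-off called out in the new comment: RelMetadataQuery memoizes handler results for the lifetime of the query object, whereas a plain static call recomputes on every invocation. Roughly, with the EstimateUniqueKeys signature inferred from the hunk above rather than from its source:

    // Dispatched through the provider chain; results memoized by mq.
    Set<ImmutableBitSet> cached = mq.getUniqueKeys(rel);
    // Direct static call; no reflective dispatch, recomputed on each call.
    Set<ImmutableBitSet> direct = EstimateUniqueKeys.getUniqueKeys(rel);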
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
index 3bf62c5..7772335 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdUniqueKeys.java
@@ -17,31 +17,20 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
-import java.util.BitSet;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
-import java.util.Map;
import java.util.Set;
-import org.apache.calcite.plan.RelOptUtil;
-import org.apache.calcite.plan.hep.HepRelVertex;
-import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.core.Filter;
-import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.metadata.BuiltInMetadata;
import org.apache.calcite.rel.metadata.MetadataDef;
import org.apache.calcite.rel.metadata.MetadataHandler;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
-import org.apache.calcite.rex.RexInputRef;
-import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.util.BitSets;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
-import org.apache.hadoop.hive.ql.plan.ColStatistics;
public class HiveRelMdUniqueKeys implements MetadataHandler<BuiltInMetadata.UniqueKeys> {
@@ -54,138 +43,15 @@ public class HiveRelMdUniqueKeys implements MetadataHandler<BuiltInMetadata.Uniq
return BuiltInMetadata.UniqueKeys.DEF;
}
- /*
- * Infer Uniquenes if: - rowCount(col) = ndv(col) - TBD for numerics: max(col)
- * - min(col) = rowCount(col)
- *
- * Why are we intercepting Project and not TableScan? Because if we
- * have a method for TableScan, it will not know which columns to check for.
- * Inferring Uniqueness for all columns is very expensive right now. The flip
- * side of doing this is, it only works post Field Trimming.
- */
- public Set<ImmutableBitSet> getUniqueKeys(Project rel, RelMetadataQuery mq, boolean ignoreNulls) {
- HiveTableScan tScan = getTableScan(rel.getInput(), false);
-
- if (tScan == null) {
- // If HiveTableScan is not found, e.g., not sequence of Project and
- // Filter operators, execute the original getUniqueKeys method
-
- // LogicalProject maps a set of rows to a different set;
- // Without knowledge of the mapping function(whether it
- // preserves uniqueness), it is only safe to derive uniqueness
- // info from the child of a project when the mapping is f(a) => a.
- //
- // Further more, the unique bitset coming from the child needs
- // to be mapped to match the output of the project.
- final Map<Integer, Integer> mapInToOutPos = new HashMap<>();
- final List<RexNode> projExprs = rel.getProjects();
- final Set<ImmutableBitSet> projUniqueKeySet = new HashSet<>();
-
- // Build an input to output position map.
- for (int i = 0; i < projExprs.size(); i++) {
- RexNode projExpr = projExprs.get(i);
- if (projExpr instanceof RexInputRef) {
- mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i);
- }
- }
-
- if (mapInToOutPos.isEmpty()) {
- // if there's no RexInputRef in the projected expressions
- // return empty set.
- return projUniqueKeySet;
- }
-
- Set<ImmutableBitSet> childUniqueKeySet =
- mq.getUniqueKeys(rel.getInput(), ignoreNulls);
-
- if (childUniqueKeySet != null) {
- // Now add to the projUniqueKeySet the child keys that are fully
- // projected.
- for (ImmutableBitSet colMask : childUniqueKeySet) {
- ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder();
- boolean completeKeyProjected = true;
- for (int bit : colMask) {
- if (mapInToOutPos.containsKey(bit)) {
- tmpMask.set(mapInToOutPos.get(bit));
- } else {
- // Skip the child unique key if part of it is not
- // projected.
- completeKeyProjected = false;
- break;
- }
- }
- if (completeKeyProjected) {
- projUniqueKeySet.add(tmpMask.build());
- }
- }
- }
-
- return projUniqueKeySet;
- }
-
- Map<Integer, Integer> posMap = new HashMap<Integer, Integer>();
- int projectPos = 0;
- int colStatsPos = 0;
-
- BitSet projectedCols = new BitSet();
- for (RexNode r : rel.getProjects()) {
- if (r instanceof RexInputRef) {
- projectedCols.set(((RexInputRef) r).getIndex());
- posMap.put(colStatsPos, projectPos);
- colStatsPos++;
- }
- projectPos++;
+ public Set<ImmutableBitSet> getUniqueKeys(HiveTableScan rel, RelMetadataQuery mq,
+ boolean ignoreNulls) {
+ RelOptHiveTable tbl = (RelOptHiveTable) rel.getTable();
+ List<ImmutableBitSet> keyList = tbl.getNonNullableKeys();
+ if (keyList != null) {
+ Set<ImmutableBitSet> keySet = new HashSet<>(keyList);
+ return keySet;
}
-
- double numRows = mq.getRowCount(tScan);
- List<ColStatistics> colStats = tScan.getColStat(BitSets
- .toList(projectedCols));
- Set<ImmutableBitSet> keys = new HashSet<ImmutableBitSet>();
-
- colStatsPos = 0;
- for (ColStatistics cStat : colStats) {
- boolean isKey = false;
- if (cStat.getCountDistint() >= numRows) {
- isKey = true;
- }
- if ( !isKey && cStat.getRange() != null &&
- cStat.getRange().maxValue != null &&
- cStat.getRange().minValue != null) {
- double r = cStat.getRange().maxValue.doubleValue() -
- cStat.getRange().minValue.doubleValue() + 1;
- isKey = (Math.abs(numRows - r) < RelOptUtil.EPSILON);
- }
- if ( isKey ) {
- ImmutableBitSet key = ImmutableBitSet.of(posMap.get(colStatsPos));
- keys.add(key);
- }
- colStatsPos++;
- }
-
- return keys;
+ return null;
}
-
- /*
- * traverse a path of Filter, Projects to get to the TableScan.
- * In case of Unique keys, stop if you reach a Project, it will be handled
- * by the invocation on the Project.
- * In case of getting the base rowCount of a Path, keep going past a Project.
- */
- static HiveTableScan getTableScan(RelNode r, boolean traverseProject) {
-
- while (r != null && !(r instanceof HiveTableScan)) {
- if (r instanceof HepRelVertex) {
- r = ((HepRelVertex) r).getCurrentRel();
- } else if (r instanceof Filter) {
- r = ((Filter) r).getInput();
- } else if (traverseProject && r instanceof Project) {
- r = ((Project) r).getInput();
- } else {
- r = null;
- }
- }
- return r == null ? null : (HiveTableScan) r;
- }
-
}
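After this change the handler simply surfaces the table's declared non-nullable keys (for example a PRIMARY KEY ... DISABLE RELY constraint) instead of inferring uniqueness from column statistics. A toy illustration of how such a key set can be tested against a candidate grouping column set, which is the shape of check the group-by key reduction relies on; the table layout and key positions here are made up:

    // Suppose column 0 carries a RELY primary key; the handler above
    // would then report {0} as a unique key of the scan.
    Set<ImmutableBitSet> keys = new HashSet<>();
    keys.add(ImmutableBitSet.of(0));

    // GROUP BY over columns {0, 2} is unique because it contains the key
    // {0}, so the non-key column 2 is redundant as a grouping column.
    ImmutableBitSet groupCols = ImmutableBitSet.of(0, 2);
    boolean coversKey = false;
    for (ImmutableBitSet key : keys) {
      if (groupCols.contains(key)) {  // superset test on bit sets
        coversKey = true;
        break;
      }
    }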
http://git-wip-us.apache.org/repos/asf/hive/blob/1db3debc/ql/src/test/queries/clientpositive/constraints_optimization.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/constraints_optimization.q b/ql/src/test/queries/clientpositive/constraints_optimization.q
new file mode 100644
index 0000000..70ab850
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constraints_optimization.q
@@ -0,0 +1,149 @@
+set hive.strict.checks.cartesian.product=false;
+
+CREATE TABLE `customer_removal_n0`(
+ `c_custkey` bigint,
+ `c_name` string,
+ `c_address` string,
+ `c_city` string,
+ `c_nation` string,
+ `c_region` string,
+ `c_phone` string,
+ `c_mktsegment` string,
+ primary key (`c_custkey`) disable rely);
+
+CREATE TABLE `dates_removal_n0`(
+ `d_datekey` bigint,
+ `d_id` bigint,
+ `d_date` string,
+ `d_dayofweek` string,
+ `d_month` string,
+ `d_year` int,
+ `d_yearmonthnum` int,
+ `d_yearmonth` string,
+ `d_daynuminweek` int,
+ `d_daynuminmonth` int,
+ `d_daynuminyear` int,
+ `d_monthnuminyear` int,
+ `d_weeknuminyear` int,
+ `d_sellingseason` string,
+ `d_lastdayinweekfl` int,
+ `d_lastdayinmonthfl` int,
+ `d_holidayfl` int ,
+ `d_weekdayfl` int,
+ primary key (`d_datekey`, `d_id`) disable rely);
+
+ -- group by key is a single primary key column
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey;
+
+ -- mix of primary + non-primary keys
+ EXPLAIN SELECT c_custkey from customer_removal_n0 where c_nation IN ('USA', 'INDIA') group by c_custkey, c_nation;
+
+ -- multiple keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_id;
+
+ -- multiple keys + non-keys + different order
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_datekey, d_sellingseason
+ order by d_datekey limit 10;
+
+ -- multiple keys in different order and mixed with non-keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10;
+
+ -- same as above but with an aggregate
+ EXPLAIN SELECT count(d_datekey) from dates_removal_n0 where d_year IN (1985, 2004) group by d_id, d_daynuminmonth, d_datekey,
+ d_sellingseason order by d_datekey limit 10;
+
+ -- join
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 0);
+ insert into dates_removal_n0(d_datekey, d_id) values(3, 1);
+ insert into customer_removal_n0 (c_custkey) values(3);
+
+ EXPLAIN SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id;
+ SELECT d_datekey from dates_removal_n0 join customer_removal_n0 on d_datekey = c_custkey group by d_datekey, d_id;
+
+ -- group by keys are not primary keys
+ EXPLAIN SELECT d_datekey from dates_removal_n0 where d_year IN (1985, 2004) group by d_datekey, d_sellingseason
+ order by d_datekey limit 10;
+
+ -- negative
+ -- with aggregate function
+ EXPLAIN SELECT count(c_custkey) from customer_removal_n0 where c_nation IN ('USA', 'INDIA')
+ group by c_custkey, c_nation;
+
+ DROP TABLE customer_removal_n0;
+ DROP TABLE dates_removal_n0;
+
+ -- group by reduction optimization
+ create table dest_g21 (key1 int, value1 double, primary key(key1) disable rely);
+ insert into dest_g21 values(1, 2), (2,2), (3, 1), (4,4), (5, null), (6, null);
+
+ -- value1 will be removed because it is unused, then the whole group by will be removed because key1 is unique
+ explain select key1 from dest_g21 group by key1, value1;
+ select key1 from dest_g21 group by key1, value1;
+ -- same query but with filter
+ explain select key1 from dest_g21 where value1 > 1 group by key1, value1;
+ select key1 from dest_g21 where value1 > 1 group by key1, value1;
+
+ explain select key1 from dest_g21 where key1 > 1 group by key1, value1;
+ select key1 from dest_g21 where key1 > 1 group by key1, value1;
+
+ -- only value1 will be removed because there is an aggregate call
+ explain select count(key1) from dest_g21 group by key1, value1;
+ select count(key1) from dest_g21 group by key1, value1;
+
+ explain select count(key1) from dest_g21 where value1 > 1 group by key1, value1;
+ select count(key1) from dest_g21 where value1 > 1 group by key1, value1;
+
+ -- t1.key1 is unique even after the join, therefore group by (t1.key1, t1.value1) = group by (t1.key1)
+ explain select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+ select t1.key1 from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+
+ explain select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+ select count(t1.key1) from dest_g21 t1 join dest_g21 t2 on t1.key1 = t2.key1 where t2.value1 > 2 group by t1.key1, t1.value1;
+
+ -- both the aggregate and one of the group by keys should be removed
+ explain select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+ select key1 from (select key1, count(key1) from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+
+ -- one of the aggregates will be removed and one of the group by keys will be removed
+ explain select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+ select key1, sm from (select key1, count(key1), sum(key1) as sm from dest_g21 where value1 < 4.5 group by key1, value1) sub;
+
+ DROP table dest_g21;
+
+CREATE TABLE tconst(i int NOT NULL disable rely, j INT NOT NULL disable norely, d_year string);
+INSERT INTO tconst values(1, 1, '2001'), (2, null, '2002'), (3, 3, '2010');
+
+-- explicit NOT NULL filter
+explain select i, j from tconst where i is not null group by i,j, d_year;
+select i, j from tconst where i is not null group by i,j, d_year;
+
+-- filter on i should be removed
+explain select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year;
+select i, j from tconst where i IS NOT NULL and j IS NOT NULL group by i,j, d_year;
+
+-- the WHERE clause will be removed since i is not null is always true
+explain select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year;
+select i,j from tconst where i is not null OR j IS NOT NULL group by i, j, d_year;
+
+-- should not have implicit filter on join keys
+explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year;
+select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.j group by t1.i, t1.d_year;
+
+-- both join keys have NOT NULL
+explain select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year;
+select sum(t1.i) from tconst t1 join tconst t2 on t1.i=t2.i group by t1.i, t1.d_year;
+
+DROP TABLE tconst;
+
+
+-- UNIQUE + NOT NULL (same as primary key)
+create table dest_g21 (key1 int NOT NULL disable rely, value1 double, UNIQUE(key1) disable rely);
+explain select key1 from dest_g21 group by key1, value1;
+
+-- UNIQUE with nullability
+create table dest_g24 (key1 int , value1 double, UNIQUE(key1) disable rely);
+explain select key1 from dest_g24 group by key1, value1;
+
+DROP TABLE dest_g21;
+DROP TABLE dest_g24;