You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2019/01/28 19:04:39 UTC

hive git commit: HIVE-21132: Semi join edge is not being removed despite max bloomfilter entries set to 1 (Vineet Garg, reviewed by Jason Dere)

Repository: hive
Updated Branches:
  refs/heads/master fd92d8865 -> ce654250b


HIVE-21132: Semi join edge is not being removed despite max bloomfilter entries set to 1 (Vineet Garg, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ce654250
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ce654250
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ce654250

Branch: refs/heads/master
Commit: ce654250b5b2e183886cf642806b3b33d8bfa5fc
Parents: fd92d88
Author: Vineet Garg <vg...@apache.org>
Authored: Mon Jan 28 11:04:11 2019 -0800
Committer: Vineet Garg <vg...@apache.org>
Committed: Mon Jan 28 11:04:11 2019 -0800

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   2 +
 .../correlation/ReduceSinkDeDuplication.java    |  20 +
 .../queries/clientpositive/semijoin_reddedup.q  | 139 +++++
 .../clientpositive/llap/semijoin_reddedup.q.out | 566 +++++++++++++++++++
 4 files changed, 727 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ce654250/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index c190db3..e0ea710 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -715,6 +715,8 @@ minillaplocal.query.files=\
   semijoin7.q,\
   semijoin_hint.q,\
   sharedwork.q,\
+  semijoin_reddedup,\
+  semijoin_reddedup.q,\
   sharedworkext.q,\
   smb_cache.q,\
   special_character_in_tabnames_1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/ce654250/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
index 5269eb6..b25bcf0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
@@ -177,6 +177,20 @@ public class ReduceSinkDeDuplication extends Transform {
 
   static class GroupbyReducerProc extends AbsctractReducerReducerProc {
 
+    // Given a group-by operator, determines whether that group by belongs to a semi-join branch.
+    // Note that this works only for the second-to-last group by in a semi-join branch (X-GB-RS-GB-RS).
+    private boolean isSemiJoinBranch(final GroupByOperator gOp, ReduceSinkDeduplicateProcCtx dedupCtx) {
+      for(int i=0; i<gOp.getChildren().size(); i++) {
+        if(gOp.getChildren().get(i) instanceof  ReduceSinkOperator) {
+          ReduceSinkOperator rsOp = (ReduceSinkOperator)gOp.getChildren().get(i);
+          if(dedupCtx.getPctx().getRsToSemiJoinBranchInfo().containsKey(rsOp)) {
+            return true;
+          }
+        }
+      }
+      return false;
+    }
+
     // pRS-pGBY-cRS
     @Override
     public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
@@ -187,6 +201,9 @@ public class ReduceSinkDeDuplication extends Transform {
       if (pGBY == null) {
         return false;
       }
+      if(isSemiJoinBranch(pGBY, dedupCtx)) {
+        return false;
+      }
       ReduceSinkOperator pRS =
           CorrelationUtilities.findPossibleParent(
               pGBY, ReduceSinkOperator.class, dedupCtx.trustScript());
@@ -211,6 +228,9 @@ public class ReduceSinkDeDuplication extends Transform {
       if (pGBY == null) {
         return false;
       }
+      if(isSemiJoinBranch(cGBY, dedupCtx)) {
+        return false;
+      }
       ReduceSinkOperator pRS =
           CorrelationUtilities.getSingleParent(pGBY, ReduceSinkOperator.class);
       if (pRS != null && ReduceSinkDeDuplicationUtils.merge(cRS, pRS, dedupCtx.minReducer())) {

http://git-wip-us.apache.org/repos/asf/hive/blob/ce654250/ql/src/test/queries/clientpositive/semijoin_reddedup.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/semijoin_reddedup.q b/ql/src/test/queries/clientpositive/semijoin_reddedup.q
new file mode 100644
index 0000000..b01e58c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/semijoin_reddedup.q
@@ -0,0 +1,139 @@
+--! qt:dataset:lineitem
+--! qt:dataset:part
+--! qt:dataset:src
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+--set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+set hive.tez.bloom.filter.factor=1.0f;
+set hive.auto.convert.join=false;
+set hive.optimize.shared.work=false;
+
+
+create database tpch_test;
+use tpch_test;
+
+CREATE TABLE `customer`(
+  `c_custkey` bigint, 
+  `c_name` string, 
+  `c_address` string, 
+  `c_nationkey` bigint, 
+  `c_phone` string, 
+  `c_acctbal` double, 
+  `c_mktsegment` string, 
+  `c_comment` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'transactional'='true', 
+  'transactional_properties'='default', 
+  'transient_lastDdlTime'='1543026723');
+
+CREATE TABLE `lineitem`(
+  `l_orderkey` bigint, 
+  `l_partkey` bigint, 
+  `l_suppkey` bigint, 
+  `l_linenumber` int, 
+  `l_quantity` double, 
+  `l_extendedprice` double, 
+  `l_discount` double, 
+  `l_tax` double, 
+  `l_returnflag` string, 
+  `l_linestatus` string, 
+  `l_shipdate` string, 
+  `l_commitdate` string, 
+  `l_receiptdate` string, 
+  `l_shipinstruct` string, 
+  `l_shipmode` string, 
+  `l_comment` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'transactional'='true', 
+  'transactional_properties'='default', 
+  'transient_lastDdlTime'='1543027179');
+
+CREATE TABLE `orders`(
+  `o_orderkey` bigint, 
+  `o_custkey` bigint, 
+  `o_orderstatus` string, 
+  `o_totalprice` double, 
+  `o_orderdate` string, 
+  `o_orderpriority` string, 
+  `o_clerk` string, 
+  `o_shippriority` int, 
+  `o_comment` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'transactional'='true', 
+  'transactional_properties'='default', 
+  'transient_lastDdlTime'='1543026824');
+
+alter table customer update statistics set('numRows'='150000000','rawDataSize'='8633707142');
+alter table lineitem update statistics set('numRows'='5999989709','rawDataSize'='184245066955');
+alter table orders update statistics set('numRows'='1500000000','rawDataSize'='46741318253');
+
+
+create view q18_tmp_cached as
+select l_orderkey, sum(l_quantity) as t_sum_quantity
+from lineitem
+where l_orderkey is not null
+group by l_orderkey;
+
+-- Cap the bloom filter at a single entry so that semijoin reduction edges are expected to be removed
+
+set hive.tez.min.bloom.filter.entries=0;
+set hive.tez.max.bloom.filter.entries=1;
+
+explain
+create table q18_large_volume_customer_cached stored as orc tblproperties ('transactional'='true', 'transactional_properties'='default') as
+select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
+from customer, orders, q18_tmp_cached t, lineitem l
+where
+  c_custkey = o_custkey and o_orderkey = t.l_orderkey
+  and o_orderkey is not null and t.t_sum_quantity > 300
+  and o_orderkey = l.l_orderkey and l.l_orderkey is not null
+group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
+order by o_totalprice desc, o_orderdate
+limit 100;
+
+create table q18_large_volume_customer_cached stored as orc tblproperties ('transactional'='true', 'transactional_properties'='default') as
+select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
+from customer, orders, q18_tmp_cached t, lineitem l
+where
+  c_custkey = o_custkey and o_orderkey = t.l_orderkey
+  and o_orderkey is not null and t.t_sum_quantity > 300
+  and o_orderkey = l.l_orderkey and l.l_orderkey is not null
+group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
+order by o_totalprice desc, o_orderdate
+limit 100;
+
+drop database tpch_test cascade;

http://git-wip-us.apache.org/repos/asf/hive/blob/ce654250/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out
new file mode 100644
index 0000000..6a43d6b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out
@@ -0,0 +1,566 @@
+PREHOOK: query: create database tpch_test
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:tpch_test
+POSTHOOK: query: create database tpch_test
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:tpch_test
+PREHOOK: query: use tpch_test
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:tpch_test
+POSTHOOK: query: use tpch_test
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:tpch_test
+PREHOOK: query: CREATE TABLE `customer`(
+  `c_custkey` bigint, 
+  `c_name` string, 
+  `c_address` string, 
+  `c_nationkey` bigint, 
+  `c_phone` string, 
+  `c_acctbal` double, 
+  `c_mktsegment` string, 
+  `c_comment` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'transactional'='true', 
+  'transactional_properties'='default', 
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:tpch_test
+PREHOOK: Output: tpch_test@customer
+POSTHOOK: query: CREATE TABLE `customer`(
+  `c_custkey` bigint, 
+  `c_name` string, 
+  `c_address` string, 
+  `c_nationkey` bigint, 
+  `c_phone` string, 
+  `c_acctbal` double, 
+  `c_mktsegment` string, 
+  `c_comment` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'transactional'='true', 
+  'transactional_properties'='default', 
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:tpch_test
+POSTHOOK: Output: tpch_test@customer
+PREHOOK: query: CREATE TABLE `lineitem`(
+  `l_orderkey` bigint, 
+  `l_partkey` bigint, 
+  `l_suppkey` bigint, 
+  `l_linenumber` int, 
+  `l_quantity` double, 
+  `l_extendedprice` double, 
+  `l_discount` double, 
+  `l_tax` double, 
+  `l_returnflag` string, 
+  `l_linestatus` string, 
+  `l_shipdate` string, 
+  `l_commitdate` string, 
+  `l_receiptdate` string, 
+  `l_shipinstruct` string, 
+  `l_shipmode` string, 
+  `l_comment` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'transactional'='true', 
+  'transactional_properties'='default', 
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:tpch_test
+PREHOOK: Output: tpch_test@lineitem
+POSTHOOK: query: CREATE TABLE `lineitem`(
+  `l_orderkey` bigint, 
+  `l_partkey` bigint, 
+  `l_suppkey` bigint, 
+  `l_linenumber` int, 
+  `l_quantity` double, 
+  `l_extendedprice` double, 
+  `l_discount` double, 
+  `l_tax` double, 
+  `l_returnflag` string, 
+  `l_linestatus` string, 
+  `l_shipdate` string, 
+  `l_commitdate` string, 
+  `l_receiptdate` string, 
+  `l_shipinstruct` string, 
+  `l_shipmode` string, 
+  `l_comment` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'transactional'='true', 
+  'transactional_properties'='default', 
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:tpch_test
+POSTHOOK: Output: tpch_test@lineitem
+PREHOOK: query: CREATE TABLE `orders`(
+  `o_orderkey` bigint, 
+  `o_custkey` bigint, 
+  `o_orderstatus` string, 
+  `o_totalprice` double, 
+  `o_orderdate` string, 
+  `o_orderpriority` string, 
+  `o_clerk` string, 
+  `o_shippriority` int, 
+  `o_comment` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'transactional'='true', 
+  'transactional_properties'='default', 
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:tpch_test
+PREHOOK: Output: tpch_test@orders
+POSTHOOK: query: CREATE TABLE `orders`(
+  `o_orderkey` bigint, 
+  `o_custkey` bigint, 
+  `o_orderstatus` string, 
+  `o_totalprice` double, 
+  `o_orderdate` string, 
+  `o_orderpriority` string, 
+  `o_clerk` string, 
+  `o_shippriority` int, 
+  `o_comment` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'transactional'='true', 
+  'transactional_properties'='default', 
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:tpch_test
+POSTHOOK: Output: tpch_test@orders
+PREHOOK: query: alter table customer update statistics set('numRows'='150000000','rawDataSize'='8633707142')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: tpch_test@customer
+PREHOOK: Output: tpch_test@customer
+POSTHOOK: query: alter table customer update statistics set('numRows'='150000000','rawDataSize'='8633707142')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: tpch_test@customer
+POSTHOOK: Output: tpch_test@customer
+PREHOOK: query: alter table lineitem update statistics set('numRows'='5999989709','rawDataSize'='184245066955')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: tpch_test@lineitem
+PREHOOK: Output: tpch_test@lineitem
+POSTHOOK: query: alter table lineitem update statistics set('numRows'='5999989709','rawDataSize'='184245066955')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: tpch_test@lineitem
+POSTHOOK: Output: tpch_test@lineitem
+PREHOOK: query: alter table orders update statistics set('numRows'='1500000000','rawDataSize'='46741318253')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: tpch_test@orders
+PREHOOK: Output: tpch_test@orders
+POSTHOOK: query: alter table orders update statistics set('numRows'='1500000000','rawDataSize'='46741318253')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: tpch_test@orders
+POSTHOOK: Output: tpch_test@orders
+PREHOOK: query: create view q18_tmp_cached as
+select l_orderkey, sum(l_quantity) as t_sum_quantity
+from lineitem
+where l_orderkey is not null
+group by l_orderkey
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: tpch_test@lineitem
+PREHOOK: Output: database:tpch_test
+PREHOOK: Output: tpch_test@q18_tmp_cached
+POSTHOOK: query: create view q18_tmp_cached as
+select l_orderkey, sum(l_quantity) as t_sum_quantity
+from lineitem
+where l_orderkey is not null
+group by l_orderkey
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: tpch_test@lineitem
+POSTHOOK: Output: database:tpch_test
+POSTHOOK: Output: tpch_test@q18_tmp_cached
+POSTHOOK: Lineage: q18_tmp_cached.l_orderkey SIMPLE [(lineitem)lineitem.FieldSchema(name:l_orderkey, type:bigint, comment:null), ]
+POSTHOOK: Lineage: q18_tmp_cached.t_sum_quantity EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_quantity, type:double, comment:null), ]
+PREHOOK: query: explain
+create table q18_large_volume_customer_cached stored as orc tblproperties ('transactional'='true', 'transactional_properties'='default') as
+select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
+from customer, orders, q18_tmp_cached t, lineitem l
+where
+  c_custkey = o_custkey and o_orderkey = t.l_orderkey
+  and o_orderkey is not null and t.t_sum_quantity > 300
+  and o_orderkey = l.l_orderkey and l.l_orderkey is not null
+group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
+order by o_totalprice desc, o_orderdate
+limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: tpch_test@customer
+PREHOOK: Input: tpch_test@lineitem
+PREHOOK: Input: tpch_test@orders
+PREHOOK: Input: tpch_test@q18_tmp_cached
+PREHOOK: Output: database:tpch_test
+PREHOOK: Output: tpch_test@q18_large_volume_customer_cached
+POSTHOOK: query: explain
+create table q18_large_volume_customer_cached stored as orc tblproperties ('transactional'='true', 'transactional_properties'='default') as
+select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
+from customer, orders, q18_tmp_cached t, lineitem l
+where
+  c_custkey = o_custkey and o_orderkey = t.l_orderkey
+  and o_orderkey is not null and t.t_sum_quantity > 300
+  and o_orderkey = l.l_orderkey and l.l_orderkey is not null
+group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
+order by o_totalprice desc, o_orderdate
+limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: tpch_test@customer
+POSTHOOK: Input: tpch_test@lineitem
+POSTHOOK: Input: tpch_test@orders
+POSTHOOK: Input: tpch_test@q18_tmp_cached
+POSTHOOK: Output: database:tpch_test
+POSTHOOK: Output: tpch_test@q18_large_volume_customer_cached
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-2
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+        Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orders
+                  filterExpr: (o_orderkey is not null and o_custkey is not null) (type: boolean)
+                  Statistics: Num rows: 1500000000 Data size: 296399999792 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (o_custkey is not null and o_orderkey is not null) (type: boolean)
+                    Statistics: Num rows: 1349999996 Data size: 266759999022 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: o_orderkey (type: bigint), o_custkey (type: bigint), o_totalprice (type: double), o_orderdate (type: string)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1349999996 Data size: 266759999022 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: bigint)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: bigint)
+                        Statistics: Num rows: 1349999996 Data size: 266759999022 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: bigint), _col2 (type: double), _col3 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: customer
+                  filterExpr: c_custkey is not null (type: boolean)
+                  Statistics: Num rows: 150000000 Data size: 27360000192 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: c_custkey is not null (type: boolean)
+                    Statistics: Num rows: 142500000 Data size: 25992000182 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: c_custkey (type: bigint), c_name (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 142500000 Data size: 25992000182 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 142500000 Data size: 25992000182 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: lineitem
+                  filterExpr: l_orderkey is not null (type: boolean)
+                  properties:
+                    insideView TRUE
+                  Statistics: Num rows: 5999989709 Data size: 91199843728 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: l_orderkey is not null (type: boolean)
+                    Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(l_quantity)
+                      keys: l_orderkey (type: bigint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: double)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Map 9 
+            Map Operator Tree:
+                TableScan
+                  alias: l
+                  filterExpr: l_orderkey is not null (type: boolean)
+                  Statistics: Num rows: 5999989709 Data size: 91199843728 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: l_orderkey is not null (type: boolean)
+                    Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: l_orderkey (type: bigint), l_quantity (type: double)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: bigint)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: bigint)
+                        Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: double)
+            Execution mode: vectorized, llap
+            LLAP IO: may be used (ACID table)
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: bigint)
+                  1 _col0 (type: bigint)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1485000027 Data size: 293436005284 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: bigint)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: bigint)
+                  Statistics: Num rows: 1485000027 Data size: 293436005284 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: double), _col3 (type: string), _col4 (type: bigint), _col5 (type: string)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Inner Join 0 to 2
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+                  2 _col0 (type: bigint)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col8
+                Statistics: Num rows: 12539978782 Data size: 190607677805 Basic stats: COMPLETE Column stats: NONE
+                Top N Key Operator
+                  sort order: -++++
+                  keys: _col2 (type: double), _col3 (type: string), _col0 (type: bigint), _col4 (type: bigint), _col5 (type: string)
+                  Statistics: Num rows: 12539978782 Data size: 190607677805 Basic stats: COMPLETE Column stats: NONE
+                  top n: 100
+                  Group By Operator
+                    aggregations: sum(_col8)
+                    keys: _col2 (type: double), _col3 (type: string), _col0 (type: bigint), _col4 (type: bigint), _col5 (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Statistics: Num rows: 12539978782 Data size: 190607677805 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: string)
+                      sort order: -++++
+                      Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: string)
+                      Statistics: Num rows: 12539978782 Data size: 190607677805 Basic stats: COMPLETE Column stats: NONE
+                      TopN Hash Memory Usage: 0.1
+                      value expressions: _col5 (type: double)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: bigint), KEY._col3 (type: bigint), KEY._col4 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col4 (type: string), _col3 (type: bigint), _col2 (type: bigint), _col1 (type: string), _col0 (type: double), _col5 (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE
+                  Limit
+                    Number of rows: 100
+                    Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                          name: tpch_test.q18_large_volume_customer_cached
+                      Write Type: INSERT
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: double), _col5 (type: double)
+                      outputColumnNames: col1, col2, col3, col4, col5, col6
+                      Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll'), compute_stats(col6, 'hll')
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>), _col5 (type: struct<columntype:string,min:double,max:double,countnulls:bigint,bitvector:binary>)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 8 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2849995116 Data size: 43319925835 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col1 > 300.0D) (type: boolean)
+                  Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+      Create Table Operator:
+        Create Table
+          columns: c_name string, c_custkey bigint, o_orderkey bigint, o_orderdate string, o_totalprice double, _c5 double
+          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+          serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+          name: tpch_test.q18_large_volume_customer_cached
+          table properties:
+            transactional true
+            transactional_properties default
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, _c5
+          Column Types: string, bigint, bigint, string, double, double
+          Table: tpch_test.q18_large_volume_customer_cached
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+          Write Type: INSERT
+
+PREHOOK: query: create table q18_large_volume_customer_cached stored as orc tblproperties ('transactional'='true', 'transactional_properties'='default') as
+select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
+from customer, orders, q18_tmp_cached t, lineitem l
+where
+  c_custkey = o_custkey and o_orderkey = t.l_orderkey
+  and o_orderkey is not null and t.t_sum_quantity > 300
+  and o_orderkey = l.l_orderkey and l.l_orderkey is not null
+group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
+order by o_totalprice desc, o_orderdate
+limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: tpch_test@customer
+PREHOOK: Input: tpch_test@lineitem
+PREHOOK: Input: tpch_test@orders
+PREHOOK: Input: tpch_test@q18_tmp_cached
+PREHOOK: Output: database:tpch_test
+PREHOOK: Output: tpch_test@q18_large_volume_customer_cached
+POSTHOOK: query: create table q18_large_volume_customer_cached stored as orc tblproperties ('transactional'='true', 'transactional_properties'='default') as
+select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
+from customer, orders, q18_tmp_cached t, lineitem l
+where
+  c_custkey = o_custkey and o_orderkey = t.l_orderkey
+  and o_orderkey is not null and t.t_sum_quantity > 300
+  and o_orderkey = l.l_orderkey and l.l_orderkey is not null
+group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
+order by o_totalprice desc, o_orderdate
+limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: tpch_test@customer
+POSTHOOK: Input: tpch_test@lineitem
+POSTHOOK: Input: tpch_test@orders
+POSTHOOK: Input: tpch_test@q18_tmp_cached
+POSTHOOK: Output: database:tpch_test
+POSTHOOK: Output: tpch_test@q18_large_volume_customer_cached
+POSTHOOK: Lineage: q18_large_volume_customer_cached._c5 EXPRESSION [(lineitem)l.FieldSchema(name:l_quantity, type:double, comment:null), ]
+POSTHOOK: Lineage: q18_large_volume_customer_cached.c_custkey SIMPLE [(customer)customer.FieldSchema(name:c_custkey, type:bigint, comment:null), ]
+POSTHOOK: Lineage: q18_large_volume_customer_cached.c_name SIMPLE [(customer)customer.FieldSchema(name:c_name, type:string, comment:null), ]
+POSTHOOK: Lineage: q18_large_volume_customer_cached.o_orderdate SIMPLE [(orders)orders.FieldSchema(name:o_orderdate, type:string, comment:null), ]
+POSTHOOK: Lineage: q18_large_volume_customer_cached.o_orderkey SIMPLE [(orders)orders.FieldSchema(name:o_orderkey, type:bigint, comment:null), ]
+POSTHOOK: Lineage: q18_large_volume_customer_cached.o_totalprice SIMPLE [(orders)orders.FieldSchema(name:o_totalprice, type:double, comment:null), ]
+PREHOOK: query: drop database tpch_test cascade
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:tpch_test
+PREHOOK: Output: database:tpch_test
+PREHOOK: Output: tpch_test@customer
+PREHOOK: Output: tpch_test@lineitem
+PREHOOK: Output: tpch_test@orders
+PREHOOK: Output: tpch_test@q18_large_volume_customer_cached
+PREHOOK: Output: tpch_test@q18_tmp_cached
+POSTHOOK: query: drop database tpch_test cascade
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:tpch_test
+POSTHOOK: Output: database:tpch_test
+POSTHOOK: Output: tpch_test@customer
+POSTHOOK: Output: tpch_test@lineitem
+POSTHOOK: Output: tpch_test@orders
+POSTHOOK: Output: tpch_test@q18_large_volume_customer_cached
+POSTHOOK: Output: tpch_test@q18_tmp_cached