Posted to commits@hive.apache.org by vi...@apache.org on 2015/05/01 20:07:45 UTC
[1/5] hive git commit: HIVE-10403 - Add n-way join support for Hybrid Grace Hash Join (Wei Zheng via Vikram Dixit)
Repository: hive
Updated Branches:
refs/heads/master 6db33a9d1 -> c72d073c1
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out b/ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out
deleted file mode 100644
index 5d5b850..0000000
--- a/ql/src/test/results/clientpositive/tez/hybridhashjoin.q.out
+++ /dev/null
@@ -1,1566 +0,0 @@
-PREHOOK: query: -- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint < 2000000000) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint < 2000000000) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3152013
-PREHOOK: query: -- Two partitions are created. One in memory, one on disk on creation.
--- The one in memory will eventually exceed memory limit, but won't spill.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Two partitions are created. One in memory, one on disk on creation.
--- The one in memory will eventually exceed memory limit, but won't spill.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint < 2000000000) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint < 2000000000) (type: boolean)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3152013
-PREHOOK: query: -- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: cint is not null (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: cint is not null (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3152013
-PREHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation.
--- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation.
--- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: cint is not null (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: cint is not null (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3152013
-PREHOOK: query: -- Base result for outer join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Base result for outer join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3155128
-PREHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- 32 partitions are created. 3 in memory, 29 on disk on creation.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: c
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cint (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-#### A masked pattern was here ####
-3155128
-PREHOOK: query: -- Partitioned table
-create table parttbl (key string, value char(20)) partitioned by (dt char(10))
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@parttbl
-POSTHOOK: query: -- Partitioned table
-create table parttbl (key string, value char(20)) partitioned by (dt char(10))
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@parttbl
-PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01')
- select * from src
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@parttbl@dt=2000-01-01
-POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01')
- select * from src
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@parttbl@dt=2000-01-01
-POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02')
- select * from src1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src1
-PREHOOK: Output: default@parttbl@dt=2000-01-02
-POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02')
- select * from src1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src1
-POSTHOOK: Output: default@parttbl@dt=2000-01-02
-POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).value EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: -- No spill, base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- No spill, base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: p1
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: p2
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@parttbl
-PREHOOK: Input: default@parttbl@dt=2000-01-01
-PREHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Input: default@parttbl@dt=2000-01-01
-POSTHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-1217
-PREHOOK: query: -- No spill, 2 partitions created in memory
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- No spill, 2 partitions created in memory
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: p1
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: p2
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@parttbl
-PREHOOK: Input: default@parttbl@dt=2000-01-01
-PREHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Input: default@parttbl@dt=2000-01-01
-POSTHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-1217
-PREHOOK: query: -- Spill case base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Spill case base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: p1
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: p2
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@parttbl
-PREHOOK: Input: default@parttbl@dt=2000-01-01
-PREHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Input: default@parttbl@dt=2000-01-01
-POSTHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-1217
-PREHOOK: query: -- Spill case, one partition in memory, one spilled on creation
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-POSTHOOK: query: -- Spill case, one partition in memory, one spilled on creation
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 3 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: p1
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: string)
- 1 key (type: string)
- input vertices:
- 1 Map 3
- Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Map 3
- Map Operator Tree:
- TableScan
- alias: p2
- Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: string)
- sort order: +
- Map-reduce partition columns: key (type: string)
- Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@parttbl
-PREHOOK: Input: default@parttbl@dt=2000-01-01
-PREHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-POSTHOOK: query: select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Input: default@parttbl@dt=2000-01-01
-POSTHOOK: Input: default@parttbl@dt=2000-01-02
-#### A masked pattern was here ####
-1217
-PREHOOK: query: drop table parttbl
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@parttbl
-PREHOOK: Output: default@parttbl
-POSTHOOK: query: drop table parttbl
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@parttbl
-POSTHOOK: Output: default@parttbl
-PREHOOK: query: -- Test vectorization
--- Test case borrowed from vector_decimal_mapjoin.q
-CREATE TABLE decimal_mapjoin STORED AS ORC AS
- SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
- CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
- cint
- FROM alltypesorc
-PREHOOK: type: CREATETABLE_AS_SELECT
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Output: database:default
-PREHOOK: Output: default@decimal_mapjoin
-POSTHOOK: query: -- Test vectorization
--- Test case borrowed from vector_decimal_mapjoin.q
-CREATE TABLE decimal_mapjoin STORED AS ORC AS
- SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
- CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
- cint
- FROM alltypesorc
-POSTHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@decimal_mapjoin
-PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: l
- Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint = 6981) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 6981 (type: int)
- 1 6981 (type: int)
- outputColumnNames: _col1, _col9
- input vertices:
- 1 Map 2
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14))
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
- Map 2
- Map Operator Tree:
- TableScan
- alias: r
- Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint = 6981) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: 6981 (type: int)
- sort order: +
- Map-reduce partition columns: 6981 (type: int)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- value expressions: cdecimal2 (type: decimal(23,14))
- Execution mode: vectorized
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-PREHOOK: type: QUERY
-PREHOOK: Input: default@decimal_mapjoin
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@decimal_mapjoin
-#### A masked pattern was here ####
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 -617.5607769230769
-6981 6981 5831542.269248378 -617.5607769230769
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 6984454.211097692
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 6984454.211097692
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 6984454.211097692
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: l
- Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint = 6981) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 6981 (type: int)
- 1 6981 (type: int)
- outputColumnNames: _col1, _col9
- input vertices:
- 1 Map 2
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- HybridGraceHashJoin: true
- Select Operator
- expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14))
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
- Map 2
- Map Operator Tree:
- TableScan
- alias: r
- Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (cint = 6981) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: 6981 (type: int)
- sort order: +
- Map-reduce partition columns: 6981 (type: int)
- Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
- value expressions: cdecimal2 (type: decimal(23,14))
- Execution mode: vectorized
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-PREHOOK: type: QUERY
-PREHOOK: Input: default@decimal_mapjoin
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@decimal_mapjoin
-#### A masked pattern was here ####
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 -617.5607769230769
-6981 6981 5831542.269248378 -617.5607769230769
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 6984454.211097692
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 5831542.269248378 NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL -617.5607769230769
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL 6984454.211097692
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 NULL NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 6984454.211097692
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 -617.5607769230769
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 6984454.211097692
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-6981 6981 -515.621072973 NULL
-PREHOOK: query: DROP TABLE decimal_mapjoin
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@decimal_mapjoin
-PREHOOK: Output: default@decimal_mapjoin
-POSTHOOK: query: DROP TABLE decimal_mapjoin
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@decimal_mapjoin
-POSTHOOK: Output: default@decimal_mapjoin
[5/5] hive git commit: HIVE-10403 - Add n-way join support for Hybrid Grace Hash Join (Wei Zheng via Vikram Dixit)
Posted by vi...@apache.org.
HIVE-10403 - Add n-way join support for Hybrid Grace Hash Join (Wei Zheng via Vikram Dixit)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c72d073c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c72d073c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c72d073c
Branch: refs/heads/master
Commit: c72d073c1fe2a07c80120647bb2170fb7e50d168
Parents: 6db33a9
Author: vikram <vi...@hortonworks.com>
Authored: Fri May 1 10:54:21 2015 -0700
Committer: vikram <vi...@hortonworks.com>
Committed: Fri May 1 11:07:06 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../test/resources/testconfiguration.properties | 3 +-
.../hadoop/hive/ql/exec/HashTableLoader.java | 4 +-
.../hadoop/hive/ql/exec/MapJoinOperator.java | 169 +-
.../hadoop/hive/ql/exec/mr/HashTableLoader.java | 2 +-
.../persistence/BytesBytesMultiHashMap.java | 1 +
.../exec/persistence/HybridHashTableConf.java | 86 +
.../persistence/HybridHashTableContainer.java | 213 ++-
.../ql/exec/persistence/KeyValueContainer.java | 31 +-
.../ql/exec/persistence/ObjectContainer.java | 31 +-
.../hive/ql/exec/spark/HashTableLoader.java | 2 +-
.../hive/ql/exec/tez/HashTableLoader.java | 60 +-
.../ql/exec/vector/VectorMapJoinOperator.java | 4 +-
.../mapjoin/VectorMapJoinCommonOperator.java | 4 -
.../VectorMapJoinGenerateResultOperator.java | 25 +-
.../fast/VectorMapJoinFastBytesHashMap.java | 4 +-
.../VectorMapJoinFastBytesHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastBytesHashSet.java | 4 +-
.../fast/VectorMapJoinFastBytesHashTable.java | 4 +-
.../mapjoin/fast/VectorMapJoinFastHashMap.java | 4 +-
.../fast/VectorMapJoinFastHashMultiSet.java | 4 +-
.../mapjoin/fast/VectorMapJoinFastHashSet.java | 4 +-
.../fast/VectorMapJoinFastHashTable.java | 4 +-
.../fast/VectorMapJoinFastHashTableLoader.java | 4 +-
.../fast/VectorMapJoinFastLongHashMap.java | 4 +-
.../fast/VectorMapJoinFastLongHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastLongHashSet.java | 4 +-
.../fast/VectorMapJoinFastLongHashTable.java | 4 +-
.../fast/VectorMapJoinFastMultiKeyHashMap.java | 6 +-
.../VectorMapJoinFastMultiKeyHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastMultiKeyHashSet.java | 4 +-
.../fast/VectorMapJoinFastStringHashMap.java | 4 +-
.../VectorMapJoinFastStringHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastStringHashSet.java | 4 +-
.../fast/VectorMapJoinFastTableContainer.java | 23 +-
.../apache/hadoop/hive/ql/plan/MapJoinDesc.java | 2 +-
.../fast/TestVectorMapJoinFastLongHashMap.java | 14 +-
.../TestVectorMapJoinFastMultiKeyHashMap.java | 14 +-
.../clientpositive/auto_sortmerge_join_13.q | 2 +
.../clientpositive/hybridgrace_hashjoin_1.q | 258 +++
.../clientpositive/hybridgrace_hashjoin_2.q | 152 ++
.../queries/clientpositive/hybridhashjoin.q | 250 ---
.../test/queries/clientpositive/tez_join_hash.q | 2 +
.../test/queries/clientpositive/tez_smb_main.q | 6 +
.../tez/hybridgrace_hashjoin_1.q.out | 1587 ++++++++++++++++++
.../tez/hybridgrace_hashjoin_2.q.out | 1417 ++++++++++++++++
.../clientpositive/tez/hybridhashjoin.q.out | 1566 -----------------
47 files changed, 3924 insertions(+), 2086 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 72e4ff2..95e3d04 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -764,6 +764,10 @@ public class HiveConf extends Configuration {
HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ("hive.mapjoin.hybridgrace.memcheckfrequency", 1024, "For " +
"hybrid grace hash join, how often (how many rows apart) we check if memory is full. " +
"This number should be power of 2."),
+ HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 524288, "For hybrid grace" +
+ " hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."),
+ HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For" +
+ " hybrid grace hash join, the minimum number of partitions to create."),
HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 10 * 1024 * 1024,
"Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" +
"store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" +
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index b7abf0d..b2a6e58 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -301,7 +301,8 @@ minitez.query.files=bucket_map_join_tez1.q,\
dynamic_partition_pruning_2.q,\
explainuser_1.q,\
explainuser_2.q,\
- hybridhashjoin.q,\
+ hybridgrace_hashjoin_1.q,\
+ hybridgrace_hashjoin_2.q,\
mapjoin_decimal.q,\
lvj_mapjoin.q, \
mrr.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
index c3e3078..cbf2d43 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableLoader.java
@@ -32,6 +32,6 @@ public interface HashTableLoader {
void init(ExecMapperContext context, MapredContext mrContext, Configuration hconf,
MapJoinOperator joinOp);
- void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes,
- long memUsage) throws HiveException;
+ void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes)
+ throws HiveException;
}
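With the memUsage parameter gone, each loader now derives its own memory budget from the configuration it received in init(). A hypothetical skeleton (NoopHashTableLoader is invented here purely to make the new two-argument contract concrete):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.HashTableLoader;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class NoopHashTableLoader implements HashTableLoader {
  private Configuration hconf;

  @Override
  public void init(ExecMapperContext context, MapredContext mrContext,
      Configuration hconf, MapJoinOperator joinOp) {
    this.hconf = hconf;
  }

  @Override
  public void load(MapJoinTableContainer[] mapJoinTables,
      MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    // A real loader would fill one container per small-table position here,
    // sizing its hashtables from hconf instead of a caller-supplied memUsage.
  }
}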
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index f66ab90..f2b800a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
@@ -35,7 +35,6 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.HashTableLoaderFactory;
-import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionHandler;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
@@ -89,9 +88,10 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
private UnwrapRowContainer[] unwrapContainer;
private transient Configuration hconf;
private transient boolean hybridMapJoinLeftover; // whether there's spilled data to be processed
- protected transient MapJoinBytesTableContainer currentSmallTable; // reloaded hashmap from disk
- protected transient int tag; // big table alias
- protected transient int smallTable; // small table alias
+ protected transient MapJoinBytesTableContainer[] spilledMapJoinTables; // used to hold restored
+ // spilled small tables
+ protected HybridHashTableContainer firstSmallTable; // The first small table;
+ // Only this table has spilled big table rows
public MapJoinOperator() {
}
@@ -272,7 +272,6 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
protected Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> loadHashTable(
ExecMapperContext mapContext, MapredContext mrContext) throws HiveException {
-
loadCalled = true;
if (this.hashTblInitedOnce
@@ -285,9 +284,7 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.LOAD_HASHTABLE);
loader.init(mapContext, mrContext, hconf, this);
- long memUsage = (long)(MapJoinMemoryExhaustionHandler.getMaxHeapSize()
- * conf.getHashTableMemoryUsage());
- loader.load(mapJoinTables, mapJoinTableSerdes, memUsage);
+ loader.load(mapJoinTables, mapJoinTableSerdes);
hashTblInitedOnce = true;
@@ -325,18 +322,6 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
@Override
public void process(Object row, int tag) throws HiveException {
- this.tag = tag;
-
- // As we're calling processOp again to process the leftover triplets, we know the "row" is
- // coming from the on-disk matchfile. We need to recreate hashMapRowGetter against new hashtable
- if (hybridMapJoinLeftover) {
- assert hashMapRowGetters != null;
- if (hashMapRowGetters[smallTable] == null) {
- MapJoinKey refKey = getRefKey((byte) tag);
- hashMapRowGetters[smallTable] = currentSmallTable.createGetter(refKey);
- }
- }
-
try {
alias = (byte) tag;
if (hashMapRowGetters == null) {
@@ -349,13 +334,24 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
}
}
+      // As we're calling processOp again to process the leftover rows, we know the "row" is
+      // coming from the spilled matchfile, so we need to recreate hashMapRowGetters against the new hashtables
+ if (hybridMapJoinLeftover) {
+ MapJoinKey refKey = getRefKey(alias);
+ for (byte pos = 0; pos < order.length; pos++) {
+ if (pos != alias && spilledMapJoinTables[pos] != null) {
+ hashMapRowGetters[pos] = spilledMapJoinTables[pos].createGetter(refKey);
+ }
+ }
+ }
+
// compute keys and values as StandardObjects
ReusableGetAdaptor firstSetKey = null;
int fieldCount = joinKeys[alias].size();
boolean joinNeeded = false;
+ boolean bigTableRowSpilled = false;
for (byte pos = 0; pos < order.length; pos++) {
if (pos != alias) {
- smallTable = pos; // record small table alias
JoinUtil.JoinResult joinResult;
ReusableGetAdaptor adaptor;
if (firstSetKey == null) {
@@ -389,9 +385,14 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
storage[pos] = rowContainer.copy();
aliasFilterTags[pos] = rowContainer.getAliasFilter();
}
- // Spill the big table rows into appropriate partition
- if (joinResult == JoinUtil.JoinResult.SPILL) {
+          // Spill the big table rows into the appropriate partition:
+ // When the JoinResult is SPILL, it means the corresponding small table row may have been
+ // spilled to disk (at least the partition that holds this row is on disk). So we need to
+ // postpone the join processing for this pair by also spilling this big table row.
+ if (joinResult == JoinUtil.JoinResult.SPILL &&
+ !bigTableRowSpilled) { // For n-way join, only spill big table rows once
spillBigTableRow(mapJoinTables[pos], row);
+ bigTableRowSpilled = true;
}
}
}
@@ -431,7 +432,6 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
@Override
public void closeOp(boolean abort) throws HiveException {
-
boolean spilled = false;
for (MapJoinTableContainer container: mapJoinTables) {
if (container != null) {
@@ -440,10 +440,30 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
}
}
+ // For Hybrid Grace Hash Join, we need to see if there is any spilled data to be processed next
if (spilled) {
- for (MapJoinTableContainer tableContainer : mapJoinTables) {
- if (tableContainer != null) {
- if (tableContainer instanceof HybridHashTableContainer) {
+ if (hashMapRowGetters == null) {
+ hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
+ }
+ int numPartitions = 0;
+      // Find out the number of partitions for each small table (should be the same across tables)
+ for (byte pos = 0; pos < mapJoinTables.length; pos++) {
+ if (pos != conf.getPosBigTable()) {
+ firstSmallTable = (HybridHashTableContainer)mapJoinTables[pos];
+ numPartitions = firstSmallTable.getHashPartitions().length;
+ break;
+ }
+ }
+ assert numPartitions != 0 : "Number of partitions must be greater than 0!";
+
+ if (firstSmallTable.hasSpill()) {
+ spilledMapJoinTables = new MapJoinBytesTableContainer[mapJoinTables.length];
+ hybridMapJoinLeftover = true;
+
+ // Clear all in-memory partitions first
+ for (byte pos = 0; pos < mapJoinTables.length; pos++) {
+ MapJoinTableContainer tableContainer = mapJoinTables[pos];
+ if (tableContainer != null && tableContainer instanceof HybridHashTableContainer) {
HybridHashTableContainer hybridHtContainer = (HybridHashTableContainer) tableContainer;
hybridHtContainer.dumpStats();
@@ -453,29 +473,30 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
if (!hashPartitions[i].isHashMapOnDisk()) {
hybridHtContainer.setTotalInMemRowCount(
hybridHtContainer.getTotalInMemRowCount() -
- hashPartitions[i].getHashMapFromMemory().getNumValues());
+ hashPartitions[i].getHashMapFromMemory().getNumValues());
hashPartitions[i].getHashMapFromMemory().clear();
}
}
assert hybridHtContainer.getTotalInMemRowCount() == 0;
+ }
+ }
- for (int i = 0; i < hashPartitions.length; i++) {
- if (hashPartitions[i].isHashMapOnDisk()) {
- // Recursively process on-disk triplets (hash partition, sidefile, matchfile)
- try {
- hybridMapJoinLeftover = true;
- hashMapRowGetters[smallTable] = null;
- continueProcess(hashPartitions[i], hybridHtContainer);
- } catch (IOException e) {
- e.printStackTrace();
- } catch (ClassNotFoundException e) {
- e.printStackTrace();
- } catch (SerDeException e) {
- e.printStackTrace();
- }
- }
- hybridMapJoinLeftover = false;
- currentSmallTable = null;
+ // Reprocess the spilled data
+ for (int i = 0; i < numPartitions; i++) {
+ HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
+ if (hashPartitions[i].isHashMapOnDisk()) {
+ try {
+ continueProcess(i); // Re-process spilled data
+ } catch (IOException e) {
+ e.printStackTrace();
+ } catch (SerDeException e) {
+ e.printStackTrace();
+ } catch (ClassNotFoundException e) {
+ e.printStackTrace();
+ }
+ for (byte pos = 0; pos < order.length; pos++) {
+ if (pos != conf.getPosBigTable())
+ spilledMapJoinTables[pos] = null;
}
}
}
@@ -497,18 +518,20 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
}
/**
- * Continue processing each pair of spilled hashtable and big table row container
- * @param partition hash partition to process
- * @param hybridHtContainer Hybrid hashtable container
+ * Continue processing join between spilled hashtable(s) and spilled big table
+ * @param partitionId the partition number across all small tables to process
* @throws HiveException
* @throws IOException
- * @throws ClassNotFoundException
* @throws SerDeException
*/
- private void continueProcess(HashPartition partition, HybridHashTableContainer hybridHtContainer)
- throws HiveException, IOException, ClassNotFoundException, SerDeException {
- reloadHashTable(partition, hybridHtContainer);
- reProcessBigTable(partition);
+ private void continueProcess(int partitionId)
+ throws HiveException, IOException, SerDeException, ClassNotFoundException {
+ for (byte pos = 0; pos < mapJoinTables.length; pos++) {
+ if (pos != conf.getPosBigTable()) {
+ reloadHashTable(pos, partitionId);
+ }
+ }
+ reProcessBigTable(partitionId);
}
/**
@@ -516,16 +539,16 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
* It can have two steps:
* 1) Deserialize a serialized hash table, and
* 2) Merge every key/value pair from small table container into the hash table
- * @param partition hash partition to process
- * @param hybridHtContainer Hybrid hashtable container
+ * @param pos position of small table
+ * @param partitionId the partition of the small table to be reloaded from
* @throws IOException
- * @throws ClassNotFoundException
* @throws HiveException
* @throws SerDeException
*/
- protected void reloadHashTable(HashPartition partition,
- HybridHashTableContainer hybridHtContainer)
- throws IOException, ClassNotFoundException, HiveException, SerDeException {
+ protected void reloadHashTable(byte pos, int partitionId)
+ throws IOException, HiveException, SerDeException, ClassNotFoundException {
+ HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
+ HashPartition partition = container.getHashPartitions()[partitionId];
// Merge the sidefile into the newly created hash table
// This is where the spilling may happen again
@@ -544,11 +567,12 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
     // If, based on the new key count, the restored table is smaller than a threshold,
     // then just load the entire restored hashmap into memory.
     // The size of the deserialized partition shouldn't exceed half of the memory limit
- if (rowCount * hybridHtContainer.getTableRowSize() >= hybridHtContainer.getMemoryThreshold() / 2) {
- LOG.info("Hybrid Grace Hash Join: Hash table reload can fail since it will be greater than memory limit. Recursive spilling is currently not supported");
+ if (rowCount * container.getTableRowSize() >= container.getMemoryThreshold() / 2) {
+ LOG.warn("Hybrid Grace Hash Join: Hash table cannot be reloaded since it" +
+ " will be greater than memory limit. Recursive spilling is currently not supported");
}
- KeyValueHelper writeHelper = hybridHtContainer.getWriteHelper();
+ KeyValueHelper writeHelper = container.getWriteHelper();
while (kvContainer.hasNext()) {
ObjectPair<HiveKey, BytesWritable> pair = kvContainer.next();
Writable key = pair.getFirst();
@@ -557,27 +581,30 @@ public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implem
restoredHashMap.put(writeHelper, -1);
}
- hybridHtContainer.setTotalInMemRowCount(hybridHtContainer.getTotalInMemRowCount()
+ container.setTotalInMemRowCount(container.getTotalInMemRowCount()
+ restoredHashMap.getNumValues() + kvContainer.size());
kvContainer.clear();
- // Since there's only one hashmap to deal with, it's OK to create a MapJoinBytesTableContainer
- currentSmallTable = new MapJoinBytesTableContainer(restoredHashMap);
- currentSmallTable.setInternalValueOi(hybridHtContainer.getInternalValueOi());
- currentSmallTable.setSortableSortOrders(hybridHtContainer.getSortableSortOrders());
+ spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap);
+ spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi());
+ spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders());
}
/**
* Iterate over the big table row container and feed process() with leftover rows
- * @param partition the hash partition being brought back to memory at the moment
+ * @param partitionId the partition from which to take out spilled big table rows
* @throws HiveException
- * @throws IOException
*/
- protected void reProcessBigTable(HashPartition partition) throws HiveException, IOException {
+ protected void reProcessBigTable(int partitionId) throws HiveException {
+    // For a binary join, firstSmallTable is the only small table and holds the reference to the
+    // spilled big table rows.
+    // For an n-way join, big table rows are spilled only once (while processing the first small
+    // table), so only firstSmallTable has a reference to them.
+ HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
ObjectContainer bigTable = partition.getMatchfileObjContainer();
while (bigTable.hasNext()) {
Object row = bigTable.next();
- process(row, tag);
+ process(row, conf.getPosBigTable());
}
bigTable.clear();
}
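Taken together, the new closeOp() path boils down to the following loop. This is a condensed, illustrative sketch using the field and method names from this patch, not a verbatim excerpt; error handling and the stat/cleanup bookkeeping are omitted:

    // For every partition that ended up on disk: restore each small table's hashmap,
    // then replay the big table rows that were spilled for that partition.
    for (int i = 0; i < numPartitions; i++) {
      if (firstSmallTable.getHashPartitions()[i].isHashMapOnDisk()) {
        for (byte pos = 0; pos < mapJoinTables.length; pos++) {
          if (pos != conf.getPosBigTable()) {
            reloadHashTable(pos, i);   // fills spilledMapJoinTables[pos]
          }
        }
        reProcessBigTable(i);          // feeds process() with the spilled big table rows
      }
    }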
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
index 96a6728..abf38e4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HashTableLoader.java
@@ -72,7 +72,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
@Override
public void load(
MapJoinTableContainer[] mapJoinTables,
- MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage) throws HiveException {
+ MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
String currentInputPath = context.getCurrentInputPath().toString();
LOG.info("******* Load from HashTable for input file: " + currentInputPath);
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
index dd5c621..2ba622e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java
@@ -566,6 +566,7 @@ public final class BytesBytesMultiHashMap {
this.writeBuffers.clear();
this.refs = new long[1];
this.keysAssigned = 0;
+ this.numValues = 0;
}
public void expandAndRehashToTarget(int estimateNewRowCount) {
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
new file mode 100644
index 0000000..625038f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableConf.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.persistence;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This conf class wraps a list of HybridHashTableContainers and some common info shared among
+ * them; it is used for n-way joins (where multiple small tables are involved).
+ */
+public class HybridHashTableConf {
+  private List<HybridHashTableContainer> loadedContainerList; // A list of already loaded containers
+ private int numberOfPartitions = 0; // Number of partitions each table should have
+ private int nextSpillPartition = -1; // The partition to be spilled next
+
+ public HybridHashTableConf() {
+ loadedContainerList = new ArrayList<HybridHashTableContainer>();
+ }
+
+ public int getNumberOfPartitions() {
+ return numberOfPartitions;
+ }
+
+ public void setNumberOfPartitions(int numberOfPartitions) {
+ this.numberOfPartitions = numberOfPartitions;
+ this.nextSpillPartition = numberOfPartitions - 1;
+ }
+
+ public int getNextSpillPartition() {
+ return this.nextSpillPartition;
+ }
+
+ public void setNextSpillPartition(int nextSpillPartition) {
+ this.nextSpillPartition = nextSpillPartition;
+ }
+
+
+ public List<HybridHashTableContainer> getLoadedContainerList() {
+ return loadedContainerList;
+ }
+
+ /**
+   * Spill one in-memory partition from the tail for all previously loaded HybridHashTableContainers.
+   * Also mark that partition number as spill-on-creation for containers created in the future.
+   * @return amount of memory freed; 0 if only the last partition remains in memory for each container
+ */
+ public long spill() throws IOException {
+ if (nextSpillPartition == 0) {
+ return 0;
+ }
+ long memFreed = 0;
+ for (HybridHashTableContainer container : loadedContainerList) {
+ memFreed += container.spillPartition(nextSpillPartition);
+ container.setSpill(true);
+ }
+ nextSpillPartition--;
+ return memFreed;
+ }
+
+ /**
+ * Check if a partition should be spilled directly on creation
+ * @param partitionId the partition to create
+ * @return true if it should be spilled directly, false otherwise
+ */
+ public boolean doSpillOnCreation(int partitionId) {
+ return nextSpillPartition != -1 && partitionId > nextSpillPartition;
+ }
+}
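A hedged usage sketch of the class above (a fragment, inside a method that declares throws IOException since spill() can throw it; the partition count 16 mirrors the new minnumpartitions default, and containers append themselves to the loaded list in their constructor, as the HybridHashTableContainer changes below show):

    HybridHashTableConf nwayConf = new HybridHashTableConf();
    nwayConf.setNumberOfPartitions(16);                // nextSpillPartition becomes 15
    // ... HybridHashTableContainers are created and register themselves ...
    long memFreed = nwayConf.spill();                  // spills partition 15 of every loaded container
    boolean spillNow = nwayConf.doSpillOnCreation(15); // true: 15 > nextSpillPartition (now 14)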
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index cb9083d..3f6d61e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -72,15 +72,18 @@ public class HybridHashTableContainer
private static final Log LOG = LogFactory.getLog(HybridHashTableContainer.class);
private final HashPartition[] hashPartitions; // an array of partitions holding the triplets
- private int totalInMemRowCount = 0; // total number of small table rows in memory
- private final long memoryThreshold; // the max memory limit allocated
+ private int totalInMemRowCount = 0; // total number of small table rows in memory
+ private long memoryThreshold; // the max memory limit that can be allocated
+ private long memoryUsed; // the actual memory used
+ private int writeBufferSize; // write buffer size for this HybridHashTableContainer
private final long tableRowSize; // row size of the small table
- private boolean isSpilled; // whether there's any spilled partition
- private int toSpillPartitionId; // the partition into which to spill the big table row;
- // This may change after every setMapJoinKey call
- private int numPartitionsSpilled; // number of spilled partitions
- private boolean lastPartitionInMem; // only one (last one) partition is left in memory
+ private boolean isSpilled; // whether there's any spilled partition
+ private int toSpillPartitionId; // the partition into which to spill the big table row;
+ // This may change after every setMapJoinKey call
+ private int numPartitionsSpilled; // number of spilled partitions
+ private boolean lastPartitionInMem; // only one (last one) partition is left in memory
private final int memoryCheckFrequency; // how often (# of rows apart) to check if memory is full
+ private HybridHashTableConf nwayConf; // configuration for n-way join
/** The OI used to deserialize values. We never deserialize keys. */
private LazyBinaryStructObjectInspector internalValueOi;
@@ -182,53 +185,93 @@ public class HybridHashTableContainer
}
}
- public HybridHashTableContainer(Configuration hconf, long keyCount, long memUsage, long tableSize)
- throws SerDeException {
+ public HybridHashTableContainer(Configuration hconf, long keyCount, long memoryAvailable,
+ long estimatedTableSize, HybridHashTableConf nwayConf)
+ throws SerDeException, IOException {
this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
- HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
- HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD),
HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ),
- tableSize, keyCount, memUsage);
+ HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
+ HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
+ estimatedTableSize, keyCount, memoryAvailable, nwayConf);
}
- private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor, int wbSize,
- long noConditionalTaskThreshold, int memCheckFreq, long tableSize,
- long keyCount, long memUsage) throws SerDeException {
-
- if (wbSize > noConditionalTaskThreshold) {
- LOG.warn("adjusting hash table write buffer size to be smaller than noconditionaltasksize");
- wbSize = (int) noConditionalTaskThreshold;
- }
-
+ private HybridHashTableContainer(float keyCountAdj, int threshold, float loadFactor,
+ int memCheckFreq, int minWbSize, int minNumParts,
+ long estimatedTableSize, long keyCount,
+ long memoryAvailable, HybridHashTableConf nwayConf)
+ throws SerDeException, IOException {
directWriteHelper = new MapJoinBytesTableContainer.DirectKeyValueWriter();
int newKeyCount = HashMapWrapper.calculateTableSize(
keyCountAdj, threshold, loadFactor, keyCount);
- memoryThreshold = noConditionalTaskThreshold;
- tableRowSize = tableSize / newKeyCount;
+ memoryThreshold = memoryAvailable;
+ tableRowSize = estimatedTableSize / keyCount;
memoryCheckFrequency = memCheckFreq;
- int numPartitions = calcNumPartitions(tableSize, wbSize); // estimate # of partitions to create
+ this.nwayConf = nwayConf;
+ int numPartitions;
+ if (nwayConf == null) { // binary join
+ numPartitions = calcNumPartitions(memoryThreshold, estimatedTableSize, minNumParts, minWbSize,
+ nwayConf);
+ writeBufferSize = (int)(estimatedTableSize / numPartitions);
+ } else { // n-way join
+      // It has been calculated in HashTableLoader earlier, so we just need to retrieve that number
+ numPartitions = nwayConf.getNumberOfPartitions();
+ if (nwayConf.getLoadedContainerList().size() == 0) { // n-way: first small table
+ writeBufferSize = (int)(estimatedTableSize / numPartitions);
+ } else { // n-way: all later small tables
+ while (memoryThreshold < numPartitions * minWbSize) {
+ // Spill previously loaded tables to make more room
+ long memFreed = nwayConf.spill();
+ if (memFreed == 0) {
+ LOG.warn("Available memory is not enough to create HybridHashTableContainers" +
+ " consistently!");
+ break;
+ } else {
+ LOG.info("Total available memory was: " + memoryThreshold);
+ memoryThreshold += memFreed;
+ LOG.info("Total available memory is: " + memoryThreshold);
+ }
+ }
+ writeBufferSize = (int)(memoryThreshold / numPartitions);
+ }
+ }
+ writeBufferSize = writeBufferSize < minWbSize ? minWbSize : writeBufferSize;
+ LOG.info("Write buffer size: " + writeBufferSize);
hashPartitions = new HashPartition[numPartitions];
int numPartitionsSpilledOnCreation = 0;
- long memoryAllocated = 0;
+ memoryUsed = 0;
int initialCapacity = Math.max(newKeyCount / numPartitions, threshold / numPartitions);
for (int i = 0; i < numPartitions; i++) {
- if (i == 0) { // We unconditionally create a hashmap for the first hash partition
- hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage, true);
- } else {
- hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, wbSize, memUsage,
- memoryAllocated + wbSize < memoryThreshold);
+ if (this.nwayConf == null || // binary join
+ nwayConf.getLoadedContainerList().size() == 0) { // n-way join, first (biggest) small table
+ if (i == 0) { // We unconditionally create a hashmap for the first hash partition
+ hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize, memoryThreshold, true);
+ } else {
+ hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize, memoryThreshold,
+ memoryUsed + writeBufferSize < memoryThreshold);
+ }
+ } else { // n-way join
+      } else {  // n-way join
+        // For all later small tables, follow the same pattern as the previously loaded tables.
+ if (this.nwayConf.doSpillOnCreation(i)) {
+ hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize, memoryThreshold, false);
+ } else {
+ hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize, memoryThreshold, true);
+ }
}
+
if (isHashMapSpilledOnCreation(i)) {
numPartitionsSpilledOnCreation++;
numPartitionsSpilled++;
this.setSpill(true);
+ if (this.nwayConf != null && this.nwayConf.getNextSpillPartition() == numPartitions - 1) {
+ this.nwayConf.setNextSpillPartition(i - 1);
+ }
} else {
- memoryAllocated += hashPartitions[i].hashMap.memorySize();
+ memoryUsed += hashPartitions[i].hashMap.memorySize();
}
}
assert numPartitionsSpilledOnCreation != numPartitions : "All partitions are directly spilled!" +
@@ -236,6 +279,11 @@ public class HybridHashTableContainer
LOG.info("Number of partitions created: " + numPartitions);
LOG.info("Number of partitions spilled directly to disk on creation: "
+ numPartitionsSpilledOnCreation);
+
+ // Append this container to the loaded list
+ if (this.nwayConf != null) {
+ this.nwayConf.getLoadedContainerList().add(this);
+ }
}
@@ -251,6 +299,20 @@ public class HybridHashTableContainer
return memoryThreshold;
}
+ /**
+ * Get the current memory usage by recalculating it.
+ * @return current memory usage
+ */
+ public long refreshMemoryUsed() {
+ long memUsed = 0;
+ for (HashPartition hp : hashPartitions) {
+ if (hp.hashMap != null) {
+ memUsed += hp.hashMap.memorySize();
+ }
+ }
+ return memoryUsed = memUsed;
+ }
+
public LazyBinaryStructObjectInspector getInternalValueOi() {
return internalValueOi;
}
@@ -313,10 +375,16 @@ public class HybridHashTableContainer
LOG.warn("This LAST partition in memory won't be spilled!");
lastPartitionInMem = true;
} else {
- int biggest = biggestPartition();
+ if (nwayConf == null) { // binary join
+ int biggest = biggestPartition();
+ spillPartition(biggest);
+ this.setSpill(true);
+ } else { // n-way join
+ LOG.info("N-way spilling: spill tail partition from previously loaded small tables");
+ memoryThreshold += nwayConf.spill();
+ LOG.info("Memory threshold has been increased to: " + memoryThreshold);
+ }
numPartitionsSpilled++;
- spillPartition(biggest);
- this.setSpill(true);
}
}
}
@@ -349,13 +417,7 @@ public class HybridHashTableContainer
* @return true if memory is full, false if not
*/
private boolean isMemoryFull() {
- long size = 0;
- for (int i = 0; i < hashPartitions.length; i++) {
- if (!isOnDisk(i)) {
- size += hashPartitions[i].hashMap.memorySize();
- }
- }
- return size >= memoryThreshold;
+ return refreshMemoryUsed() >= memoryThreshold;
}
/**
@@ -385,11 +447,11 @@ public class HybridHashTableContainer
/**
* Move the hashtable of a specified partition from memory into local file system
* @param partitionId the hashtable to be moved
+ * @return amount of memory freed
*/
- private void spillPartition(int partitionId) throws IOException {
+ public long spillPartition(int partitionId) throws IOException {
HashPartition partition = hashPartitions[partitionId];
int inMemRowCount = partition.hashMap.getNumValues();
- long inMemSize = partition.hashMap.memorySize();
Path path = Files.createTempFile("partition-" + partitionId + "-", null);
OutputStream outputStream = Files.newOutputStream(path);
@@ -403,57 +465,55 @@ public class HybridHashTableContainer
partition.hashMapLocalPath = path;
partition.hashMapOnDisk = true;
- long size = 0;
- for (int i = 0; i < hashPartitions.length; i++) {
- if (!isOnDisk(i)) {
- size += hashPartitions[i].hashMap.memorySize();
- }
- }
LOG.info("Spilling hash partition " + partitionId + " (Rows: " + inMemRowCount +
- ", Mem size: " + inMemSize + "): " + path);
- LOG.info("Memory usage before spilling: " + size);
- LOG.info("Memory usage after spilling: " + (size - inMemSize));
+ ", Mem size: " + partition.hashMap.memorySize() + "): " + path);
+ LOG.info("Memory usage before spilling: " + memoryUsed);
+
+ long memFreed = partition.hashMap.memorySize();
+ memoryUsed -= memFreed;
+ LOG.info("Memory usage after spilling: " + memoryUsed);
totalInMemRowCount -= inMemRowCount;
partition.hashMap.clear();
+ return memFreed;
}
/**
- * Calculate how many partitions are needed. This is an estimation.
+ * Calculate how many partitions are needed.
+   * For an n-way join, we only do this calculation once in the HashTableLoader, for the biggest small
+   * table. The other small tables will use the same number. They may need to adjust (usually reduce)
+   * their individual write buffer size in order not to exceed the memory threshold.
+ * @param memoryThreshold memory threshold for the given table
* @param dataSize total data size for the table
- * @param wbSize write buffer size
+ * @param minNumParts minimum required number of partitions
+ * @param minWbSize minimum required write buffer size
+ * @param nwayConf the n-way join configuration
* @return number of partitions needed
*/
- private int calcNumPartitions(long dataSize, int wbSize) {
- if (memoryThreshold < wbSize) {
- throw new IllegalStateException("Available memory is less than hashtable writebuffer size!"
- + " Try increasing hive.auto.convert.join.noconditionaltask.size.");
- }
-
- int lowerLimit = 2;
- int numPartitions = (int) Math.ceil(dataSize / wbSize);
-
- LOG.info("Total available memory: " + memoryThreshold);
- LOG.info("Estimated small table size: " + dataSize);
- LOG.info("Write buffer size: " + wbSize);
- LOG.info("Initial number of partitions: " + numPartitions);
+ public static int calcNumPartitions(long memoryThreshold, long dataSize, int minNumParts,
+ int minWbSize, HybridHashTableConf nwayConf) throws IOException {
+ int numPartitions = minNumParts;
- if (numPartitions < lowerLimit) {
- return lowerLimit;
- } else if (dataSize > memoryThreshold) {
- numPartitions = (int) (memoryThreshold / wbSize);
+ if (memoryThreshold < minNumParts * minWbSize) {
+ LOG.warn("Available memory is not enough to create a HybridHashTableContainer!");
}
- // Make sure numPartitions is power of 2, to make N & (M - 1) easy when calculating partition No.
- numPartitions = (Long.bitCount(numPartitions) == 1) ? numPartitions
- : Integer.highestOneBit(numPartitions) << 1;
- while (dataSize / numPartitions > memoryThreshold) {
- numPartitions *= 2;
+ if (memoryThreshold < dataSize) {
+ while (dataSize / numPartitions > memoryThreshold) {
+ numPartitions *= 2;
+ }
}
+ LOG.info("Total available memory: " + memoryThreshold);
+ LOG.info("Estimated small table size: " + dataSize);
LOG.info("Number of hash partitions to be created: " + numPartitions);
return numPartitions;
}
+ /* Get number of partitions */
+ public int getNumPartitions() {
+ return hashPartitions.length;
+ }
+
/* Get total number of rows from all in memory partitions */
public int getTotalInMemRowCount() {
return totalInMemRowCount;
@@ -494,6 +554,7 @@ public class HybridHashTableContainer
hp.hashMap.clear();
}
}
+ memoryUsed = 0;
}
@Override
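To make the new calcNumPartitions() concrete, here is a worked example with illustrative, made-up figures (a fragment, inside a method that declares throws IOException; the null argument takes the binary-join path):

    long memory = 64L << 20;      // 64 MB available
    long dataSize = 2048L << 20;  // 2 GB estimated small table
    int parts = HybridHashTableContainer.calcNumPartitions(
        memory, dataSize, 16 /* minNumParts */, 512 * 1024 /* minWbSize */, null);
    // Start at 16: 2048/16 = 128 MB per partition > 64 MB -> double to 32.
    // 2048/32 = 64 MB per partition, no longer > 64 MB    -> stop; parts == 32.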
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java
index d3ec29a..d1bea48 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/KeyValueContainer.java
@@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.ql.io.HiveKey;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;
import java.io.File;
@@ -64,29 +65,31 @@ public class KeyValueContainer {
}
try {
setupOutput();
- } catch (IOException e) {
+ } catch (IOException | HiveException e) {
throw new RuntimeException("Failed to create temporary output file on disk", e);
}
}
- private void setupOutput() throws IOException {
- if (parentFile == null) {
- parentFile = File.createTempFile("key-value-container", "");
- if (parentFile.delete() && parentFile.mkdir()) {
- parentFile.deleteOnExit();
+ private void setupOutput() throws IOException, HiveException {
+ FileOutputStream fos = null;
+ try {
+ if (parentFile == null) {
+ parentFile = File.createTempFile("key-value-container", "");
+ if (parentFile.delete() && parentFile.mkdir()) {
+ parentFile.deleteOnExit();
+ }
}
- }
- if (tmpFile == null || input != null) {
- tmpFile = File.createTempFile("KeyValueContainer", ".tmp", parentFile);
- LOG.info("KeyValueContainer created temp file " + tmpFile.getAbsolutePath());
- tmpFile.deleteOnExit();
- }
+ if (tmpFile == null || input != null) {
+ tmpFile = File.createTempFile("KeyValueContainer", ".tmp", parentFile);
+ LOG.info("KeyValueContainer created temp file " + tmpFile.getAbsolutePath());
+ tmpFile.deleteOnExit();
+ }
- FileOutputStream fos = null;
- try {
fos = new FileOutputStream(tmpFile);
output = new Output(fos);
+ } catch (IOException e) {
+ throw new HiveException(e);
} finally {
if (output == null && fos != null) {
fos.close();
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java
index 18943dd..7d7ce1d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ObjectContainer.java
@@ -26,6 +26,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import java.io.File;
import java.io.FileInputStream;
@@ -67,29 +68,31 @@ public class ObjectContainer<ROW> {
kryo = Utilities.runtimeSerializationKryo.get();
try {
setupOutput();
- } catch (IOException e) {
+ } catch (IOException | HiveException e) {
throw new RuntimeException("Failed to create temporary output file on disk", e);
}
}
- private void setupOutput() throws IOException {
- if (parentFile == null) {
- parentFile = File.createTempFile("object-container", "");
- if (parentFile.delete() && parentFile.mkdir()) {
- parentFile.deleteOnExit();
+ private void setupOutput() throws IOException, HiveException {
+ FileOutputStream fos = null;
+ try {
+ if (parentFile == null) {
+ parentFile = File.createTempFile("object-container", "");
+ if (parentFile.delete() && parentFile.mkdir()) {
+ parentFile.deleteOnExit();
+ }
}
- }
- if (tmpFile == null || input != null) {
- tmpFile = File.createTempFile("ObjectContainer", ".tmp", parentFile);
- LOG.info("ObjectContainer created temp file " + tmpFile.getAbsolutePath());
- tmpFile.deleteOnExit();
- }
+ if (tmpFile == null || input != null) {
+ tmpFile = File.createTempFile("ObjectContainer", ".tmp", parentFile);
+ LOG.info("ObjectContainer created temp file " + tmpFile.getAbsolutePath());
+ tmpFile.deleteOnExit();
+ }
- FileOutputStream fos = null;
- try {
fos = new FileOutputStream(tmpFile);
output = new Output(fos);
+ } catch (IOException e) {
+ throw new HiveException(e);
} finally {
if (output == null && fos != null) {
fos.close();
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
index fe108c4..043f1f7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java
@@ -69,7 +69,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
@Override
public void load(MapJoinTableContainer[] mapJoinTables,
- MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage)
+ MapJoinTableContainerSerDe[] mapJoinTableSerdes)
throws HiveException {
// Note: it's possible that a MJ operator is in a ReduceWork, in which case the
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
index ba5a797..6a81f11 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper;
+import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableConf;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
@@ -69,7 +70,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
@Override
public void load(MapJoinTableContainer[] mapJoinTables,
- MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage)
+ MapJoinTableContainerSerDe[] mapJoinTableSerdes)
throws HiveException {
Map<Integer, String> parentToInput = desc.getParentToInput();
@@ -79,10 +80,44 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
boolean isFirstKey = true;
+ // TODO remove this after memory manager is in
+ long noConditionalTaskThreshold = HiveConf.getLongVar(
+ hconf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
+
+ // Only applicable to n-way Hybrid Grace Hash Join
+ HybridHashTableConf nwayConf = null;
+ long totalSize = 0;
+ int biggest = 0; // position of the biggest small table
+ if (useHybridGraceHashJoin && mapJoinTables.length > 2) {
+ // Create a Conf for n-way HybridHashTableContainers
+ nwayConf = new HybridHashTableConf();
+
+ // Find the biggest small table; also calculate total data size of all small tables
+ long maxSize = 0; // the size of the biggest small table
+ for (int pos = 0; pos < mapJoinTables.length; pos++) {
+ if (pos == desc.getPosBigTable()) {
+ continue;
+ }
+ totalSize += desc.getParentDataSizes().get(pos);
+ biggest = desc.getParentDataSizes().get(pos) > maxSize ? pos : biggest;
+ maxSize = desc.getParentDataSizes().get(pos) > maxSize ? desc.getParentDataSizes().get(pos)
+ : maxSize;
+ }
- // Disable hybrid grace hash join for n-way join
- if (mapJoinTables.length > 2) {
- useHybridGraceHashJoin = false;
+      // Using the biggest small table, calculate the number of partitions to create for each small table
+ float percentage = (float) maxSize / totalSize;
+ long memory = (long) (noConditionalTaskThreshold * percentage);
+ int numPartitions = 0;
+ try {
+ numPartitions = HybridHashTableContainer.calcNumPartitions(memory,
+ desc.getParentDataSizes().get(biggest),
+ HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS),
+ HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE),
+ nwayConf);
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ nwayConf.setNumberOfPartitions(numPartitions);
}
for (int pos = 0; pos < mapJoinTables.length; pos++) {
@@ -122,10 +157,21 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable
Long keyCountObj = parentKeyCounts.get(pos);
long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
+ long memory = 0;
+ if (useHybridGraceHashJoin) {
+ if (mapJoinTables.length > 2) {
+ // Allocate n-way join memory proportionally
+ float percentage = (float) desc.getParentDataSizes().get(pos) / totalSize;
+ memory = (long) (noConditionalTaskThreshold * percentage);
+ } else { // binary join
+ memory = noConditionalTaskThreshold;
+ }
+ }
+
MapJoinTableContainer tableContainer = useOptimizedTables
- ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount, memUsage,
- desc.getParentDataSizes().get(pos))
- : new MapJoinBytesTableContainer(hconf, valCtx, keyCount, memUsage))
+ ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount,
+ memory, desc.getParentDataSizes().get(pos), nwayConf)
+ : new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0))
: new HashMapWrapper(hconf, keyCount);
LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName());
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
index 534a906..0547346 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
@@ -311,8 +311,10 @@ public class VectorMapJoinOperator extends MapJoinOperator implements Vectorizat
}
@Override
- protected void reProcessBigTable(HybridHashTableContainer.HashPartition partition)
+ protected void reProcessBigTable(int partitionId)
throws HiveException {
+
+ HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
ObjectContainer bigTable = partition.getMatchfileObjContainer();
DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index f272b6d..f9d5736 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -744,10 +744,6 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
// Setup a scratch batch that will be used to play back big table rows that were spilled
// to disk for the Hybrid Grace hash partitioning.
spillReplayBatch = VectorizedBatchUtil.makeLike(batch);
-
- // TEMPORARY -- Set this up for Hybrid Grace logic in MapJoinOperator.closeOp
- hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
- smallTable = posSingleVectorMapJoinSmallTable;
}
protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) {
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
index 743a975..70c8cb1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
@@ -27,6 +27,8 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow;
@@ -449,7 +451,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
int partitionId = hashTableResult.spillPartitionId();
- HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[smallTable];
+ HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable];
HashPartition hp = ht.getHashPartitions()[partitionId];
VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer();
@@ -499,27 +501,30 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
}
@Override
- protected void reloadHashTable(HashPartition partition,
- HybridHashTableContainer hybridHtContainer)
- throws IOException, ClassNotFoundException, HiveException, SerDeException {
+ protected void reloadHashTable(byte pos, int partitionId)
+ throws IOException, HiveException, SerDeException, ClassNotFoundException {
- // The super method will reload a hash table partition and
- // put a single MapJoinBytesTableContainer into the currentSmallTable member.
- super.reloadHashTable(partition, hybridHtContainer);
+ // The super method will reload a hash table partition of one of the small tables.
+    // Currently, for native vector map join there is only one small table.
+ super.reloadHashTable(pos, partitionId);
+
+ MapJoinTableContainer smallTable = spilledMapJoinTables[pos];
vectorMapJoinHashTable = VectorMapJoinOptimizedCreateHashTable.createHashTable(conf,
- currentSmallTable);
+ smallTable);
needHashTableSetup = true;
LOG.info(CLASS_NAME + " reloadHashTable!");
}
@Override
- protected void reProcessBigTable(HybridHashTableContainer.HashPartition partition)
- throws HiveException, IOException {
+ protected void reProcessBigTable(int partitionId)
+ throws HiveException {
LOG.info(CLASS_NAME + " reProcessBigTable enter...");
+ HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
+
int rowCount = 0;
int batchCount = 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index 0796406..f9550c9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -90,8 +90,8 @@ public abstract class VectorMapJoinFastBytesHashMap
}
public VectorMapJoinFastBytesHashMap(
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
valueStore = new VectorMapJoinFastValueStore(writeBuffersSize);
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index d685c22..9dcaf8f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -85,8 +85,8 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
}
public VectorMapJoinFastBytesHashMultiSet(
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
index 9f20fdc..9f122c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java
@@ -77,8 +77,8 @@ public abstract class VectorMapJoinFastBytesHashSet
}
public VectorMapJoinFastBytesHashSet(
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index 594a77f..b6e6321 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -214,8 +214,8 @@ public abstract class VectorMapJoinFastBytesHashTable
}
public VectorMapJoinFastBytesHashTable(
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
allocateBucketArray();
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java
index b37247c..262b619 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMap.java
@@ -32,7 +32,7 @@ public abstract class VectorMapJoinFastHashMap
public VectorMapJoinFastHashMap(
boolean isOuterJoin,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java
index 5569f6e..5f7c6a7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashMultiSet.java
@@ -42,7 +42,7 @@ public abstract class VectorMapJoinFastHashMultiSet
public VectorMapJoinFastHashMultiSet(
boolean isOuterJoin,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java
index 0738df3..8509971 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashSet.java
@@ -38,7 +38,7 @@ public abstract class VectorMapJoinFastHashSet
public VectorMapJoinFastHashSet(
boolean isOuterJoin,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
index 33e34fa..fbe6b4c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java
@@ -30,7 +30,6 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTab
protected float loadFactor;
protected int writeBuffersSize;
- protected long memUsage;
protected int metricPutConflict;
protected int largestNumberOfSteps;
@@ -52,7 +51,7 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTab
}
public VectorMapJoinFastHashTable(
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
initialCapacity = (Long.bitCount(initialCapacity) == 1)
? initialCapacity : nextHighestPowerOfTwo(initialCapacity);
@@ -65,6 +64,5 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTab
this.loadFactor = loadFactor;
this.writeBuffersSize = writeBuffersSize;
- this.memUsage = memUsage;
}
}
\ No newline at end of file
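Besides dropping memUsage, the constructor above keeps the existing normalization of initialCapacity to a power of two. nextHighestPowerOfTwo is not shown in this hunk; the following self-contained sketch reproduces the same rounding under the usual bit-twiddling assumption (the real helper in Hive may differ in detail):

    final class CapacitySketch {
      // One plausible implementation of the rounding used above;
      // assumes 1 < v <= 2^30.
      static int nextHighestPowerOfTwo(int v) {
        return Integer.highestOneBit(v - 1) << 1;
      }

      static int normalize(int initialCapacity) {
        // Long.bitCount(x) == 1 is exactly the power-of-two test in the hunk.
        return (Long.bitCount(initialCapacity) == 1)
            ? initialCapacity : nextHighestPowerOfTwo(initialCapacity);
      }

      public static void main(String[] args) {
        System.out.println(normalize(1024));  // 1024, already a power of two
        System.out.println(normalize(1500));  // 2048
      }
    }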
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
index 92b5d40..4edf604 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
@@ -62,7 +62,7 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
@Override
public void load(MapJoinTableContainer[] mapJoinTables,
- MapJoinTableContainerSerDe[] mapJoinTableSerdes, long memUsage)
+ MapJoinTableContainerSerDe[] mapJoinTableSerdes)
throws HiveException {
Map<Integer, String> parentToInput = desc.getParentToInput();
@@ -91,7 +91,7 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
long keyCount = (keyCountObj == null) ? -1 : keyCountObj.longValue();
VectorMapJoinFastTableContainer vectorMapJoinFastTableContainer =
- new VectorMapJoinFastTableContainer(desc, hconf, keyCount, memUsage);
+ new VectorMapJoinFastTableContainer(desc, hconf, keyCount);
while (kvReader.next()) {
vectorMapJoinFastTableContainer.putRow(
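On the loader side the change is the same narrowing seen at this call site: load() loses its trailing memUsage argument, and VectorMapJoinFastTableContainer is now built from (desc, hconf, keyCount) alone. A hedged sketch of the old and new interface shapes, with hypothetical stand-in types for the Hive classes not shown here:

    // Illustration only; TableContainer and the method shapes below are
    // simplified stand-ins, not the actual Hive interfaces.
    interface TableContainer {
      void putRow(byte[] key, byte[] value);
    }

    interface HashTableLoaderSketch {
      // Before: void load(TableContainer[] tables, Object[] serdes, long memUsage);
      // After, with the memory hint gone from the signature:
      void load(TableContainer[] tables) throws Exception;
    }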
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
index 3a0b380..d6ad028 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
@@ -86,9 +86,9 @@ public class VectorMapJoinFastLongHashMap
public VectorMapJoinFastLongHashMap(
boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
super(minMaxEnabled, isOuterJoin, hashTableKeyType,
- initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ initialCapacity, loadFactor, writeBuffersSize);
valueStore = new VectorMapJoinFastValueStore(writeBuffersSize);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
index f9763e3..e447551 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java
@@ -84,8 +84,8 @@ public class VectorMapJoinFastLongHashMultiSet
public VectorMapJoinFastLongHashMultiSet(
boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
super(minMaxEnabled, isOuterJoin, hashTableKeyType,
- initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ initialCapacity, loadFactor, writeBuffersSize);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
index cd23949..aa44e60 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
@@ -77,8 +77,8 @@ public class VectorMapJoinFastLongHashSet
public VectorMapJoinFastLongHashSet(
boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
super(minMaxEnabled, isOuterJoin, hashTableKeyType,
- initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ initialCapacity, loadFactor, writeBuffersSize);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index b448e1f..2137fb7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -270,8 +270,8 @@ public abstract class VectorMapJoinFastLongHashTable
public VectorMapJoinFastLongHashTable(
boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
this.isOuterJoin = isOuterJoin;
this.hashTableKeyType = hashTableKeyType;
PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.longTypeInfo };
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
index b962475..9a9fb8d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java
@@ -28,12 +28,12 @@ public class VectorMapJoinFastMultiKeyHashMap
@VisibleForTesting
public VectorMapJoinFastMultiKeyHashMap(int initialCapacity, float loadFactor, int wbSize) {
- this(false, initialCapacity, loadFactor, wbSize, -1);
+ this(false, initialCapacity, loadFactor, wbSize);
}
public VectorMapJoinFastMultiKeyHashMap(
boolean isOuterJoin,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
index 71a62fe..a8744a5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java
@@ -26,7 +26,7 @@ public class VectorMapJoinFastMultiKeyHashMultiSet
public VectorMapJoinFastMultiKeyHashMultiSet(
boolean isOuterJoin,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
index dad3b32..a8048e5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java
@@ -26,7 +26,7 @@ public class VectorMapJoinFastMultiKeyHashSet
public VectorMapJoinFastMultiKeyHashSet(
boolean isOuterJoin,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
index c80ea89..6f181b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java
@@ -37,8 +37,8 @@ public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMa
public VectorMapJoinFastStringHashMap(
boolean isOuterJoin,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
index 4933b16..9653b71 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java
@@ -37,8 +37,8 @@ public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesH
public VectorMapJoinFastStringHashMultiSet(
boolean isOuterJoin,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
index ae8d943..6419a0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java
@@ -37,8 +37,8 @@ public class VectorMapJoinFastStringHashSet extends VectorMapJoinFastBytesHashSe
public VectorMapJoinFastStringHashSet(
boolean isOuterJoin,
- int initialCapacity, float loadFactor, int writeBuffersSize, long memUsage) {
- super(initialCapacity, loadFactor, writeBuffersSize, memUsage);
+ int initialCapacity, float loadFactor, int writeBuffersSize) {
+ super(initialCapacity, loadFactor, writeBuffersSize);
stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
index 3789275..373b5f4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
@@ -56,13 +56,12 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
private float loadFactor;
private int wbSize;
private long keyCount;
- private long memUsage;
private VectorMapJoinFastHashTable VectorMapJoinFastHashTable;
public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf,
- long keyCount, long memUsage) throws SerDeException {
+ long keyCount) throws SerDeException {
this.desc = desc;
this.hconf = hconf;
@@ -73,13 +72,11 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
wbSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE);
this.keyCount = keyCount;
- this.memUsage = memUsage;
// LOG.info("VectorMapJoinFastTableContainer load keyCountAdj " + keyCountAdj);
// LOG.info("VectorMapJoinFastTableContainer load threshold " + threshold);
// LOG.info("VectorMapJoinFastTableContainer load loadFactor " + loadFactor);
// LOG.info("VectorMapJoinFastTableContainer load wbSize " + wbSize);
- // LOG.info("VectorMapJoinFastTableContainer load memUsage " + memUsage);
int newThreshold = HashMapWrapper.calculateTableSize(
keyCountAdj, threshold, loadFactor, keyCount);
@@ -117,17 +114,17 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
case HASH_MAP:
hashTable = new VectorMapJoinFastLongHashMap(
minMaxEnabled, isOuterJoin, hashTableKeyType,
- newThreshold, loadFactor, writeBufferSize, memUsage);
+ newThreshold, loadFactor, writeBufferSize);
break;
case HASH_MULTISET:
hashTable = new VectorMapJoinFastLongHashMultiSet(
minMaxEnabled, isOuterJoin, hashTableKeyType,
- newThreshold, loadFactor, writeBufferSize, memUsage);
+ newThreshold, loadFactor, writeBufferSize);
break;
case HASH_SET:
hashTable = new VectorMapJoinFastLongHashSet(
minMaxEnabled, isOuterJoin, hashTableKeyType,
- newThreshold, loadFactor, writeBufferSize, memUsage);
+ newThreshold, loadFactor, writeBufferSize);
break;
}
break;
@@ -137,17 +134,17 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
case HASH_MAP:
hashTable = new VectorMapJoinFastStringHashMap(
isOuterJoin,
- newThreshold, loadFactor, writeBufferSize, memUsage);
+ newThreshold, loadFactor, writeBufferSize);
break;
case HASH_MULTISET:
hashTable = new VectorMapJoinFastStringHashMultiSet(
isOuterJoin,
- newThreshold, loadFactor, writeBufferSize, memUsage);
+ newThreshold, loadFactor, writeBufferSize);
break;
case HASH_SET:
hashTable = new VectorMapJoinFastStringHashSet(
isOuterJoin,
- newThreshold, loadFactor, writeBufferSize, memUsage);
+ newThreshold, loadFactor, writeBufferSize);
break;
}
break;
@@ -157,17 +154,17 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
case HASH_MAP:
hashTable = new VectorMapJoinFastMultiKeyHashMap(
isOuterJoin,
- newThreshold, loadFactor, writeBufferSize, memUsage);
+ newThreshold, loadFactor, writeBufferSize);
break;
case HASH_MULTISET:
hashTable = new VectorMapJoinFastMultiKeyHashMultiSet(
isOuterJoin,
- newThreshold, loadFactor, writeBufferSize, memUsage);
+ newThreshold, loadFactor, writeBufferSize);
break;
case HASH_SET:
hashTable = new VectorMapJoinFastMultiKeyHashSet(
isOuterJoin,
- newThreshold, loadFactor, writeBufferSize, memUsage);
+ newThreshold, loadFactor, writeBufferSize);
break;
}
break;
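Taken together, the container hunks form a 3x3 dispatch: key category (long, string, multi-key) crossed with table kind (hash map, multiset, set), where every arm now ends with (newThreshold, loadFactor, writeBufferSize) and the memUsage tail argument is gone. A toy version of that selection, mapping the two enums to the implementation class names seen above (not the real factory code):

    // Toy mapping from (key category, table kind) to the class picked by
    // VectorMapJoinFastTableContainer; illustration, not the real factory.
    final class TableDispatchSketch {
      enum KeyCategory { LONG, STRING, MULTI_KEY }
      enum TableKind { HASH_MAP, HASH_MULTISET, HASH_SET }

      static String implName(KeyCategory key, TableKind kind) {
        String k = (key == KeyCategory.LONG) ? "Long"
                 : (key == KeyCategory.STRING) ? "String" : "MultiKey";
        String t = (kind == TableKind.HASH_MAP) ? "HashMap"
                 : (kind == TableKind.HASH_MULTISET) ? "HashMultiSet" : "HashSet";
        return "VectorMapJoinFast" + k + t;
      }

      public static void main(String[] args) {
        // Prints VectorMapJoinFastLongHashMultiSet
        System.out.println(implName(KeyCategory.LONG, TableKind.HASH_MULTISET));
      }
    }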
[2/5] hive git commit: HIVE-10403 - Add n-way join support for Hybrid Grace Hash Join (Wei Zheng via Vikram Dixit)
Posted by vi...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
new file mode 100644
index 0000000..4ad143c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
@@ -0,0 +1,1417 @@
+PREHOOK: query: -- Hybrid Grace Hash Join
+-- Test n-way join
+SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: -- Hybrid Grace Hash Join
+-- Test n-way join
+SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: -- 3-way mapjoin (1 big table, 2 small tables)
+SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: -- 3-way mapjoin (1 big table, 2 small tables)
+SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: z
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ input vertices:
+ 0 Map 1
+ 2 Map 4
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+428
+PREHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: z
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ input vertices:
+ 0 Map 1
+ 2 Map 4
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+428
+PREHOOK: query: -- 4-way mapjoin (1 big table, 3 small tables)
+SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: -- 4-way mapjoin (1 big table, 3 small tables)
+SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: z
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ Inner Join 0 to 3
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ 3 key (type: string)
+ input vertices:
+ 0 Map 1
+ 2 Map 4
+ 3 Map 5
+ Statistics: Num rows: 3300 Data size: 35059 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: w
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+5680
+PREHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: z
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ Inner Join 0 to 3
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ 3 key (type: string)
+ input vertices:
+ 0 Map 1
+ 2 Map 4
+ 3 Map 5
+ Statistics: Num rows: 3300 Data size: 35059 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: w
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+5680
+PREHOOK: query: -- 2 sets of 3-way mapjoin under 2 different tasks
+SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: -- 2 sets of 3-way mapjoin under 2 different tasks
+SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
+ Map 8 <- Map 10 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+ Reducer 5 <- Union 4 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 4 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: z
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ input vertices:
+ 0 Map 1
+ 2 Map 6
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: z
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 value (type: string)
+ 1 value (type: string)
+ 2 value (type: string)
+ input vertices:
+ 0 Map 7
+ 2 Map 10
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Union 4
+ Vertex: Union 4
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+428
+452
+PREHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
+ Map 8 <- Map 10 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+ Reducer 5 <- Union 4 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 4 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: z
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ input vertices:
+ 0 Map 1
+ 2 Map 6
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: z
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 value (type: string)
+ 1 value (type: string)
+ 2 value (type: string)
+ input vertices:
+ 0 Map 7
+ 2 Map 10
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Union 4
+ Vertex: Union 4
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+428
+452
+PREHOOK: query: -- A chain of 2 sets of 3-way mapjoin under the same task
+SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: -- A chain of 2 sets of 3-way mapjoin under the same task
+SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key is not null and value is not null) and (value < 'zzzzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: value (type: string)
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: z1
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 'zzzzzzzz') (type: boolean)
+ Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ outputColumnNames: _col1
+ input vertices:
+ 0 Map 1
+ 2 Map 4
+ Statistics: Num rows: 1465 Data size: 15565 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col1 (type: string)
+ 1 value (type: string)
+ 2 value (type: string)
+ input vertices:
+ 1 Map 5
+ 2 Map 6
+ Statistics: Num rows: 3223 Data size: 34243 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: y1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key is not null and (value < 'zzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: z2
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((value is not null and (key < 'zzzzzzzzzz')) and (value < 'zzzzzzzzzz')) (type: boolean)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: y2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value < 'zzzzzzzzzz') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+18256
+PREHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key is not null and value is not null) and (value < 'zzzzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: value (type: string)
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: z1
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 'zzzzzzzz') (type: boolean)
+ Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ 2 key (type: string)
+ outputColumnNames: _col1
+ input vertices:
+ 0 Map 1
+ 2 Map 4
+ Statistics: Num rows: 1465 Data size: 15565 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col1 (type: string)
+ 1 value (type: string)
+ 2 value (type: string)
+ input vertices:
+ 1 Map 5
+ 2 Map 6
+ Statistics: Num rows: 3223 Data size: 34243 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: y1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((key is not null and (value < 'zzzzzzzz')) and (key < 'zzzzzzzz')) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: z2
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((value is not null and (key < 'zzzzzzzzzz')) and (value < 'zzzzzzzzzz')) (type: boolean)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: y2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value < 'zzzzzzzzzz') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+18256
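The two plans above are identical except for the "HybridGraceHashJoin: true" annotations: with n-way support, both Map Join Operators stacked in the single Tez vertex run as hybrid grace hash joins, and the count (18256) matches the baseline. A minimal sketch of driving such a plan, assuming the standard HiveConf switch (the exact settings this test uses are not part of the output):

set hive.auto.convert.join=true;             -- convert the shuffle joins to broadcast map joins
set hive.mapjoin.hybridgrace.hashtable=true; -- run each map join as a hybrid grace hash join
EXPLAIN
SELECT COUNT(*)
FROM src1 x
JOIN srcpart z1 ON (x.key = z1.key)
JOIN src y1 ON (x.key = y1.key);             -- two map joins in one vertex, both grace-enabled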
[3/5] hive git commit: HIVE-10403 - Add n-way join support for Hybrid
Grace Hash Join (Wei Zheng via Vikram Dixit)
Posted by vi...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
new file mode 100644
index 0000000..4b39b2c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
@@ -0,0 +1,1587 @@
+PREHOOK: query: -- Hybrid Grace Hash Join
+-- Test basic functionality:
+-- 1. Various cases when hash partitions spill
+-- 2. Partitioned table spilling
+-- 3. Vectorization
+
+SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: -- Hybrid Grace Hash Join
+-- Test basic functionality:
+-- 1. Various cases when hash partitions spill
+-- 2. Partitioned table spilling
+-- 3. Vectorization
+
+SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
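A sketch of the main knobs behind these scenarios, assuming the usual HiveConf properties for hybrid grace hash join (the values shown are the common defaults, not necessarily what this test sets):

set hive.mapjoin.hybridgrace.hashtable=true;         -- enable hybrid grace hash join for map joins
set hive.mapjoin.hybridgrace.minnumpartitions=16;    -- minimum number of hash partitions to create
set hive.mapjoin.hybridgrace.minwbsize=10485760;     -- minimum write-buffer size per partition (bytes)
set hive.mapjoin.hybridgrace.memcheckfrequency=1024; -- check memory usage every N rows during build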
+PREHOOK: query: -- Base result for inner join
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Base result for inner join
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cint < 2000000000) (type: boolean)
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cint < 2000000000) (type: boolean)
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3152013
+PREHOOK: query: -- Two partitions are created. One in memory, one on disk on creation.
+-- The one in memory will eventually exceed the memory limit, but won't spill.
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Two partitions are created. One in memory, one on disk on creation.
+-- The one in memory will eventually exceed the memory limit, but won't spill.
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cint < 2000000000) (type: boolean)
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cint < 2000000000) (type: boolean)
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3152013
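These spill scenarios are presumably produced by shrinking the memory budget of the map join rather than by growing the data; a hedged sketch (hive.auto.convert.join.noconditionaltask.size is the real property, the value here is only illustrative):

set hive.auto.convert.join.noconditionaltask.size=2000000; -- small budget: the build side no longer fits
-- rerunning the same join now creates more hash partitions and spills some of them to disk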
+PREHOOK: query: -- Base result for inner join
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Base result for inner join
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3152013
+PREHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation.
+-- 1 partition is spilled during first-round processing, leaving 2 in memory, 14 on disk
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 16 partitions are created: 3 in memory, 13 on disk on creation.
+-- 1 partition is spilled during first-round processing, leaving 2 in memory, 14 on disk
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint is not null (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3152013
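The partition bookkeeping in the comment above, spelled out; 16 is also the usual default of hive.mapjoin.hybridgrace.minnumpartitions, which is presumably why exactly 16 partitions appear here:

-- on creation:  16 partitions = 3 in memory + 13 on disk
-- first round:  1 in-memory partition spills
-- end state:    in memory 3 - 1 = 2,  on disk 13 + 1 = 14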
+PREHOOK: query: -- Base result for outer join
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Base result for outer join
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3155128
+PREHOOK: query: -- 32 partitions are created: 3 in memory, 29 on disk on creation.
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- 32 partitions are created: 3 in memory, 29 on disk on creation.
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3155128
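Comparing the outer-join counts against the inner-join ones is a quick semantic check: the left outer join returns 3155128 rows versus 3152013 for the inner join, and the difference is presumably the left-side rows with NULL (or otherwise unmatched) cint, which only the outer join preserves:

-- 3155128 (left outer) - 3152013 (inner) = 3115 unmatched left-side rows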
+PREHOOK: query: -- Partitioned table
+create table parttbl (key string, value char(20)) partitioned by (dt char(10))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parttbl
+POSTHOOK: query: -- Partitioned table
+create table parttbl (key string, value char(20)) partitioned by (dt char(10))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parttbl
+PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01')
+ select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@parttbl@dt=2000-01-01
+POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-01')
+ select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@parttbl@dt=2000-01-01
+POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-01).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02')
+ select * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@parttbl@dt=2000-01-02
+POSTHOOK: query: insert overwrite table parttbl partition(dt='2000-01-02')
+ select * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: default@parttbl@dt=2000-01-02
+POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: parttbl PARTITION(dt=2000-01-02).value EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- No spill, base result
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- No spill, base result
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: p1
+ Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: p2
+ Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parttbl
+PREHOOK: Input: default@parttbl@dt=2000-01-01
+PREHOOK: Input: default@parttbl@dt=2000-01-02
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parttbl
+POSTHOOK: Input: default@parttbl@dt=2000-01-01
+POSTHOOK: Input: default@parttbl@dt=2000-01-02
+#### A masked pattern was here ####
+1217
+PREHOOK: query: -- No spill, 2 partitions created in memory
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- No spill, 2 partitions created in memory
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: p1
+ Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: p2
+ Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parttbl
+PREHOOK: Input: default@parttbl@dt=2000-01-01
+PREHOOK: Input: default@parttbl@dt=2000-01-02
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parttbl
+POSTHOOK: Input: default@parttbl@dt=2000-01-01
+POSTHOOK: Input: default@parttbl@dt=2000-01-02
+#### A masked pattern was here ####
+1217
+PREHOOK: query: -- Spill case base result
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Spill case base result
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: p1
+ Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: p2
+ Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parttbl
+PREHOOK: Input: default@parttbl@dt=2000-01-01
+PREHOOK: Input: default@parttbl@dt=2000-01-02
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parttbl
+POSTHOOK: Input: default@parttbl@dt=2000-01-01
+POSTHOOK: Input: default@parttbl@dt=2000-01-02
+#### A masked pattern was here ####
+1217
+PREHOOK: query: -- Spill case, one partition in memory, one spilled on creation
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Spill case, one partition in memory, one spilled on creation
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: p1
+ Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 289 Data size: 6872 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: p2
+ Statistics: Num rows: 525 Data size: 12474 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 263 Data size: 6248 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parttbl
+PREHOOK: Input: default@parttbl@dt=2000-01-01
+PREHOOK: Input: default@parttbl@dt=2000-01-02
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parttbl
+POSTHOOK: Input: default@parttbl@dt=2000-01-01
+POSTHOOK: Input: default@parttbl@dt=2000-01-02
+#### A masked pattern was here ####
+1217
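Whatever the spill configuration, the partitioned-table join keeps returning 1217; result stability under spilling is the property these cases exercise. A hedged way to check it by hand (the toggle is an assumption about how the scenarios are driven):

set hive.mapjoin.hybridgrace.hashtable=false;
select count(*) from
(select p1.value from parttbl p1 inner join parttbl p2 on p1.key = p2.key) t1; -- baseline: 1217
set hive.mapjoin.hybridgrace.hashtable=true;
select count(*) from
(select p1.value from parttbl p1 inner join parttbl p2 on p1.key = p2.key) t1; -- must still be 1217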
+PREHOOK: query: drop table parttbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@parttbl
+PREHOOK: Output: default@parttbl
+POSTHOOK: query: drop table parttbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@parttbl
+POSTHOOK: Output: default@parttbl
+PREHOOK: query: -- Test vectorization
+-- Test case borrowed from vector_decimal_mapjoin.q
+CREATE TABLE decimal_mapjoin STORED AS ORC AS
+ SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
+ CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
+ cint
+ FROM alltypesorc
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@decimal_mapjoin
+POSTHOOK: query: -- Test vectorization
+-- Test case borrowed from vector_decimal_mapjoin.q
+CREATE TABLE decimal_mapjoin STORED AS ORC AS
+ SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
+ CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
+ cint
+ FROM alltypesorc
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@decimal_mapjoin
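Vectorized execution composes with the grace join, as the "Execution mode: vectorized" lines in the plans below show. A minimal sketch of enabling both together, assuming the standard switches (decimal_mapjoin is ORC, so both sides of the map join can be vectorized):

set hive.vectorized.execution.enabled=true;  -- vectorized operators over the ORC input
set hive.mapjoin.hybridgrace.hashtable=true; -- grace hash join inside the vectorized map join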
+PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: l
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cint = 6981) (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 6981 (type: int)
+ 1 6981 (type: int)
+ outputColumnNames: _col1, _col9
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: r
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cint = 6981) (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 6981 (type: int)
+ sort order: +
+ Map-reduce partition columns: 6981 (type: int)
+ Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
+ value expressions: cdecimal2 (type: decimal(23,14))
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_mapjoin
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_mapjoin
+#### A masked pattern was here ####
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 -617.5607769230769
+6981 6981 5831542.269248378 -617.5607769230769
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 6984454.211097692
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 -617.5607769230769
+6981 6981 -515.621072973 -617.5607769230769
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 6984454.211097692
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 -617.5607769230769
+6981 6981 -515.621072973 -617.5607769230769
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 6984454.211097692
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+PREHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: l
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cint = 6981) (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 6981 (type: int)
+ 1 6981 (type: int)
+ outputColumnNames: _col1, _col9
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6758 Data size: 1190783 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: r
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (cint = 6981) (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: 6981 (type: int)
+ sort order: +
+ Map-reduce partition columns: 6981 (type: int)
+ Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
+ value expressions: cdecimal2 (type: decimal(23,14))
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_mapjoin
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_mapjoin
+#### A masked pattern was here ####
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 -617.5607769230769
+6981 6981 5831542.269248378 -617.5607769230769
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 6984454.211097692
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 5831542.269248378 NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL -617.5607769230769
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL 6984454.211097692
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 NULL NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 -617.5607769230769
+6981 6981 -515.621072973 -617.5607769230769
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 6984454.211097692
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 -617.5607769230769
+6981 6981 -515.621072973 -617.5607769230769
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 6984454.211097692
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+6981 6981 -515.621072973 NULL
+PREHOOK: query: DROP TABLE decimal_mapjoin
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@decimal_mapjoin
+PREHOOK: Output: default@decimal_mapjoin
+POSTHOOK: query: DROP TABLE decimal_mapjoin
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@decimal_mapjoin
+POSTHOOK: Output: default@decimal_mapjoin
[4/5] hive git commit: HIVE-10403 - Add n-way join support for Hybrid
Grace Hash Join (Wei Zheng via Vikram Dixit)
Posted by vi...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
index 0192fb5..cee9100 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
@@ -67,7 +67,7 @@ public class MapJoinDesc extends JoinDesc implements Serializable {
private boolean isBucketMapJoin;
// Hash table memory usage allowed; used in case of non-staged mapjoin.
- private float hashtableMemoryUsage;
+ private float hashtableMemoryUsage; // A fraction between 0 and 1, not a percentage out of 100
protected boolean genJoinKeys = true;
private boolean isHybridHashJoin;
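A fraction in [0, 1] like hashtableMemoryUsage is typically multiplied
against the JVM heap ceiling to derive a byte budget before the hash
table is loaded. A minimal sketch of that arithmetic, assuming nothing
about Hive's internals (the class and method names below are
hypothetical):

  // Hypothetical sketch: turn a 0-1 memory fraction into a byte budget.
  public final class MemoryBudgetSketch {
    static long hashTableBudgetBytes(float hashtableMemoryUsage) {
      long maxHeap = Runtime.getRuntime().maxMemory(); // JVM heap ceiling in bytes
      return (long) (maxHeap * hashtableMemoryUsage);  // e.g. 0.9f -> 90% of heap
    }
    public static void main(String[] args) {
      System.out.println(hashTableBudgetBytes(0.9f));
    }
  }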
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
index eb38b19..a45275b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java
@@ -35,7 +35,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
random = new Random(47496);
VectorMapJoinFastLongHashMap map =
- new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
+ new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
RandomLongStream randomLongKeyStream = new RandomLongStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -55,7 +55,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
public void testPutGetMultiple() throws Exception {
random = new Random(2990);
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
+ VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
RandomLongStream randomLongKeyStream = new RandomLongStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -77,7 +77,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
public void testGetNonExistent() throws Exception {
random = new Random(16916);
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
+ VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE);
RandomLongStream randomLongKeyStream = new RandomLongStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -101,7 +101,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
random = new Random(26078);
// Make sure the map does not expand; should be able to find space.
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, 0);
+ VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE);
RandomLongStream randomLongKeyStream = new RandomLongStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -126,7 +126,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
random = new Random(22470);
// Start with capacity 1; make sure we expand on every put.
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, 0);
+ VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE);
RandomLongStream randomLongKeyStream = new RandomLongStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -147,7 +147,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
random = new Random(40719);
// Use a large capacity that doesn't require expansion, yet.
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, 0);
+ VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
RandomLongStream randomLongKeyStream = new RandomLongStream(random);
@@ -172,7 +172,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable {
random = new Random(46809);
// Use a large capacity that doesn't require expansion, yet.
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, 0);
+ VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
RandomLongStream randomLongKeyStream = new RandomLongStream(random);
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
index 3c1b29a..944bda6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java
@@ -35,7 +35,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
random = new Random(47496);
VectorMapJoinFastMultiKeyHashMap map =
- new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
+ new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE);
RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -55,7 +55,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
public void testPutGetMultiple() throws Exception {
random = new Random(2990);
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
+ VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE);
RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -77,7 +77,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
public void testGetNonExistent() throws Exception {
random = new Random(16916);
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0);
+ VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE);
RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -101,7 +101,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
random = new Random(26078);
// Make sure the map does not expand; should be able to find space.
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE, 0);
+ VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE);
RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -126,7 +126,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
random = new Random(22470);
// Start with capacity 1; make sure we expand on every put.
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE, 0);
+ VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE);
RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random);
RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random);
@@ -147,7 +147,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
random = new Random(5231);
// Use a large capacity that doesn't require expansion, yet.
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, 0);
+ VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE);
RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10);
@@ -178,7 +178,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable {
random = new Random(46809);
// Use a large capacity that doesn't require expansion, yet.
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, 0);
+ VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE);
RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10);
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
index 096c890..e92504a 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
@@ -42,6 +42,8 @@ select * from dest2;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=200;
+set hive.mapjoin.hybridgrace.minwbsize=100;
+set hive.mapjoin.hybridgrace.minnumpartitions=2;
-- An SMB join followed by a multi-insert
explain
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
new file mode 100644
index 0000000..c7d925e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q
@@ -0,0 +1,258 @@
+-- Hybrid Grace Hash Join
+-- Test basic functionality:
+-- 1. Various cases when hash partitions spill
+-- 2. Partitioned table spilling
+-- 3. Vectorization
+
+SELECT 1;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask.size=1300000;
+set hive.mapjoin.optimized.hashtable.wbsize=880000;
+set hive.mapjoin.hybridgrace.memcheckfrequency=1024;
+
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+-- Base result for inner join
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+;
+
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+;
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+-- Two partitions are created: one in memory, one on disk on creation.
+-- The one in memory will eventually exceed the memory limit, but won't spill.
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+;
+
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint
+ where c.cint < 2000000000) t1
+;
+
+set hive.auto.convert.join.noconditionaltask.size=3000000;
+set hive.mapjoin.optimized.hashtable.wbsize=100000;
+
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+-- Base result for inner join
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+;
+
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+;
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+-- 16 partitions are created: 3 in memory, 13 on disk on creation.
+-- 1 partition is spilled during first-round processing, leaving 2 in memory and 14 on disk
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+;
+
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ inner join alltypesorc cd
+ on cd.cint = c.cint) t1
+;
+
+
+
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+-- Base result for outer join
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+;
+
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+;
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+-- 32 partitions are created: 3 in memory, 29 on disk on creation.
+explain
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+;
+
+select count(*) from
+(select c.ctinyint
+ from alltypesorc c
+ left outer join alltypesorc cd
+ on cd.cint = c.cint) t1
+;
+
+
+-- Partitioned table
+create table parttbl (key string, value char(20)) partitioned by (dt char(10));
+insert overwrite table parttbl partition(dt='2000-01-01')
+ select * from src;
+insert overwrite table parttbl partition(dt='2000-01-02')
+ select * from src1;
+
+set hive.auto.convert.join.noconditionaltask.size=30000000;
+set hive.mapjoin.optimized.hashtable.wbsize=10000000;
+
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+-- No spill, base result
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+;
+
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+;
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+-- No spill, 2 partitions created in memory
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+;
+
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+;
+
+
+set hive.auto.convert.join.noconditionaltask.size=20000;
+set hive.mapjoin.optimized.hashtable.wbsize=10000;
+
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+-- Spill case base result
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+;
+
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+;
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+-- Spill case, one partition in memory, one spilled on creation
+explain
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+;
+
+select count(*) from
+(select p1.value
+ from parttbl p1
+ inner join parttbl p2
+ on p1.key = p2.key) t1
+;
+
+drop table parttbl;
+
+
+-- Test vectorization
+-- Test case borrowed from vector_decimal_mapjoin.q
+CREATE TABLE decimal_mapjoin STORED AS ORC AS
+ SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
+ CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
+ cint
+ FROM alltypesorc;
+
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=50000000;
+set hive.mapjoin.optimized.hashtable.wbsize=10000;
+SET hive.vectorized.execution.enabled=true;
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981;
+SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981;
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981;
+SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
+ FROM decimal_mapjoin l
+ JOIN decimal_mapjoin r ON l.cint = r.cint
+ WHERE l.cint = 6981;
+
+DROP TABLE decimal_mapjoin;
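The partition counts called out in the comments of hybridgrace_hashjoin_1.q
(2, 16, 32) fall out of the interplay between the memory granted to the hash
table (hive.auto.convert.join.noconditionaltask.size), the write-buffer size
(hive.mapjoin.optimized.hashtable.wbsize), and the new
hive.mapjoin.hybridgrace.minnumpartitions setting acting as a floor. Roughly,
the partition count is a power of two near memoryAvailable / wbSize. The
sketch below shows only that rough heuristic, not the logic Hive actually
ships; the real computation also folds in memory conditions at load time,
which is why the same settings can yield 16 partitions in one comment and 32
in another:

  // Rough sketch of the hybrid grace partition-count heuristic (illustrative only).
  public final class PartitionCountSketch {
    static int nextPowerOfTwo(int n) {
      int p = 1;
      while (p < n) {
        p <<= 1;
      }
      return p;
    }
    static int numPartitions(long memoryAvailable, long wbSize, int minNumPartitions) {
      int raw = (int) Math.max(1, memoryAvailable / wbSize);
      return Math.max(minNumPartitions, nextPowerOfTwo(raw));
    }
    public static void main(String[] args) {
      // ~1.3 MB budget with an 880 KB write buffer -> 2 partitions
      System.out.println(numPartitions(1300000L, 880000L, 2));
      // ~3 MB budget with a 100 KB write buffer -> 32 partitions
      System.out.println(numPartitions(3000000L, 100000L, 2));
    }
  }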
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q
new file mode 100644
index 0000000..dd425f4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q
@@ -0,0 +1,152 @@
+-- Hybrid Grace Hash Join
+-- Test n-way join
+SELECT 1;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000000;
+set hive.cbo.enable=false;
+
+
+-- 3-way mapjoin (1 big table, 2 small tables)
+SELECT 1;
+
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key);
+
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key);
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key);
+
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key);
+
+
+-- 4-way mapjoin (1 big table, 3 small tables)
+SELECT 1;
+
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key);
+
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key);
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key);
+
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN srcpart w ON (x.key = w.key)
+JOIN src y ON (y.key = x.key);
+
+
+-- 2 sets of 3-way mapjoin under 2 different tasks
+SELECT 1;
+
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value);
+
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value);
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+EXPLAIN
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value);
+
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.key = z.key)
+JOIN src y ON (y.key = x.key)
+UNION
+SELECT COUNT(*)
+FROM src1 x JOIN srcpart z ON (x.value = z.value)
+JOIN src y ON (y.value = x.value);
+
+
+-- A chain of 2 sets of 3-way mapjoin under the same task
+SELECT 1;
+
+set hive.mapjoin.hybridgrace.hashtable=false;
+
+EXPLAIN
+SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz';
+
+SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz';
+
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+EXPLAIN
+SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz';
+
+SELECT COUNT(*)
+FROM src1 x
+JOIN srcpart z1 ON (x.key = z1.key)
+JOIN src y1 ON (x.key = y1.key)
+JOIN srcpart z2 ON (x.value = z2.value)
+JOIN src y2 ON (x.value = y2.value)
+WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz'
+ AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz';
\ No newline at end of file
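hybridgrace_hashjoin_2.q above exercises the headline change of HIVE-10403:
in an n-way map join there is one hash table per small table, and on the
hybrid grace path each of those tables is partitioned and can spill to disk
independently, while every big-table row is probed against all of them. A
stripped-down, non-Hive sketch of that probe loop for an inner join (all
names and data below are illustrative):

  import java.util.Arrays;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;

  // Illustrative n-way map join probe: one hash table per small table.
  public final class NWayProbeSketch {
    public static void main(String[] args) {
      Map<String, String> src = new HashMap<>();
      src.put("238", "val_238");
      Map<String, String> srcpart = new HashMap<>();
      srcpart.put("238", "2008-04-08");
      List<Map<String, String>> smallTables = Arrays.asList(src, srcpart);

      String[] bigTableKeys = {"238", "311"};
      long matches = 0;
      for (String key : bigTableKeys) {
        boolean matchedAll = true;
        for (Map<String, String> table : smallTables) {
          if (!table.containsKey(key)) {   // inner join: every side must match
            matchedAll = false;
            break;
          }
        }
        if (matchedAll) {
          matches++;                       // key 238 matches both; 311 does not
        }
      }
      System.out.println("matched rows: " + matches); // prints: matched rows: 1
    }
  }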
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/hybridhashjoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/hybridhashjoin.q b/ql/src/test/queries/clientpositive/hybridhashjoin.q
deleted file mode 100644
index fbd48ea..0000000
--- a/ql/src/test/queries/clientpositive/hybridhashjoin.q
+++ /dev/null
@@ -1,250 +0,0 @@
-set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=1300000;
-set hive.mapjoin.optimized.hashtable.wbsize=880000;
-set hive.mapjoin.hybridgrace.memcheckfrequency=1024;
-
-set hive.mapjoin.hybridgrace.hashtable=false;
-
--- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-;
-
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-;
-
-set hive.mapjoin.hybridgrace.hashtable=true;
-
--- Two partitions are created. One in memory, one on disk on creation.
--- The one in memory will eventually exceed memory limit, but won't spill.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-;
-
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint
- where c.cint < 2000000000) t1
-;
-
-set hive.auto.convert.join.noconditionaltask.size=3000000;
-set hive.mapjoin.optimized.hashtable.wbsize=100000;
-
-set hive.mapjoin.hybridgrace.hashtable=false;
-
--- Base result for inner join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-;
-
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-;
-
-set hive.mapjoin.hybridgrace.hashtable=true;
-
--- 16 partitions are created: 3 in memory, 13 on disk on creation.
--- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-;
-
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- inner join alltypesorc cd
- on cd.cint = c.cint) t1
-;
-
-
-
-set hive.mapjoin.hybridgrace.hashtable=false;
-
--- Base result for outer join
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-;
-
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-;
-
-set hive.mapjoin.hybridgrace.hashtable=true;
-
--- 32 partitions are created. 3 in memory, 29 on disk on creation.
-explain
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-;
-
-select count(*) from
-(select c.ctinyint
- from alltypesorc c
- left outer join alltypesorc cd
- on cd.cint = c.cint) t1
-;
-
-
--- Partitioned table
-create table parttbl (key string, value char(20)) partitioned by (dt char(10));
-insert overwrite table parttbl partition(dt='2000-01-01')
- select * from src;
-insert overwrite table parttbl partition(dt='2000-01-02')
- select * from src1;
-
-set hive.auto.convert.join.noconditionaltask.size=30000000;
-set hive.mapjoin.optimized.hashtable.wbsize=10000000;
-
-set hive.mapjoin.hybridgrace.hashtable=false;
-
--- No spill, base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-;
-
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-;
-
-set hive.mapjoin.hybridgrace.hashtable=true;
-
--- No spill, 2 partitions created in memory
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-;
-
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-;
-
-
-set hive.auto.convert.join.noconditionaltask.size=20000;
-set hive.mapjoin.optimized.hashtable.wbsize=10000;
-
-set hive.mapjoin.hybridgrace.hashtable=false;
-
--- Spill case base result
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-;
-
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-;
-
-set hive.mapjoin.hybridgrace.hashtable=true;
-
--- Spill case, one partition in memory, one spilled on creation
-explain
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-;
-
-select count(*) from
-(select p1.value
- from parttbl p1
- inner join parttbl p2
- on p1.key = p2.key) t1
-;
-
-drop table parttbl;
-
-
--- Test vectorization
--- Test case borrowed from vector_decimal_mapjoin.q
-CREATE TABLE decimal_mapjoin STORED AS ORC AS
- SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
- CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
- cint
- FROM alltypesorc;
-
-SET hive.auto.convert.join=true;
-SET hive.auto.convert.join.noconditionaltask=true;
-SET hive.auto.convert.join.noconditionaltask.size=50000000;
-set hive.mapjoin.optimized.hashtable.wbsize=10000;
-SET hive.vectorized.execution.enabled=true;
-set hive.mapjoin.hybridgrace.hashtable=false;
-
-EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981;
-SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981;
-
-set hive.mapjoin.hybridgrace.hashtable=true;
-
-EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981;
-SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2
- FROM decimal_mapjoin l
- JOIN decimal_mapjoin r ON l.cint = r.cint
- WHERE l.cint = 6981;
-
-DROP TABLE decimal_mapjoin;
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/tez_join_hash.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_join_hash.q b/ql/src/test/queries/clientpositive/tez_join_hash.q
index 3571cd5..67d89f8 100644
--- a/ql/src/test/queries/clientpositive/tez_join_hash.q
+++ b/ql/src/test/queries/clientpositive/tez_join_hash.q
@@ -14,6 +14,8 @@ SELECT count(*) FROM src, orc_src where src.key=orc_src.key;
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=3000;
+set hive.mapjoin.hybridgrace.minwbsize=350;
+set hive.mapjoin.hybridgrace.minnumpartitions=8;
explain
select count(*) from (select x.key as key, y.value as value from
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/tez_smb_main.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_smb_main.q b/ql/src/test/queries/clientpositive/tez_smb_main.q
index 6398762..1802709 100644
--- a/ql/src/test/queries/clientpositive/tez_smb_main.q
+++ b/ql/src/test/queries/clientpositive/tez_smb_main.q
@@ -42,6 +42,8 @@ select count(*)
from tab a join tab_part b on a.key = b.key;
set hive.auto.convert.join.noconditionaltask.size=2000;
+set hive.mapjoin.hybridgrace.minwbsize=500;
+set hive.mapjoin.hybridgrace.minnumpartitions=4;
explain
select count (*)
from tab a join tab_part b on a.key = b.key;
@@ -50,6 +52,8 @@ select count(*)
from tab a join tab_part b on a.key = b.key;
set hive.auto.convert.join.noconditionaltask.size=1000;
+set hive.mapjoin.hybridgrace.minwbsize=250;
+set hive.mapjoin.hybridgrace.minnumpartitions=4;
explain
select count (*)
from tab a join tab_part b on a.key = b.key;
@@ -58,6 +62,8 @@ select count(*)
from tab a join tab_part b on a.key = b.key;
set hive.auto.convert.join.noconditionaltask.size=500;
+set hive.mapjoin.hybridgrace.minwbsize=125;
+set hive.mapjoin.hybridgrace.minnumpartitions=4;
explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value;
select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value;