You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@hive.apache.org by gu...@apache.org on 2015/05/15 00:45:57 UTC

[1/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

Repository: hive
Updated Branches:
  refs/heads/master 3fa7489e2 -> 2b9f2f5e2


http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_join_filters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_join_filters.q.out b/ql/src/test/results/clientpositive/vector_join_filters.q.out
new file mode 100644
index 0000000..48fc072
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_join_filters.q.out
@@ -0,0 +1,222 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+Warning: Map Join MAPJOIN[17][bigTable=a] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+Warning: Map Join MAPJOIN[17][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_join_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/vector_join_nulls.q.out
new file mode 100644
index 0000000..c1516f2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_join_nulls.q.out
@@ -0,0 +1,195 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509856
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542038
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543491
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509891
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out b/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
index 23d3f32..f9077c8 100644
--- a/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
@@ -1,6 +1,10 @@
-PREHOOK: query: drop table if exists TJOIN1
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table if exists TJOIN1
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table if exists TJOIN1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table if exists TJOIN1
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: drop table if exists TJOIN2
 PREHOOK: type: DROPTABLE

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_outer_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/vector_outer_join5.q.out
new file mode 100644
index 0000000..bbe8ba1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_outer_join5.q.out
@@ -0,0 +1,1300 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sorted_mod_4
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sorted_mod_4
+PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sorted_mod_4
+PREHOOK: Output: default@sorted_mod_4
+POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sorted_mod_4
+POSTHOOK: Output: default@sorted_mod_4
+PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+PREHOOK: query: create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_table
+POSTHOOK: query: create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_table
+PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Output: default@small_table
+POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Output: default@small_table
+PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:st 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:st 
+          TableScan
+            alias: st
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6876
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:sm 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:sm 
+          TableScan
+            alias: sm
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                filter predicates:
+                  0 {(_col1 = 2)}
+                  1 
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cmodint (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                filter predicates:
+                  0 {(_col1 = 2)}
+                  1 
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6058
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:sm 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:sm 
+          TableScan
+            alias: sm
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                filter predicates:
+                  0 {((UDFToInteger(_col0) pmod 4) = _col1)}
+                  1 
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cmodint (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                filter predicates:
+                  0 {((UDFToInteger(_col0) pmod 4) = _col1)}
+                  1 
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6248
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:sm 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:sm 
+          TableScan
+            alias: sm
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                filter predicates:
+                  0 {(_col0 < 100)}
+                  1 
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                filter predicates:
+                  0 {(_col0 < 100)}
+                  1 
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6876
+PREHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-3 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:sm 
+          Fetch Operator
+            limit: -1
+        $hdt$_0:$hdt$_2:s 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:sm 
+          TableScan
+            alias: sm
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cbigint (type: bigint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 UDFToLong(_col1) (type: bigint)
+                  1 (_col0 pmod UDFToLong(8)) (type: bigint)
+        $hdt$_0:$hdt$_2:s 
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cmodint (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 UDFToLong(_col1) (type: bigint)
+                  1 (_col0 pmod UDFToLong(8)) (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Left Outer Join0 to 1
+                  keys:
+                    0 _col0 (type: tinyint)
+                    1 _col0 (type: tinyint)
+                  Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: count()
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+3268334
+PREHOOK: query: create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mod_8_mod_4
+POSTHOOK: query: create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mod_8_mod_4
+PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Output: default@mod_8_mod_4
+POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Output: default@mod_8_mod_4
+PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+#### A masked pattern was here ####
+PREHOOK: query: create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_table2
+POSTHOOK: query: create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_table2
+PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table2
+PREHOOK: Output: default@small_table2
+POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table2
+POSTHOOK: Output: default@small_table2
+PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:st 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:st 
+          TableScan
+            alias: st
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+39112
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:sm 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:sm 
+          TableScan
+            alias: sm
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                filter predicates:
+                  0 {(_col1 = 2)}
+                  1 
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int), cmodint (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                filter predicates:
+                  0 {(_col1 = 2)}
+                  1 
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+11171
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:sm 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:sm 
+          TableScan
+            alias: sm
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                filter predicates:
+                  0 {((_col0 pmod 4) = _col1)}
+                  1 
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int), cmodint (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                filter predicates:
+                  0 {((_col0 pmod 4) = _col1)}
+                  1 
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+14371
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:sm 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:sm 
+          TableScan
+            alias: sm
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                filter predicates:
+                  0 {(_col0 < 3)}
+                  1 
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                filter predicates:
+                  0 {(_col0 < 3)}
+                  1 
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+17792
+PREHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-8 is a root stage
+  Stage-3 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$hdt$_1:sm 
+          Fetch Operator
+            limit: -1
+        $hdt$_0:$hdt$_2:s 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$hdt$_1:sm 
+          TableScan
+            alias: sm
+            Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cbigint (type: bigint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 UDFToLong(_col1) (type: bigint)
+                  1 (_col0 pmod UDFToLong(8)) (type: bigint)
+        $hdt$_0:$hdt$_2:s 
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cmodtinyint (type: int), cmodint (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 UDFToLong(_col1) (type: bigint)
+                  1 (_col0 pmod UDFToLong(8)) (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Left Outer Join0 to 1
+                  keys:
+                    0 _col0 (type: int)
+                    1 _col0 (type: int)
+                  Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: count()
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
+      Local Work:
+        Map Reduce Local Work
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+6524438

[2/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

Posted by gu...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_join30.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_join30.q.out b/ql/src/test/results/clientpositive/vector_join30.q.out
new file mode 100644
index 0000000..57f9aeb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_join30.q.out
@@ -0,0 +1,2194 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcsrc
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcsrc
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2
+  Stage-8 has a backup stage: Stage-2
+  Stage-5 depends on stages: Stage-8
+  Stage-3 depends on stages: Stage-2, Stage-5, Stage-6
+  Stage-9 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-9
+  Stage-2
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-9
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col2, _col3
+          Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2
+  Stage-7 has a backup stage: Stage-2
+  Stage-5 depends on stages: Stage-7
+  Stage-3 depends on stages: Stage-2, Stage-5
+  Stage-2
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-7
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Left Outer Join0 to 1
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col2, _col3
+          Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2
+  Stage-7 has a backup stage: Stage-2
+  Stage-5 depends on stages: Stage-7
+  Stage-3 depends on stages: Stage-2, Stage-5
+  Stage-2
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Conditional Operator
+
+  Stage: Stage-7
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Right Outer Join0 to 1
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col2, _col3
+          Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-9 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-10, Stage-11, Stage-12, Stage-2
+  Stage-10 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-10
+  Stage-3 depends on stages: Stage-2, Stage-6, Stage-7, Stage-8
+  Stage-11 has a backup stage: Stage-2
+  Stage-7 depends on stages: Stage-11
+  Stage-12 has a backup stage: Stage-2
+  Stage-8 depends on stages: Stage-12
+  Stage-2
+  Stage-4 is a root stage
+  Stage-5 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-9
+    Conditional Operator
+
+  Stage: Stage-10
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME1 
+          Fetch Operator
+            limit: -1
+        $INTNAME2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+        $INTNAME2 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+                   Inner Join 0 to 2
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-11
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME 
+          Fetch Operator
+            limit: -1
+        $INTNAME2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+        $INTNAME2 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+                   Inner Join 0 to 2
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-12
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME 
+          Fetch Operator
+            limit: -1
+        $INTNAME1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+        $INTNAME1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+                   Inner Join 0 to 2
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+               Inner Join 0 to 2
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+            2 _col0 (type: string)
+          outputColumnNames: _col2, _col3
+          Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-8 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-9, Stage-10, Stage-2
+  Stage-9 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-9
+  Stage-3 depends on stages: Stage-2, Stage-6, Stage-7
+  Stage-10 has a backup stage: Stage-2
+  Stage-7 depends on stages: Stage-10
+  Stage-2
+  Stage-4 is a root stage
+  Stage-5 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-9
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME1 
+          Fetch Operator
+            limit: -1
+        $INTNAME2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+        $INTNAME2 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+                   Left Outer Join0 to 2
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-10
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME 
+          Fetch Operator
+            limit: -1
+        $INTNAME2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+        $INTNAME2 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+                   Left Outer Join0 to 2
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+               Left Outer Join0 to 2
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+            2 _col0 (type: string)
+          outputColumnNames: _col2, _col3
+          Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2
+  Stage-8 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-8
+  Stage-3 depends on stages: Stage-2, Stage-6
+  Stage-2
+  Stage-4 is a root stage
+  Stage-5 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME1 
+          Fetch Operator
+            limit: -1
+        $INTNAME2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+        $INTNAME2 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Left Outer Join0 to 1
+                   Left Outer Join0 to 2
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+               Left Outer Join0 to 2
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+            2 _col0 (type: string)
+          outputColumnNames: _col2, _col3
+          Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2
+  Stage-8 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-8
+  Stage-3 depends on stages: Stage-2, Stage-6
+  Stage-2
+  Stage-4 is a root stage
+  Stage-5 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME 
+          Fetch Operator
+            limit: -1
+        $INTNAME1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+        $INTNAME1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Left Outer Join0 to 1
+                   Right Outer Join0 to 2
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+               Right Outer Join0 to 2
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+            2 _col0 (type: string)
+          outputColumnNames: _col2, _col3
+          Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2
+  Stage-8 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-8
+  Stage-3 depends on stages: Stage-2, Stage-6
+  Stage-2
+  Stage-4 is a root stage
+  Stage-5 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME 
+          Fetch Operator
+            limit: -1
+        $INTNAME1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+        $INTNAME1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Right Outer Join0 to 1
+                   Right Outer Join0 to 2
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+                2 _col0 (type: string)
+              outputColumnNames: _col2, _col3
+              Group By Operator
+                aggregations: sum(hash(_col2,_col3))
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Right Outer Join0 to 1
+               Right Outer Join0 to 2
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+            2 _col0 (type: string)
+          outputColumnNames: _col2, _col3
+          Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: sum(hash(_col2,_col3))
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: orcsrc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476

[5/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

Posted by gu...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_outer_join5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_outer_join5.q b/ql/src/test/queries/clientpositive/vector_outer_join5.q
new file mode 100644
index 0000000..b7ee4a4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_outer_join5.q
@@ -0,0 +1,173 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+
+-- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint;
+
+ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS;
+ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS;
+
+create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100;
+
+ANALYZE TABLE small_table COMPUTE STATISTICS;
+ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS;
+
+explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1;
+
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1;
+
+explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1;
+
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1;
+
+explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1;
+
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1;
+
+explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1;
+
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1;
+
+explain
+select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1;
+
+select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1;
+
+
+create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null;
+
+ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS;
+ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS;
+
+create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100;
+
+ANALYZE TABLE small_table2 COMPUTE STATISTICS;
+ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS;
+
+explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1;
+
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1;
+
+explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1;
+
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1;
+
+explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1;
+
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1;
+
+explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1;
+
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1;
+
+explain
+select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1;
+
+select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out b/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out
index 3aa0e1a..3b37c72 100644
--- a/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out
+++ b/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out
@@ -38,23 +38,23 @@ POSTHOOK: Input: default@acid_vectorized_part
 POSTHOOK: Input: default@acid_vectorized_part@ds=today
 POSTHOOK: Input: default@acid_vectorized_part@ds=tomorrow
 #### A masked pattern was here ####
--1073279343	oj1YrV5Wa	today
 -1073279343	oj1YrV5Wa	tomorrow
--1073051226	A34p7oRr2WvUJNf	today
+-1073279343	oj1YrV5Wa	today
 -1073051226	A34p7oRr2WvUJNf	tomorrow
--1072910839	0iqrc5	today
+-1073051226	A34p7oRr2WvUJNf	today
 -1072910839	0iqrc5	tomorrow
--1072081801	dPkN74F7	today
+-1072910839	0iqrc5	today
 -1072081801	dPkN74F7	tomorrow
--1072076362	2uLyD28144vklju213J1mr	tomorrow
+-1072081801	dPkN74F7	today
 -1072076362	2uLyD28144vklju213J1mr	today
--1071480828	aw724t8c5558x2xneC624	today
+-1072076362	2uLyD28144vklju213J1mr	tomorrow
 -1071480828	aw724t8c5558x2xneC624	tomorrow
--1071363017	Anj0oF	today
+-1071480828	aw724t8c5558x2xneC624	today
 -1071363017	Anj0oF	tomorrow
--1070883071	0ruyd6Y50JpdGRf6HqD	tomorrow
+-1071363017	Anj0oF	today
 -1070883071	0ruyd6Y50JpdGRf6HqD	today
--1070551679	iUR3Q	today
+-1070883071	0ruyd6Y50JpdGRf6HqD	tomorrow
 -1070551679	iUR3Q	tomorrow
--1069736047	k17Am8uPHWk02cEf1jet	tomorrow
+-1070551679	iUR3Q	today
 -1069736047	k17Am8uPHWk02cEf1jet	today
+-1069736047	k17Am8uPHWk02cEf1jet	tomorrow

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_join30.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_join30.q.out b/ql/src/test/results/clientpositive/tez/vector_join30.q.out
new file mode 100644
index 0000000..2a14842
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_join30.q.out
@@ -0,0 +1,1367 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcsrc
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcsrc
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col2, _col3
+                  input vertices:
+                    1 Reducer 5
+                  Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
+                  HybridGraceHashJoin: true
+                  Group By Operator
+                    aggregations: sum(hash(_col2,_col3))
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Reducer 5 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Left Outer Join0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col2, _col3
+                  input vertices:
+                    1 Reducer 5
+                  Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+                  HybridGraceHashJoin: true
+                  Group By Operator
+                    aggregations: sum(hash(_col2,_col3))
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Reducer 5 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE)
+        Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 4 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Right Outer Join0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col2, _col3
+                  input vertices:
+                    0 Reducer 2
+                  Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+                  HybridGraceHashJoin: true
+                  Group By Operator
+                    aggregations: sum(hash(_col2,_col3))
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                       Inner Join 0 to 2
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                    2 _col0 (type: string)
+                  outputColumnNames: _col2, _col3
+                  input vertices:
+                    1 Reducer 5
+                    2 Reducer 7
+                  Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+                  HybridGraceHashJoin: true
+                  Group By Operator
+                    aggregations: sum(hash(_col2,_col3))
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Reducer 5 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+            Execution mode: vectorized
+        Reducer 7 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 3 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Left Outer Join0 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                outputColumnNames: _col2, _col3
+                Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Reducer 6 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+            Execution mode: vectorized
+        Reducer 8 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 3 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                     Left Outer Join0 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                outputColumnNames: _col2, _col3
+                Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Reducer 6 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+            Execution mode: vectorized
+        Reducer 8 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 3 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                     Right Outer Join0 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                outputColumnNames: _col2, _col3
+                Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Reducer 6 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+            Execution mode: vectorized
+        Reducer 8 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 3 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Right Outer Join0 to 1
+                     Right Outer Join0 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                outputColumnNames: _col2, _col3
+                Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Reducer 6 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+            Execution mode: vectorized
+        Reducer 8 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_join_filters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_join_filters.q.out b/ql/src/test/results/clientpositive/tez/vector_join_filters.q.out
new file mode 100644
index 0000000..8cc9311
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_join_filters.q.out
@@ -0,0 +1,222 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+Warning: Map Join MAPJOIN[15][bigTable=a] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+Warning: Map Join MAPJOIN[15][bigTable=b] in task 'Map 2' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400

[3/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

Posted by gu...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out b/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out
new file mode 100644
index 0000000..1e74446
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out
@@ -0,0 +1,234 @@
+PREHOOK: query: explain
+select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: alltypesorc
+          Filter Operator
+            predicate: ((cbigint % 250) = 0) (type: boolean)
+            Select Operator
+              expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+              ListSink
+
+PREHOOK: query: select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+1969-12-31 15:59:59.964	1969-12-31 15:59:59.8	NULL	1969-12-08 10:43:03.25	1969-12-31 15:59:24	1969-12-31 15:56:40	NULL	1969-12-31 16:00:00	1969-12-31 15:59:45.748	NULL	NULL
+1969-12-31 15:59:59.964	1969-12-31 15:59:59.8	NULL	1970-01-19 04:24:39	1969-12-31 15:59:24	1969-12-31 15:56:40	NULL	1969-12-31 16:00:00	1969-12-31 15:59:53.817	NULL	NULL
+1969-12-31 15:59:59.97	1969-12-31 15:59:59.8	NULL	1970-01-17 05:10:52.25	1969-12-31 15:59:30	1969-12-31 15:56:40	NULL	1969-12-31 16:00:00	1969-12-31 16:00:12.935	NULL	NULL
+1969-12-31 15:59:59.949	NULL	1970-01-09 14:53:20.971	1970-01-12 20:45:23.25	1969-12-31 15:59:09	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:08.451	NULL	NULL
+1969-12-31 15:59:59.949	NULL	1970-01-09 07:39:13.882	1969-12-09 07:45:32.75	1969-12-31 15:59:09	NULL	1969-12-31 16:00:00.001	1969-12-31 16:00:00	1969-12-31 16:00:08.451	NULL	NULL
+1969-12-31 16:00:00.02	1969-12-31 16:00:15.601	NULL	1969-12-27 11:19:26.75	1969-12-31 16:00:20	1969-12-31 20:20:01	NULL	1969-12-31 16:00:00	1969-12-31 15:59:45.129	NULL	NULL
+1969-12-31 15:59:59.962	1969-12-31 16:00:15.601	NULL	1969-12-10 03:41:51	1969-12-31 15:59:22	1969-12-31 20:20:01	NULL	1969-12-31 16:00:00	1969-12-31 15:59:58.614	NULL	NULL
+1969-12-31 15:59:59.995	1969-12-31 16:00:15.601	NULL	1970-01-07 18:06:56	1969-12-31 15:59:55	1969-12-31 20:20:01	NULL	1969-12-31 16:00:00	1969-12-31 16:00:04.679	NULL	NULL
+1969-12-31 16:00:00.048	1969-12-31 16:00:15.601	NULL	1969-12-22 11:03:59	1969-12-31 16:00:48	1969-12-31 20:20:01	NULL	1969-12-31 16:00:00	1969-12-31 15:59:50.235	NULL	NULL
+1969-12-31 16:00:00.008	NULL	1969-12-24 00:12:58.862	1969-12-20 21:16:47.25	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:00.008	NULL	1969-12-30 11:24:23.566	1969-12-16 11:20:17.25	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:00.008	NULL	1970-01-09 23:39:39.664	1970-01-10 17:09:21.5	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:00.008	NULL	1969-12-23 21:59:27.689	1970-01-19 01:16:31.25	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:00.008	NULL	1970-01-10 23:29:48.972	1969-12-10 02:41:39	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00.001	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:00.008	NULL	1970-01-11 10:34:27.246	1970-01-14 14:49:59.25	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00.001	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 15:59:59.941	1969-12-31 15:59:52.804	NULL	1969-12-13 02:11:50	1969-12-31 15:59:01	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:13.15	NULL	NULL
+1969-12-31 15:59:59.979	1969-12-31 15:59:52.804	NULL	1970-01-18 12:27:09	1969-12-31 15:59:39	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 15:59:55.9	NULL	NULL
+1969-12-31 15:59:59.94	1969-12-31 15:59:52.804	NULL	1970-01-18 05:11:54.75	1969-12-31 15:59:00	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 15:59:52.408	NULL	NULL
+1969-12-31 15:59:59.986	1969-12-31 15:59:52.804	NULL	1969-12-13 16:50:00.5	1969-12-31 15:59:46	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:11.065	NULL	NULL
+1969-12-31 16:00:00.059	1969-12-31 15:59:52.804	NULL	1969-12-18 11:57:25.5	1969-12-31 16:00:59	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:10.956	NULL	NULL
+1969-12-31 15:59:59.992	1969-12-31 15:59:52.804	NULL	1969-12-10 06:06:48.5	1969-12-31 15:59:52	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:03.136	NULL	NULL
+1969-12-31 16:00:00.005	1969-12-31 15:59:52.804	NULL	1969-12-19 21:53:12.5	1969-12-31 16:00:05	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:10.973	NULL	NULL
+1969-12-31 15:59:59.976	1969-12-31 15:59:52.804	NULL	1970-01-10 06:18:31	1969-12-31 15:59:36	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 15:59:53.145	NULL	NULL
+1969-12-31 15:59:59.95	1969-12-31 15:59:52.804	NULL	1969-12-19 17:33:32.75	1969-12-31 15:59:10	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 15:59:54.733	NULL	NULL
+1969-12-31 16:00:00.011	NULL	1969-12-30 22:03:04.018	1970-01-21 12:50:53.75	1969-12-31 16:00:11	NULL	1969-12-31 16:00:00.001	1969-12-31 16:00:00	1969-12-31 16:00:02.351	NULL	NULL
+1969-12-31 16:00:00.011	NULL	1969-12-27 18:49:09.583	1970-01-14 22:35:27	1969-12-31 16:00:11	NULL	1969-12-31 16:00:00.001	1969-12-31 16:00:00	1969-12-31 16:00:02.351	NULL	NULL
+PREHOOK: query: explain
+select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: alltypesorc
+          Filter Operator
+            predicate: ((cbigint % 250) = 0) (type: boolean)
+            Select Operator
+              expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+              ListSink
+
+PREHOOK: query: select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+-- to timestamp
+  cast (ctinyint as timestamp)
+  ,cast (csmallint as timestamp)
+  ,cast (cint as timestamp)
+  ,cast (cbigint as timestamp)
+  ,cast (cfloat as timestamp)
+  ,cast (cdouble as timestamp)
+  ,cast (cboolean1 as timestamp)
+  ,cast (cbigint * 0 as timestamp)
+  ,cast (ctimestamp1 as timestamp)
+  ,cast (cstring1 as timestamp)
+  ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+1969-12-31 15:59:24	1969-12-31 15:56:40	NULL	1906-06-05 13:34:10	1969-12-31 15:59:24	1969-12-31 15:56:40	NULL	1969-12-31 16:00:00	1969-12-31 15:59:45.748	NULL	NULL
+1969-12-31 15:59:24	1969-12-31 15:56:40	NULL	2020-09-11 19:50:00	1969-12-31 15:59:24	1969-12-31 15:56:40	NULL	1969-12-31 16:00:00	1969-12-31 15:59:53.817	NULL	NULL
+1969-12-31 15:59:30	1969-12-31 15:56:40	NULL	2015-04-23 22:10:50	1969-12-31 15:59:30	1969-12-31 15:56:40	NULL	1969-12-31 16:00:00	1969-12-31 16:00:12.935	NULL	NULL
+1969-12-31 15:59:09	NULL	1994-07-07 10:09:31	2003-05-25 21:27:30	1969-12-31 15:59:09	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:08.451	NULL	NULL
+1969-12-31 15:59:09	NULL	1993-09-08 22:51:22	1908-10-29 07:05:50	1969-12-31 15:59:09	NULL	1969-12-31 16:00:01	1969-12-31 16:00:00	1969-12-31 16:00:08.451	NULL	NULL
+1969-12-31 16:00:20	1969-12-31 20:20:01	NULL	1958-07-07 21:05:50	1969-12-31 16:00:20	1969-12-31 20:20:01	NULL	1969-12-31 16:00:00	1969-12-31 15:59:45.129	NULL	NULL
+1969-12-31 15:59:22	1969-12-31 20:20:01	NULL	1911-02-07 01:30:00	1969-12-31 15:59:22	1969-12-31 20:20:01	NULL	1969-12-31 16:00:00	1969-12-31 15:59:58.614	NULL	NULL
+1969-12-31 15:59:55	1969-12-31 20:20:01	NULL	1989-05-28 20:33:20	1969-12-31 15:59:55	1969-12-31 20:20:01	NULL	1969-12-31 16:00:00	1969-12-31 16:00:04.679	NULL	NULL
+1969-12-31 16:00:48	1969-12-31 20:20:01	NULL	1944-10-18 03:23:20	1969-12-31 16:00:48	1969-12-31 20:20:01	NULL	1969-12-31 16:00:00	1969-12-31 15:59:50.235	NULL	NULL
+1969-12-31 16:00:08	NULL	1949-01-13 00:21:02	1940-06-26 15:47:30	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:08	NULL	1966-09-27 07:32:46	1928-05-26 10:07:30	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:08	NULL	1995-07-07 22:01:04	1997-07-05 20:58:20	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:08	NULL	1948-10-12 08:01:29	2020-05-04 04:20:50	1969-12-31 16:00:08	NULL	1969-12-31 16:00:00	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:08	NULL	1998-03-27 00:56:12	1910-12-27 06:10:00	1969-12-31 16:00:08	NULL	1969-12-31 16:00:01	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 16:00:08	NULL	1999-07-01 15:14:06	2008-03-13 02:07:30	1969-12-31 16:00:08	NULL	1969-12-31 16:00:01	1969-12-31 16:00:00	1969-12-31 16:00:15.892	NULL	NULL
+1969-12-31 15:59:01	1969-12-31 14:00:04	NULL	1919-02-22 13:13:20	1969-12-31 15:59:01	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:13.15	NULL	NULL
+1969-12-31 15:59:39	1969-12-31 14:00:04	NULL	2018-11-16 20:30:00	1969-12-31 15:59:39	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 15:59:55.9	NULL	NULL
+1969-12-31 15:59:00	1969-12-31 14:00:04	NULL	2018-01-18 14:32:30	1969-12-31 15:59:00	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 15:59:52.408	NULL	NULL
+1969-12-31 15:59:46	1969-12-31 14:00:04	NULL	1920-10-24 09:28:20	1969-12-31 15:59:46	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:11.065	NULL	NULL
+1969-12-31 16:00:59	1969-12-31 14:00:04	NULL	1933-12-12 05:05:00	1969-12-31 16:00:59	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:10.956	NULL	NULL
+1969-12-31 15:59:52	1969-12-31 14:00:04	NULL	1911-05-18 17:28:20	1969-12-31 15:59:52	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:03.136	NULL	NULL
+1969-12-31 16:00:05	1969-12-31 14:00:04	NULL	1937-10-25 22:48:20	1969-12-31 16:00:05	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 16:00:10.973	NULL	NULL
+1969-12-31 15:59:36	1969-12-31 14:00:04	NULL	1996-04-09 21:36:40	1969-12-31 15:59:36	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 15:59:53.145	NULL	NULL
+1969-12-31 15:59:10	1969-12-31 14:00:04	NULL	1937-04-28 15:05:50	1969-12-31 15:59:10	1969-12-31 14:00:04	NULL	1969-12-31 16:00:00	1969-12-31 15:59:54.733	NULL	NULL
+1969-12-31 16:00:11	NULL	1967-12-14 19:06:58	2027-02-19 08:15:50	1969-12-31 16:00:11	NULL	1969-12-31 16:00:01	1969-12-31 16:00:00	1969-12-31 16:00:02.351	NULL	NULL
+1969-12-31 16:00:11	NULL	1959-05-16 04:19:43	2009-01-30 06:50:00	1969-12-31 16:00:11	NULL	1969-12-31 16:00:01	1969-12-31 16:00:00	1969-12-31 16:00:02.351	NULL	NULL

[4/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

Posted by gu...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
new file mode 100644
index 0000000..2243072
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
@@ -0,0 +1,195 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509856
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542038
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543491
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509891
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out b/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
index 929194e..6c781e3 100644
--- a/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
@@ -1,6 +1,10 @@
-PREHOOK: query: drop table if exists TJOIN1
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table if exists TJOIN1
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table if exists TJOIN1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table if exists TJOIN1
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: drop table if exists TJOIN2
 PREHOOK: type: DROPTABLE
@@ -228,8 +232,8 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
-1	20	25	NULL
 0	10	15	NULL
+1	20	25	NULL
 2	NULL	50	NULL
 PREHOOK: query: explain
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
@@ -387,8 +391,8 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
-1	20	25	NULL
 0	10	15	NULL
+1	20	25	NULL
 2	NULL	50	NULL
 PREHOOK: query: explain
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
@@ -466,10 +470,9 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-0	10	15	BB
-0	10	15	FF
 PREHOOK: query: explain
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
@@ -547,7 +550,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
-2	NULL	50	NULL
+0	10	15	NULL
 1	20	25	NULL
-0	10	15	BB
-0	10	15	FF
+2	NULL	50	NULL

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out b/ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out
new file mode 100644
index 0000000..591e165
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out
@@ -0,0 +1,222 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table vsmb_bucket_1(RNUM int, C1 int, C2 int) 
+  CLUSTERED BY (C1) 
+  SORTED BY (C1) INTO 1 BUCKETS 
+  STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table vsmb_bucket_1(RNUM int, C1 int, C2 int) 
+  CLUSTERED BY (C1) 
+  SORTED BY (C1) INTO 1 BUCKETS 
+  STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vsmb_bucket_1
+PREHOOK: query: create table vsmb_bucket_2(RNUM int, C1 int, C2 int) 
+  CLUSTERED BY (C1) 
+  SORTED BY (C1) INTO 1 BUCKETS 
+  STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: query: create table vsmb_bucket_2(RNUM int, C1 int, C2 int) 
+  CLUSTERED BY (C1) 
+  SORTED BY (C1) INTO 1 BUCKETS 
+  STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vsmb_bucket_2
+PREHOOK: query: create table vsmb_bucket_TXT_1(RNUM int, C1 int, C2 int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vsmb_bucket_TXT_1
+POSTHOOK: query: create table vsmb_bucket_TXT_1(RNUM int, C1 int, C2 int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vsmb_bucket_TXT_1
+PREHOOK: query: create table vsmb_bucket_TXT_2(RNUM int, C1 int, C2 int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vsmb_bucket_TXT_2
+POSTHOOK: query: create table vsmb_bucket_TXT_2(RNUM int, C1 int, C2 int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vsmb_bucket_TXT_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE vsmb_bucket_TXT_1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vsmb_bucket_txt_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE vsmb_bucket_TXT_1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vsmb_bucket_txt_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE vsmb_bucket_TXT_2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vsmb_bucket_txt_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE vsmb_bucket_TXT_2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vsmb_bucket_txt_2
+PREHOOK: query: insert into table vsmb_bucket_1 select * from vsmb_bucket_TXT_1 order by c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_txt_1
+PREHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: query: insert into table vsmb_bucket_1 select * from vsmb_bucket_TXT_1 order by c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_txt_1
+POSTHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: Lineage: vsmb_bucket_1.c1 SIMPLE [(vsmb_bucket_txt_1)vsmb_bucket_txt_1.FieldSchema(name:c1, type:int, comment:null), ]
+POSTHOOK: Lineage: vsmb_bucket_1.c2 SIMPLE [(vsmb_bucket_txt_1)vsmb_bucket_txt_1.FieldSchema(name:c2, type:int, comment:null), ]
+POSTHOOK: Lineage: vsmb_bucket_1.rnum SIMPLE [(vsmb_bucket_txt_1)vsmb_bucket_txt_1.FieldSchema(name:rnum, type:int, comment:null), ]
+PREHOOK: query: insert into table vsmb_bucket_2 select * from vsmb_bucket_TXT_2 order by c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_txt_2
+PREHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: query: insert into table vsmb_bucket_2 select * from vsmb_bucket_TXT_2 order by c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_txt_2
+POSTHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: Lineage: vsmb_bucket_2.c1 SIMPLE [(vsmb_bucket_txt_2)vsmb_bucket_txt_2.FieldSchema(name:c1, type:int, comment:null), ]
+POSTHOOK: Lineage: vsmb_bucket_2.c2 SIMPLE [(vsmb_bucket_txt_2)vsmb_bucket_txt_2.FieldSchema(name:c2, type:int, comment:null), ]
+POSTHOOK: Lineage: vsmb_bucket_2.rnum SIMPLE [(vsmb_bucket_txt_2)vsmb_bucket_txt_2.FieldSchema(name:rnum, type:int, comment:null), ]
+PREHOOK: query: explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tjoin2
+                  Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: tjoin1
+                  Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                  Merge Join Operator
+                    condition map:
+                         Left Outer Join0 to 1
+                    filter predicates:
+                      0 {(c2 > 15)}
+                      1 
+                    keys:
+                      0 c1 (type: int)
+                      1 c1 (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col8
+                    Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+0	10	15	NULL
+1	20	25	NULL
+2	NULL	50	NULL
+PREHOOK: query: explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tjoin2
+                  Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+            Map Operator Tree:
+                TableScan
+                  alias: tjoin1
+                  Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                  Merge Join Operator
+                    condition map:
+                         Left Outer Join0 to 1
+                    filter predicates:
+                      0 {(c2 > 15)}
+                      1 
+                    keys:
+                      0 c1 (type: int)
+                      1 c1 (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col8
+                    Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+0	10	15	NULL
+1	20	25	NULL
+2	NULL	50	NULL

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_outer_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/tez/vector_outer_join5.q.out
new file mode 100644
index 0000000..e77903a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join5.q.out
@@ -0,0 +1,1328 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sorted_mod_4
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sorted_mod_4
+PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sorted_mod_4
+PREHOOK: Output: default@sorted_mod_4
+POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sorted_mod_4
+POSTHOOK: Output: default@sorted_mod_4
+PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+PREHOOK: query: create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_table
+POSTHOOK: query: create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_table
+PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Output: default@small_table
+POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Output: default@small_table
+PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      keys:
+                        0 _col0 (type: tinyint)
+                        1 _col0 (type: tinyint)
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: st
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: tinyint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: tinyint)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6876
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint), cmodint (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      filter predicates:
+                        0 {(_col1 = 2)}
+                        1 
+                      keys:
+                        0 _col0 (type: tinyint)
+                        1 _col0 (type: tinyint)
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: sm
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: tinyint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: tinyint)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6058
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint), cmodint (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      filter predicates:
+                        0 {((UDFToInteger(_col0) pmod 4) = _col1)}
+                        1 
+                      keys:
+                        0 _col0 (type: tinyint)
+                        1 _col0 (type: tinyint)
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: sm
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: tinyint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: tinyint)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6248
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      filter predicates:
+                        0 {(_col0 < 100)}
+                        1 
+                      keys:
+                        0 _col0 (type: tinyint)
+                        1 _col0 (type: tinyint)
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: sm
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: tinyint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: tinyint)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint 
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6876
+PREHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint), cmodint (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      keys:
+                        0 UDFToLong(_col1) (type: bigint)
+                        1 (_col0 pmod UDFToLong(8)) (type: bigint)
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 4
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Reduce Output Operator
+                        key expressions: _col0 (type: tinyint)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: tinyint)
+                        Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: sm
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cbigint (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: (_col0 pmod UDFToLong(8)) (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: (_col0 pmod UDFToLong(8)) (type: bigint)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctinyint (type: tinyint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: tinyint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: tinyint)
+                      Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 _col0 (type: tinyint)
+                  1 _col0 (type: tinyint)
+                Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* 
+from sorted_mod_4 s
+left outer join small_table sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join sorted_mod_4 s2
+  on s2.ctinyint = s.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+3268334
+PREHOOK: query: create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mod_8_mod_4
+POSTHOOK: query: create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mod_8_mod_4
+PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Output: default@mod_8_mod_4
+POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Output: default@mod_8_mod_4
+PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+#### A masked pattern was here ####
+PREHOOK: query: create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_table2
+POSTHOOK: query: create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_table2
+PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table2
+PREHOOK: Output: default@small_table2
+POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table2
+POSTHOOK: Output: default@small_table2
+PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      keys:
+                        0 _col0 (type: int)
+                        1 _col0 (type: int)
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: st
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+39112
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int), cmodint (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      filter predicates:
+                        0 {(_col1 = 2)}
+                        1 
+                      keys:
+                        0 _col0 (type: int)
+                        1 _col0 (type: int)
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: sm
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+11171
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int), cmodint (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      filter predicates:
+                        0 {((_col0 pmod 4) = _col1)}
+                        1 
+                      keys:
+                        0 _col0 (type: int)
+                        1 _col0 (type: int)
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: sm
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+14371
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      filter predicates:
+                        0 {(_col0 < 3)}
+                        1 
+                      keys:
+                        0 _col0 (type: int)
+                        1 _col0 (type: int)
+                      input vertices:
+                        1 Map 3
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: sm
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+17792
+PREHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int), cmodint (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      keys:
+                        0 UDFToLong(_col1) (type: bigint)
+                        1 (_col0 pmod UDFToLong(8)) (type: bigint)
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 4
+                      Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+                      HybridGraceHashJoin: true
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: sm
+                  Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cbigint (type: bigint)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: (_col0 pmod UDFToLong(8)) (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: (_col0 pmod UDFToLong(8)) (type: bigint)
+                      Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cmodtinyint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* 
+from mod_8_mod_4 s
+left outer join small_table2 sm
+  on pmod(sm.cbigint, 8) = s.cmodint 
+left outer join mod_8_mod_4 s2
+  on s2.cmodtinyint = s.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+6524438

[6/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

Posted by gu...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
index 37ccf22..f971727 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
@@ -24,7 +24,9 @@ import java.util.Arrays;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -123,13 +125,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
 
       batchCounter++;
 
-      // Do the per-batch setup for an outer join.
-
-      outerPerBatchSetup(batch);
-
-      // For outer join, DO NOT apply filters yet.  It is incorrect for outer join to
-      // apply the filter before hash table matching.
-
       final int inputLogicalSize = batch.size;
 
       if (inputLogicalSize == 0) {
@@ -139,6 +134,44 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         return;
       }
 
+      // Do the per-batch setup for an outer join.
+
+      outerPerBatchSetup(batch);
+
+      // For outer join, remember our input rows before ON expression filtering or before
+      // hash table matching so we can generate results for all rows (matching and non matching)
+      // later.
+      boolean inputSelectedInUse = batch.selectedInUse;
+      if (inputSelectedInUse) {
+        // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+        //   throw new HiveException("batch.selected is not in sort order and unique");
+        // }
+        System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+      }
+
+      // Filtering for outer join just removes rows available for hash table matching.
+      boolean someRowsFilteredOut =  false;
+      if (bigTableFilterExpressions.length > 0) {
+        // Since the input
+        for (VectorExpression ve : bigTableFilterExpressions) {
+          ve.evaluate(batch);
+        }
+        someRowsFilteredOut = (batch.size != inputLogicalSize);
+        if (LOG.isDebugEnabled()) {
+          if (batch.selectedInUse) {
+            if (inputSelectedInUse) {
+              LOG.debug(CLASS_NAME +
+                  " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+                  " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            } else {
+              LOG.debug(CLASS_NAME +
+                " inputLogicalSize " + inputLogicalSize +
+                " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            }
+          }
+        }
+      }
+
       // Perform any key expressions.  Results will go into scratch columns.
       if (bigTableKeyExpressions != null) {
         for (VectorExpression ve : bigTableKeyExpressions) {
@@ -146,9 +179,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Single-Column Long specific declarations.
        */
@@ -178,12 +208,16 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
          */
 
         JoinUtil.JoinResult joinResult;
-        if (!joinColVector.noNulls && joinColVector.isNull[0]) {
-          // Null key is no match for whole batch.
+        if (batch.size == 0) {
+          // Whole repeated key batch was filtered out.
+          joinResult = JoinUtil.JoinResult.NOMATCH;
+        } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+          // Any (repeated) null key column is no match for whole batch.
           joinResult = JoinUtil.JoinResult.NOMATCH;
         } else {
           // Handle *repeated* join key, if found.
           long key = vector[0];
+          // LOG.debug(CLASS_NAME + " repeated key " + key);
           if (useMinMax && (key < min || key > max)) {
             // Out of range for whole batch.
             joinResult = JoinUtil.JoinResult.NOMATCH;
@@ -199,7 +233,8 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+        finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+            inputSelectedInUse, inputLogicalSize);
       } else {
 
         /*
@@ -213,14 +248,13 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         int selected[] = batch.selected;
         boolean selectedInUse = batch.selectedInUse;
 
-        // For outer join we must apply the filter after match and cause some matches to become
-        // non-matches, we do not track non-matches here.  Instead we remember all non spilled rows
-        // and compute non matches later in finishOuter.
         int hashMapResultCount = 0;
-        int matchCount = 0;
-        int nonSpillCount = 0;
+        int allMatchCount = 0;
+        int equalKeySeriesCount = 0;
         int spillCount = 0;
 
+        boolean atLeastOneNonMatch = someRowsFilteredOut;
+
         /*
          * Single-Column Long specific variables.
          */
@@ -232,9 +266,11 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
         JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
 
         // Logical loop over the rows in the batch since the batch may have selected in use.
-        for (int logical = 0; logical < inputLogicalSize; logical++) {
+        for (int logical = 0; logical < batch.size; logical++) {
           int batchIndex = (selectedInUse ? selected[logical] : logical);
 
+          // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
           /*
            * Single-Column Long outer null detection.
            */
@@ -250,8 +286,8 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
             //    Let a current SPILL equal key series keep going, or
             //    Let a current NOMATCH keep not matching.
 
-            // Remember non-matches for Outer Join.
-            nonSpills[nonSpillCount++] = batchIndex;
+            atLeastOneNonMatch = true;
+
             // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
           } else {
 
@@ -269,9 +305,12 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
               // New key.
 
               if (haveSaveKey) {
-                // Move on with our count(s).
+                // Move on with our counts.
                 switch (saveJoinResult) {
                 case MATCH:
+                  hashMapResultCount++;
+                  equalKeySeriesCount++;
+                  break;
                 case SPILL:
                   hashMapResultCount++;
                   break;
@@ -300,41 +339,70 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
               } else {
                 saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]);
               }
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
-            } else {
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
-            }
 
-            /*
-             * Common outer join result processing.
-             */
+              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + currentKey + " " + saveJoinResult.name());
 
-            switch (saveJoinResult) {
-            case MATCH:
-              matchs[matchCount] = batchIndex;
-              matchHashMapResultIndices[matchCount] = hashMapResultCount;
-              matchCount++;
-              nonSpills[nonSpillCount++] = batchIndex;
-              break;
-
-            case SPILL:
-              spills[spillCount] = batchIndex;
-              spillHashMapResultIndices[spillCount] = hashMapResultCount;
-              spillCount++;
-              break;
-
-            case NOMATCH:
-              nonSpills[nonSpillCount++] = batchIndex;
-              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
-              break;
+              /*
+               * Common outer join result processing.
+               */
+
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                atLeastOneNonMatch = true;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+                break;
+              }
+            } else {
+              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+              // Series of equal keys.
+
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+                break;
+              }
             }
+            // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+            //   throw new HiveException("allMatchs is not in sort order and unique");
+            // }
           }
         }
 
         if (haveSaveKey) {
-          // Account for last equal key sequence.
+          // Update our counts for the last key.
           switch (saveJoinResult) {
           case MATCH:
+            hashMapResultCount++;
+            equalKeySeriesCount++;
+            break;
           case SPILL:
             hashMapResultCount++;
             break;
@@ -345,27 +413,26 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
 
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter +
-              " matchs " + intArrayToRangesString(matchs, matchCount) +
-              " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
-              " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+              " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+              " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+              " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+              " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+              " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+              " atLeastOneNonMatch " + atLeastOneNonMatch +
+              " inputSelectedInUse " + inputSelectedInUse +
+              " inputLogicalSize " + inputLogicalSize +
               " spills " + intArrayToRangesString(spills, spillCount) +
               " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
         }
 
         // We will generate results for all matching and non-matching rows.
-        // Note that scratch1 is undefined at this point -- it's preallocated storage.
-        numSel = finishOuter(batch,
-                    matchs, matchHashMapResultIndices, matchCount,
-                    nonSpills, nonSpillCount,
-                    spills, spillHashMapResultIndices, spillCount,
-                    hashMapResults, hashMapResultCount,
-                    scratch1);
+        finishOuter(batch,
+            allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+            inputSelectedInUse, inputLogicalSize,
+            spillCount, hashMapResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
index 23a29f7..bea032a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
@@ -128,13 +128,6 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
 
       batchCounter++;
 
-      // Do the per-batch setup for an outer join.
-
-      outerPerBatchSetup(batch);
-
-      // For outer join, DO NOT apply filters yet.  It is incorrect for outer join to
-      // apply the filter before hash table matching.
-
       final int inputLogicalSize = batch.size;
 
       if (inputLogicalSize == 0) {
@@ -144,6 +137,44 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         return;
       }
 
+      // Do the per-batch setup for an outer join.
+
+      outerPerBatchSetup(batch);
+
+      // For outer join, remember our input rows before ON expression filtering or before
+      // hash table matching so we can generate results for all rows (matching and non matching)
+      // later.
+      boolean inputSelectedInUse = batch.selectedInUse;
+      if (inputSelectedInUse) {
+        // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+        //   throw new HiveException("batch.selected is not in sort order and unique");
+        // }
+        System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+      }
+
+      // Filtering for outer join just removes rows available for hash table matching.
+      boolean someRowsFilteredOut =  false;
+      if (bigTableFilterExpressions.length > 0) {
+        // Since the input
+        for (VectorExpression ve : bigTableFilterExpressions) {
+          ve.evaluate(batch);
+        }
+        someRowsFilteredOut = (batch.size != inputLogicalSize);
+        if (LOG.isDebugEnabled()) {
+          if (batch.selectedInUse) {
+            if (inputSelectedInUse) {
+              LOG.debug(CLASS_NAME +
+                  " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+                  " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            } else {
+              LOG.debug(CLASS_NAME +
+                " inputLogicalSize " + inputLogicalSize +
+                " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            }
+          }
+        }
+      }
+
       // Perform any key expressions.  Results will go into scratch columns.
       if (bigTableKeyExpressions != null) {
         for (VectorExpression ve : bigTableKeyExpressions) {
@@ -151,9 +182,6 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Multi-Key specific declarations.
        */
@@ -199,8 +227,11 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
          */
 
         JoinUtil.JoinResult joinResult;
-        if (someKeyInputColumnIsNull) {
-          // Any null key column is no match for whole batch.
+        if (batch.size == 0) {
+          // Whole repeated key batch was filtered out.
+          joinResult = JoinUtil.JoinResult.NOMATCH;
+        } else if (someKeyInputColumnIsNull) {
+          // Any (repeated) null key column is no match for whole batch.
           joinResult = JoinUtil.JoinResult.NOMATCH;
         } else {
 
@@ -219,7 +250,8 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+        finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+            inputSelectedInUse, inputLogicalSize);
       } else {
 
         /*
@@ -233,14 +265,13 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         int selected[] = batch.selected;
         boolean selectedInUse = batch.selectedInUse;
 
-        // For outer join we must apply the filter after match and cause some matches to become
-        // non-matches, we do not track non-matches here.  Instead we remember all non spilled rows
-        // and compute non matches later in finishOuter.
         int hashMapResultCount = 0;
-        int matchCount = 0;
-        int nonSpillCount = 0;
+        int allMatchCount = 0;
+        int equalKeySeriesCount = 0;
         int spillCount = 0;
 
+        boolean atLeastOneNonMatch = someRowsFilteredOut;
+
         /*
          * Multi-Key specific variables.
          */
@@ -252,9 +283,11 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
         JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
 
         // Logical loop over the rows in the batch since the batch may have selected in use.
-        for (int logical = 0; logical < inputLogicalSize; logical++) {
+        for (int logical = 0; logical < batch.size; logical++) {
           int batchIndex = (selectedInUse ? selected[logical] : logical);
 
+          // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
           /*
            * Multi-Key outer null detection.
            */
@@ -272,8 +305,8 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
             //    Let a current SPILL equal key series keep going, or
             //    Let a current NOMATCH keep not matching.
 
-            // Remember non-matches for Outer Join.
-            nonSpills[nonSpillCount++] = batchIndex;
+            atLeastOneNonMatch = true;
+
             // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
           } else {
 
@@ -292,9 +325,12 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
               // New key.
 
               if (haveSaveKey) {
-                // Move on with our count(s).
+                // Move on with our counts.
                 switch (saveJoinResult) {
                 case MATCH:
+                  hashMapResultCount++;
+                  equalKeySeriesCount++;
+                  break;
                 case SPILL:
                   hashMapResultCount++;
                   break;
@@ -322,41 +358,68 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
               byte[] keyBytes = saveKeyOutput.getData();
               int keyLength = saveKeyOutput.getLength();
               saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]);
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
-            } else {
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
-            }
 
-            /*
-             * Common outer join result processing.
-             */
+              /*
+               * Common outer join result processing.
+               */
 
-            switch (saveJoinResult) {
-            case MATCH:
-              matchs[matchCount] = batchIndex;
-              matchHashMapResultIndices[matchCount] = hashMapResultCount;
-              matchCount++;
-              nonSpills[nonSpillCount++] = batchIndex;
-              break;
-
-            case SPILL:
-              spills[spillCount] = batchIndex;
-              spillHashMapResultIndices[spillCount] = hashMapResultCount;
-              spillCount++;
-              break;
-
-            case NOMATCH:
-              nonSpills[nonSpillCount++] = batchIndex;
-              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
-              break;
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                atLeastOneNonMatch = true;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+                break;
+              }
+            } else {
+              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+              // Series of equal keys.
+
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+                break;
+              }
             }
+            // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+            //   throw new HiveException("allMatchs is not in sort order and unique");
+            // }
           }
         }
 
         if (haveSaveKey) {
-          // Account for last equal key sequence.
+          // Update our counts for the last key.
           switch (saveJoinResult) {
           case MATCH:
+            hashMapResultCount++;
+            equalKeySeriesCount++;
+            break;
           case SPILL:
             hashMapResultCount++;
             break;
@@ -367,27 +430,26 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
 
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter +
-              " matchs " + intArrayToRangesString(matchs, matchCount) +
-              " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
-              " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+              " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+              " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+              " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+              " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+              " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+              " atLeastOneNonMatch " + atLeastOneNonMatch +
+              " inputSelectedInUse " + inputSelectedInUse +
+              " inputLogicalSize " + inputLogicalSize +
               " spills " + intArrayToRangesString(spills, spillCount) +
               " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
         }
 
         // We will generate results for all matching and non-matching rows.
-        // Note that scratch1 is undefined at this point -- it's preallocated storage.
-        numSel = finishOuter(batch,
-                    matchs, matchHashMapResultIndices, matchCount,
-                    nonSpills, nonSpillCount,
-                    spills, spillHashMapResultIndices, spillCount,
-                    hashMapResults, hashMapResultCount,
-                    scratch1);
+        finishOuter(batch,
+            allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+            inputSelectedInUse, inputLogicalSize,
+            spillCount, hashMapResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
index f0af3f6..49efe1a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
@@ -115,13 +115,6 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
 
       batchCounter++;
 
-      // Do the per-batch setup for an outer join.
-
-      outerPerBatchSetup(batch);
-
-      // For outer join, DO NOT apply filters yet.  It is incorrect for outer join to
-      // apply the filter before hash table matching.
-
       final int inputLogicalSize = batch.size;
 
       if (inputLogicalSize == 0) {
@@ -131,6 +124,44 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         return;
       }
 
+      // Do the per-batch setup for an outer join.
+
+      outerPerBatchSetup(batch);
+
+      // For outer join, remember our input rows before ON expression filtering or before
+      // hash table matching so we can generate results for all rows (matching and non matching)
+      // later.
+      boolean inputSelectedInUse = batch.selectedInUse;
+      if (inputSelectedInUse) {
+        // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+        //   throw new HiveException("batch.selected is not in sort order and unique");
+        // }
+        System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+      }
+
+      // Filtering for outer join just removes rows available for hash table matching.
+      boolean someRowsFilteredOut =  false;
+      if (bigTableFilterExpressions.length > 0) {
+        // Since the input
+        for (VectorExpression ve : bigTableFilterExpressions) {
+          ve.evaluate(batch);
+        }
+        someRowsFilteredOut = (batch.size != inputLogicalSize);
+        if (LOG.isDebugEnabled()) {
+          if (batch.selectedInUse) {
+            if (inputSelectedInUse) {
+              LOG.debug(CLASS_NAME +
+                  " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+                  " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            } else {
+              LOG.debug(CLASS_NAME +
+                " inputLogicalSize " + inputLogicalSize +
+                " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+            }
+          }
+        }
+      }
+
       // Perform any key expressions.  Results will go into scratch columns.
       if (bigTableKeyExpressions != null) {
         for (VectorExpression ve : bigTableKeyExpressions) {
@@ -138,9 +169,6 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Single-Column String specific declarations.
        */
@@ -172,8 +200,11 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
          */
 
         JoinUtil.JoinResult joinResult;
-        if (!joinColVector.noNulls && joinColVector.isNull[0]) {
-          // Null key is no match for whole batch.
+        if (batch.size == 0) {
+          // Whole repeated key batch was filtered out.
+          joinResult = JoinUtil.JoinResult.NOMATCH;
+        } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+          // Any (repeated) null key column is no match for whole batch.
           joinResult = JoinUtil.JoinResult.NOMATCH;
         } else {
           // Handle *repeated* join key, if found.
@@ -190,7 +221,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+        finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+            inputSelectedInUse, inputLogicalSize);
       } else {
 
         /*
@@ -204,14 +236,13 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         int selected[] = batch.selected;
         boolean selectedInUse = batch.selectedInUse;
 
-        // For outer join we must apply the filter after match and cause some matches to become
-        // non-matches, we do not track non-matches here.  Instead we remember all non spilled rows
-        // and compute non matches later in finishOuter.
         int hashMapResultCount = 0;
-        int matchCount = 0;
-        int nonSpillCount = 0;
+        int allMatchCount = 0;
+        int equalKeySeriesCount = 0;
         int spillCount = 0;
 
+        boolean atLeastOneNonMatch = someRowsFilteredOut;
+
         /*
          * Single-Column String specific variables.
          */
@@ -223,9 +254,11 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
         JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
 
         // Logical loop over the rows in the batch since the batch may have selected in use.
-        for (int logical = 0; logical < inputLogicalSize; logical++) {
+        for (int logical = 0; logical < batch.size; logical++) {
           int batchIndex = (selectedInUse ? selected[logical] : logical);
 
+          // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
           /*
            * Single-Column String outer null detection.
            */
@@ -241,8 +274,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
             //    Let a current SPILL equal key series keep going, or
             //    Let a current NOMATCH keep not matching.
 
-            // Remember non-matches for Outer Join.
-            nonSpills[nonSpillCount++] = batchIndex;
+            atLeastOneNonMatch = true;
+
             // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
           } else {
 
@@ -262,9 +295,12 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
               // New key.
 
               if (haveSaveKey) {
-                // Move on with our count(s).
+                // Move on with our counts.
                 switch (saveJoinResult) {
                 case MATCH:
+                  hashMapResultCount++;
+                  equalKeySeriesCount++;
+                  break;
                 case SPILL:
                   hashMapResultCount++;
                   break;
@@ -290,43 +326,69 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
               byte[] keyBytes = vector[batchIndex];
               int keyStart = start[batchIndex];
               int keyLength = length[batchIndex];
-
               saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]);
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
-            } else {
-              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
-            }
 
-            /*
-             * Common outer join result processing.
-             */
+              /*
+               * Common outer join result processing.
+               */
 
-            switch (saveJoinResult) {
-            case MATCH:
-              matchs[matchCount] = batchIndex;
-              matchHashMapResultIndices[matchCount] = hashMapResultCount;
-              matchCount++;
-              nonSpills[nonSpillCount++] = batchIndex;
-              break;
-
-            case SPILL:
-              spills[spillCount] = batchIndex;
-              spillHashMapResultIndices[spillCount] = hashMapResultCount;
-              spillCount++;
-              break;
-
-            case NOMATCH:
-              nonSpills[nonSpillCount++] = batchIndex;
-              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
-              break;
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                atLeastOneNonMatch = true;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+                break;
+              }
+            } else {
+              // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+              // Series of equal keys.
+
+              switch (saveJoinResult) {
+              case MATCH:
+                equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+                allMatchs[allMatchCount++] = batchIndex;
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+                break;
+
+              case SPILL:
+                spills[spillCount] = batchIndex;
+                spillHashMapResultIndices[spillCount] = hashMapResultCount;
+                spillCount++;
+                break;
+
+              case NOMATCH:
+                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+                break;
+              }
             }
+            // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+            //   throw new HiveException("allMatchs is not in sort order and unique");
+            // }
           }
         }
 
         if (haveSaveKey) {
-          // Account for last equal key sequence.
+          // Update our counts for the last key.
           switch (saveJoinResult) {
           case MATCH:
+            hashMapResultCount++;
+            equalKeySeriesCount++;
+            break;
           case SPILL:
             hashMapResultCount++;
             break;
@@ -337,27 +399,26 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
 
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter +
-              " matchs " + intArrayToRangesString(matchs, matchCount) +
-              " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
-              " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+              " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+              " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+              " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+              " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+              " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+              " atLeastOneNonMatch " + atLeastOneNonMatch +
+              " inputSelectedInUse " + inputSelectedInUse +
+              " inputLogicalSize " + inputLogicalSize +
               " spills " + intArrayToRangesString(spills, spillCount) +
               " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
         }
 
         // We will generate results for all matching and non-matching rows.
-        // Note that scratch1 is undefined at this point -- it's preallocated storage.
-        numSel = finishOuter(batch,
-                    matchs, matchHashMapResultIndices, matchCount,
-                    nonSpills, nonSpillCount,
-                    spills, spillHashMapResultIndices, spillCount,
-                    hashMapResults, hashMapResultCount,
-                    scratch1);
+        finishOuter(batch,
+            allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+            inputSelectedInUse, inputLogicalSize,
+            spillCount, hashMapResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
index 1c91be6..32b60d0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
@@ -91,7 +91,7 @@ public class VectorMapJoinRowBytesContainer {
     }
 
     tmpFile = File.createTempFile("BytesContainer", ".tmp", parentFile);
-    LOG.info("BytesContainer created temp file " + tmpFile.getAbsolutePath());
+    LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath());
     tmpFile.deleteOnExit();
 
     fileOutputStream = new FileOutputStream(tmpFile);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index f9550c9..6afaec3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -54,13 +54,13 @@ public abstract class VectorMapJoinFastBytesHashMap
       slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
       slotTriples[tripleIndex + 1] = hashCode;
       slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength);
-      // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       keysAssigned++;
     } else {
       // Add another value.
-      // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       slotTriples[tripleIndex + 2] = valueStore.addMore(slotTriples[tripleIndex + 2], valueBytes, 0, valueLength);
-      // LOG.info("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
     }
   }
 
@@ -77,7 +77,7 @@ public abstract class VectorMapJoinFastBytesHashMap
     if (valueRefWord == -1) {
       joinResult = JoinUtil.JoinResult.NOMATCH;
     } else {
-      // LOG.info("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null));
+      // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null));
 
       optimizedHashMapResult.set(valueStore, valueRefWord);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 9dcaf8f..dceb99c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -49,11 +49,11 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
       slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
       slotTriples[tripleIndex + 1] = hashCode;
       slotTriples[tripleIndex + 2] = 1;    // Count.
-      // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       keysAssigned++;
     } else {
       // Add another value.
-      // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       slotTriples[tripleIndex + 2]++;
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index b6e6321..91d7fd6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -79,13 +79,13 @@ public abstract class VectorMapJoinFastBytesHashTable
     while (true) {
       int tripleIndex = 3 * slot;
       if (slotTriples[tripleIndex] == 0) {
-        // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty");
+        // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty");
         isNewKey = true;;
         break;
       }
       if (hashCode == slotTriples[tripleIndex + 1] &&
           keyStore.equalKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) {
-        // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing");
+        // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing");
         isNewKey = false;
         break;
       }
@@ -155,7 +155,7 @@ public abstract class VectorMapJoinFastBytesHashTable
         }
 
         // Use old value reference word.
-        // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
 
         newSlotTriples[newTripleIndex] = keyRef;
         newSlotTriples[newTripleIndex + 1] = hashCode;
@@ -170,7 +170,7 @@ public abstract class VectorMapJoinFastBytesHashTable
     largestNumberOfSteps = newLargestNumberOfSteps;
     resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
     metricExpands++;
-    // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
+    // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
   }
 
   protected long findReadSlot(byte[] keyBytes, int keyStart, int keyLength, long hashCode) {
@@ -181,7 +181,7 @@ public abstract class VectorMapJoinFastBytesHashTable
     int i = 0;
     while (true) {
       int tripleIndex = slot * 3;
-      // LOG.info("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+      // LOG.debug("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
       if (slotTriples[tripleIndex] != 0 && hashCode == slotTriples[tripleIndex + 1]) {
         // Finally, verify the key bytes match.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
index f2f42ee..9d95d05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
@@ -112,7 +112,7 @@ public class VectorMapJoinFastKeyStore {
     }
     keyRefWord |= absoluteKeyOffset;
 
-    // LOG.info("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord));
+    // LOG.debug("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord));
     return keyRefWord;
   }
 
@@ -122,7 +122,7 @@ public class VectorMapJoinFastKeyStore {
         (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift);
     boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn);
 
-    // LOG.info("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord));
+    // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord));
 
     if (isKeyLengthSmall && storedKeyLengthLength != keyLength) {
       return false;
@@ -135,7 +135,7 @@ public class VectorMapJoinFastKeyStore {
       // Read big value length we wrote with the value.
       storedKeyLengthLength = writeBuffers.readVInt(readPos);
       if (storedKeyLengthLength != keyLength) {
-        // LOG.info("VectorMapJoinFastKeyStore equalKey no match big length");
+        // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length");
         return false;
       }
     }
@@ -148,11 +148,11 @@ public class VectorMapJoinFastKeyStore {
 
     for (int i = 0; i < keyLength; i++) {
       if (currentBytes[currentStart + i] != keyBytes[keyStart + i]) {
-        // LOG.info("VectorMapJoinFastKeyStore equalKey no match on bytes");
+        // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes");
         return false;
       }
     }
-    // LOG.info("VectorMapJoinFastKeyStore equalKey match on bytes");
+    // LOG.debug("VectorMapJoinFastKeyStore equalKey match on bytes");
     return true;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
index d6ad028..4725f55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
@@ -68,7 +68,7 @@ public class VectorMapJoinFastLongHashMap
     optimizedHashMapResult.forget();
 
     long hashCode = VectorMapJoinFastLongHashUtil.hashKey(key);
-    // LOG.info("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode);
+    // LOG.debug("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode);
     long valueRef = findReadSlot(key, hashCode);
     JoinUtil.JoinResult joinResult;
     if (valueRef == -1) {

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index 2137fb7..17855eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -121,13 +121,13 @@ public abstract class VectorMapJoinFastLongHashTable
       int pairIndex = 2 * slot;
       long valueRef = slotPairs[pairIndex];
       if (valueRef == 0) {
-        // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
         isNewKey = true;
         break;
       }
       long tableKey = slotPairs[pairIndex + 1];
       if (key == tableKey) {
-        // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
         isNewKey = false;
         break;
       }
@@ -145,7 +145,7 @@ public abstract class VectorMapJoinFastLongHashTable
       // debugDumpKeyProbe(keyOffset, keyLength, hashCode, slot);
     }
 
-    // LOG.info("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode));
+    // LOG.debug("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode));
 
     assignSlot(slot, key, isNewKey, currentValue);
 
@@ -206,7 +206,7 @@ public abstract class VectorMapJoinFastLongHashTable
         }
 
         // Use old value reference word.
-        // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
 
         newSlotPairs[newPairIndex] = valueRef;
         newSlotPairs[newPairIndex + 1] = tableKey;
@@ -220,7 +220,7 @@ public abstract class VectorMapJoinFastLongHashTable
     largestNumberOfSteps = newLargestNumberOfSteps;
     resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
     metricExpands++;
-    // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
+    // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
   }
 
   protected long findReadSlot(long key, long hashCode) {
@@ -235,20 +235,20 @@ public abstract class VectorMapJoinFastLongHashTable
       long valueRef = slotPairs[pairIndex];
       if (valueRef == 0) {
         // Given that we do not delete, an empty slot means no match.
-        // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
         return -1;
       }
       long tableKey = slotPairs[pairIndex + 1];
       if (key == tableKey) {
-        // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
         return slotPairs[pairIndex];
       }
       // Some other key (collision) - keep probing.
       probeSlot += (++i);
       if (i > largestNumberOfSteps) {
-        // LOG.info("VectorMapJoinFastLongHashTable findReadSlot returning not found");
+        // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot returning not found");
         // We know we never went that far when we were inserting.
-        // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")");
+        // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")");
         return -1;
       }
       slot = (int)(probeSlot & logicalHashBucketMask);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
index 373b5f4..4b1d6f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
@@ -81,7 +81,7 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
     int newThreshold = HashMapWrapper.calculateTableSize(
         keyCountAdj, threshold, loadFactor, keyCount);
 
-    // LOG.info("VectorMapJoinFastTableContainer load newThreshold " + newThreshold);
+    // LOG.debug("VectorMapJoinFastTableContainer load newThreshold " + newThreshold);
 
     VectorMapJoinFastHashTable = createHashTable(newThreshold);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
index caa705c..6491dc6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
@@ -142,7 +142,7 @@ public class VectorMapJoinFastValueStore {
     }
 
     public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) {
-      // LOG.info("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
+      // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
 
       this.valueStore = valueStore;
       this.valueRefWord = valueRefWord;
@@ -473,7 +473,7 @@ public class VectorMapJoinFastValueStore {
       valueRefWord |= SmallValueLength.allBitsOnBitShifted;
     }
 
-    // LOG.info("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
+    // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
 
     // The lower bits are the absolute value offset.
     valueRefWord |= newAbsoluteOffset;
@@ -499,7 +499,7 @@ public class VectorMapJoinFastValueStore {
     boolean isOldValueLast =
         ((oldValueRef & IsLastFlag.flagOnMask) != 0);
 
-    // LOG.info("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
+    // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
 
     /*
      * Write information about the old value (which becomes our next) at the beginning
@@ -546,7 +546,7 @@ public class VectorMapJoinFastValueStore {
     // The lower bits are the absolute value offset.
     newValueRef |=  newAbsoluteOffset;
 
-    // LOG.info("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
+    // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
 
     return newValueRef;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
index 60825ce..dc65eaa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
@@ -113,7 +113,7 @@ public class VectorMapJoinOptimizedLongCommon {
       }
 
       // byte[] bytes = Arrays.copyOf(currentKey.get(), currentKey.getLength());
-      // LOG.info("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
+      // LOG.debug("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
 
     }
 
@@ -145,7 +145,7 @@ public class VectorMapJoinOptimizedLongCommon {
     }
 
     // byte[] bytes = Arrays.copyOf(output.getData(), output.getLength());
-    // LOG.info("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
+    // LOG.debug("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
 
     serializedBytes.bytes = output.getData();
     serializedBytes.offset = 0;

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 096239e..656a5e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1069,11 +1069,21 @@ public class Vectorizer implements PhysicalPlanResolver {
   private boolean validateMapJoinDesc(MapJoinDesc desc) {
     byte posBigTable = (byte) desc.getPosBigTable();
     List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
+    if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) {
+      LOG.info("Cannot vectorize map work filter expression");
+      return false;
+    }
     List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
+    if (!validateExprNodeDesc(keyExprs)) {
+      LOG.info("Cannot vectorize map work key expression");
+      return false;
+    }
     List<ExprNodeDesc> valueExprs = desc.getExprs().get(posBigTable);
-    return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) &&
-        validateExprNodeDesc(keyExprs) &&
-        validateExprNodeDesc(valueExprs);
+    if (!validateExprNodeDesc(valueExprs)) {
+      LOG.info("Cannot vectorize map work value expression");
+      return false;
+    }
+    return true;
   }
 
   private boolean validateReduceSinkOperator(ReduceSinkOperator op) {
@@ -1089,6 +1099,7 @@ public class Vectorizer implements PhysicalPlanResolver {
     for (ExprNodeDesc desc : descList) {
       boolean ret = validateExprNodeDesc(desc);
       if (!ret) {
+        LOG.info("Cannot vectorize select expression: " + desc.toString());
         return false;
       }
     }
@@ -1110,10 +1121,12 @@ public class Vectorizer implements PhysicalPlanResolver {
     }
     boolean ret = validateExprNodeDesc(desc.getKeys());
     if (!ret) {
+      LOG.info("Cannot vectorize groupby key expression");
       return false;
     }
     ret = validateAggregationDesc(desc.getAggregators(), isReduce);
     if (!ret) {
+      LOG.info("Cannot vectorize groupby aggregate expression");
       return false;
     }
     if (isReduce) {
@@ -1248,10 +1261,13 @@ public class Vectorizer implements PhysicalPlanResolver {
   }
 
   private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduce) {
-    if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) {
+    String udfName = aggDesc.getGenericUDAFName().toLowerCase();
+    if (!supportedAggregationUdfs.contains(udfName)) {
+      LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported");
       return false;
     }
     if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
+      LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported");
       return false;
     }
     // See if we can vectorize the aggregation.

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_join30.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join30.q b/ql/src/test/queries/clientpositive/vector_join30.q
new file mode 100644
index 0000000..2275804
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join30.q
@@ -0,0 +1,160 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src;
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN 
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM 
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_join_filters.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join_filters.q b/ql/src/test/queries/clientpositive/vector_join_filters.q
new file mode 100644
index 0000000..adf525c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join_filters.q
@@ -0,0 +1,38 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int);
+LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt;
+CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_join_nulls.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join_nulls.q b/ql/src/test/queries/clientpositive/vector_join_nulls.q
new file mode 100644
index 0000000..6cfb7a8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join_nulls.q
@@ -0,0 +1,33 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int);
+LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt;
+CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
index 098d002..62ad9ee 100644
--- a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
+++ b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
@@ -3,6 +3,8 @@ set hive.auto.convert.join=true;
 set hive.auto.convert.join.noconditionaltask=true;
 set hive.auto.convert.join.noconditionaltask.size=10000;
 
+-- SORT_QUERY_RESULTS
+
 drop table if exists TJOIN1;
 drop table if exists TJOIN2;
 create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc;

[7/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)

Posted by gu...@apache.org.

HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2b9f2f5e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2b9f2f5e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2b9f2f5e

Branch: refs/heads/master
Commit: 2b9f2f5e2574e6e64ce9496dfe9ff6e085036fb1
Parents: 3fa7489
Author: Gunther Hagleitner <gu...@apache.org>
Authored: Thu May 14 15:42:04 2015 -0700
Committer: Gunther Hagleitner <gu...@apache.org>
Committed: Thu May 14 15:42:04 2015 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   10 +
 .../ql/exec/vector/VectorizedBatchUtil.java     |    5 +-
 .../mapjoin/VectorMapJoinCommonOperator.java    |    8 +-
 .../VectorMapJoinGenerateResultOperator.java    |   47 +-
 ...pJoinInnerBigOnlyGenerateResultOperator.java |   53 +-
 .../VectorMapJoinInnerBigOnlyLongOperator.java  |   15 +-
 ...ctorMapJoinInnerBigOnlyMultiKeyOperator.java |   15 +-
 ...VectorMapJoinInnerBigOnlyStringOperator.java |   12 +-
 ...ectorMapJoinInnerGenerateResultOperator.java |   39 +-
 .../mapjoin/VectorMapJoinInnerLongOperator.java |   17 +-
 .../VectorMapJoinInnerMultiKeyOperator.java     |   19 +-
 .../VectorMapJoinInnerStringOperator.java       |   17 +-
 ...orMapJoinLeftSemiGenerateResultOperator.java |   40 +-
 .../VectorMapJoinLeftSemiLongOperator.java      |   13 +-
 .../VectorMapJoinLeftSemiMultiKeyOperator.java  |   17 +-
 .../VectorMapJoinLeftSemiStringOperator.java    |   17 +-
 ...ectorMapJoinOuterGenerateResultOperator.java |  805 ++++---
 .../mapjoin/VectorMapJoinOuterLongOperator.java |  189 +-
 .../VectorMapJoinOuterMultiKeyOperator.java     |  184 +-
 .../VectorMapJoinOuterStringOperator.java       |  185 +-
 .../mapjoin/VectorMapJoinRowBytesContainer.java |    2 +-
 .../fast/VectorMapJoinFastBytesHashMap.java     |    8 +-
 .../VectorMapJoinFastBytesHashMultiSet.java     |    4 +-
 .../fast/VectorMapJoinFastBytesHashTable.java   |   10 +-
 .../mapjoin/fast/VectorMapJoinFastKeyStore.java |   10 +-
 .../fast/VectorMapJoinFastLongHashMap.java      |    2 +-
 .../fast/VectorMapJoinFastLongHashTable.java    |   18 +-
 .../fast/VectorMapJoinFastTableContainer.java   |    2 +-
 .../fast/VectorMapJoinFastValueStore.java       |    8 +-
 .../VectorMapJoinOptimizedLongCommon.java       |    4 +-
 .../hive/ql/optimizer/physical/Vectorizer.java  |   24 +-
 .../test/queries/clientpositive/vector_join30.q |  160 ++
 .../clientpositive/vector_join_filters.q        |   38 +
 .../queries/clientpositive/vector_join_nulls.q  |   33 +
 .../clientpositive/vector_left_outer_join2.q    |    2 +
 .../queries/clientpositive/vector_outer_join5.q |  173 ++
 .../tez/acid_vectorization_partition.q.out      |   20 +-
 .../clientpositive/tez/vector_join30.q.out      | 1367 +++++++++++
 .../tez/vector_join_filters.q.out               |  222 ++
 .../clientpositive/tez/vector_join_nulls.q.out  |  195 ++
 .../tez/vector_left_outer_join2.q.out           |   20 +-
 .../tez/vector_left_outer_join3.q.out           |  222 ++
 .../clientpositive/tez/vector_outer_join5.q.out | 1328 +++++++++++
 .../tez/vectorized_timestamp_ints_casts.q.out   |  234 ++
 .../results/clientpositive/vector_join30.q.out  | 2194 ++++++++++++++++++
 .../clientpositive/vector_join_filters.q.out    |  222 ++
 .../clientpositive/vector_join_nulls.q.out      |  195 ++
 .../vector_left_outer_join2.q.out               |    8 +-
 .../clientpositive/vector_outer_join5.q.out     | 1300 +++++++++++
 49 files changed, 8936 insertions(+), 796 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f9c9351..c79c36c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -220,8 +220,12 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_groupby_3.q,\
   vector_groupby_reduce.q,\
   vector_if_expr.q,\
+  vector_inner_join.q,\
   vector_interval_1.q,\
   vector_interval_2.q,\
+  vector_join30.q,\
+  vector_join_filters.q,\
+  vector_join_nulls.q,\
   vector_left_outer_join.q,\
   vector_left_outer_join2.q,\
   vector_leftsemi_mapjoin.q,\
@@ -230,6 +234,12 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_multi_insert.q,\
   vector_non_string_partition.q,\
   vector_orderby_5.q,\
+  vector_outer_join0.q,\
+  vector_outer_join1.q,\
+  vector_outer_join2.q,\
+  vector_outer_join3.q,\
+  vector_outer_join4.q,\
+  vector_outer_join5.q,\
   vector_partition_diff_num_cols.q,\
   vector_partitioned_date_time.q,\
   vector_reduce_groupby_decimal.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index dcea8ae..4a16b4c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -645,8 +645,7 @@ public class VectorizedBatchUtil {
   public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) {
     StringBuffer sb = new StringBuffer();
     sb.append(prefix + " row " + index + " ");
-    for (int i = 0; i < batch.projectionSize; i++) {
-      int column = batch.projectedColumns[i];
+    for (int column = 0; column < batch.cols.length; column++) {
       ColumnVector colVector = batch.cols[column];
       if (colVector == null) {
         sb.append("(null colVector " + column + ")");
@@ -666,7 +665,7 @@ public class VectorizedBatchUtil {
             if (bytes == null) {
               sb.append("(Unexpected null bytes with start " + start + " length " + length + ")");
             } else {
-              sb.append(displayBytes(bytes, start, length));
+              sb.append("bytes: '" + displayBytes(bytes, start, length) + "'");
             }
           } else if (colVector instanceof DecimalColumnVector) {
             sb.append(((DecimalColumnVector) colVector).vector[index].toString());

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index a9082eb..af78776 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -470,8 +470,8 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
       LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns));
       LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns));
 
-      LOG.info(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection));
-      LOG.info(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputTypeNames " + Arrays.toString(outputTypeNames));
+      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection));
+      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputTypeNames " + Arrays.toString(outputTypeNames));
     }
 
     setupVOutContext(conf.getOutputColumnNames());
@@ -503,7 +503,7 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
    */
   protected void setupVOutContext(List<String> outputColumnNames) {
     if (LOG.isDebugEnabled()) {
-      LOG.info(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames);
+      LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames);
     }
     if (outputColumnNames.size() != outputProjection.length) {
       throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeNames) + " length mismatch");
@@ -729,9 +729,9 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
    * Common one time setup by native vectorized map join operator's processOp.
    */
   protected void commonSetup(VectorizedRowBatch batch) throws HiveException {
-    LOG.info("VectorMapJoinInnerCommonOperator commonSetup begin...");
 
     if (LOG.isDebugEnabled()) {
+      LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin...");
       displayBatchColumns(batch, "batch");
       displayBatchColumns(overflowBatch, "overflowBatch");
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
index 860ebb5..32c126c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
@@ -373,10 +373,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
    *          The big table batch.
    * @param hashMapResult
    *          The hash map results for the repeated key.
-   * @return
-   *          The new count of selected rows.
    */
-  protected int generateHashMapResultRepeatedAll(VectorizedRowBatch batch,
+  protected void generateHashMapResultRepeatedAll(VectorizedRowBatch batch,
               VectorMapJoinHashMapResult hashMapResult) throws IOException, HiveException {
 
     int[] selected = batch.selected;
@@ -400,7 +398,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
           batch.selected, 0, batch.size);
     }
 
-    return numSel;
+    batch.size = numSel;
   }
 
   //-----------------------------------------------------------------------------------------------
@@ -462,7 +460,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
 //  int length = output.getLength() - offset;
     rowBytesContainer.finishRow();
 
-//  LOG.info("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length);
+//  LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length);
   }
 
   protected void spillHashMapBatch(VectorizedRowBatch batch,
@@ -514,14 +512,18 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
         smallTable);
     needHashTableSetup = true;
 
-    LOG.info(CLASS_NAME + " reloadHashTable!");
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(CLASS_NAME + " reloadHashTable!");
+    }
   }
 
   @Override
   protected void reProcessBigTable(int partitionId)
       throws HiveException {
 
-    LOG.info(CLASS_NAME + " reProcessBigTable enter...");
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(CLASS_NAME + " reProcessBigTable enter...");
+    }
 
     if (spillReplayBatch == null) {
       // The process method was not called -- no big table rows.
@@ -544,14 +546,14 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
         int offset = bigTable.currentOffset();
         int length = bigTable.currentLength();
 
-//      LOG.info(CLASS_NAME + " reProcessBigTable serialized row #" + rowCount + ", offset " + offset + ", length " + length);
+//      LOG.debug(CLASS_NAME + " reProcessBigTable serialized row #" + rowCount + ", offset " + offset + ", length " + length);
 
         bigTableVectorDeserializeRow.setBytes(bytes, offset, length);
         bigTableVectorDeserializeRow.deserializeByValue(spillReplayBatch, spillReplayBatch.size);
         spillReplayBatch.size++;
 
         if (spillReplayBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
-          LOG.info("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
+          // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
           process(spillReplayBatch, posBigTable); // call process once we have a full batch
           spillReplayBatch.reset();
           batchCount++;
@@ -559,7 +561,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
       }
       // Process the row batch that has less than DEFAULT_SIZE rows
       if (spillReplayBatch.size > 0) {
-        LOG.info("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
+        // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
         process(spillReplayBatch, posBigTable);
         spillReplayBatch.reset();
         batchCount++;
@@ -570,7 +572,9 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
       throw new HiveException(e);
     }
 
-    LOG.info(CLASS_NAME + " reProcessBigTable exit! " + rowCount + " row processed and " + batchCount + " batches processed");
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(CLASS_NAME + " reProcessBigTable exit! " + rowCount + " row processed and " + batchCount + " batches processed");
+    }
   }
 
 
@@ -632,7 +636,9 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
     if (!aborted && overflowBatch.size > 0) {
       forwardOverflow();
     }
-    LOG.info("VectorMapJoinInnerLongOperator closeOp " + batchCounter + " batches processed");
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("VectorMapJoinInnerLongOperator closeOp " + batchCounter + " batches processed");
+    }
   }
 
   //-----------------------------------------------------------------------------------------------
@@ -641,6 +647,23 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
    * Debug.
    */
 
+  public boolean verifyMonotonicallyIncreasing(int[] selected, int size) {
+
+    if (size == 0) {
+      return true;
+    }
+    int prevBatchIndex = selected[0];
+
+    for (int i = 1; i < size; i++) {
+      int batchIndex = selected[i];
+      if (batchIndex <= prevBatchIndex) {
+        return false;
+      }
+      prevBatchIndex = batchIndex;
+    }
+    return true;
+  }
+
   public static String intArrayToRangesString(int selection[], int size) {
     if (size == 0) {
       return "[]";

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
index 3132531..f18b982 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
@@ -129,22 +129,10 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
    * @param batch
    *          The big table batch with any matching and any non matching rows both as
    *          selected in use.
-   * @param allMatchs
-   *          A subset of the rows of the batch that are matches.
    * @param allMatchCount
    *          Number of matches in allMatchs.
-   * @param equalKeySeriesValueCounts
-   *          For each equal key series, whether the number of (empty) small table values.
-   * @param equalKeySeriesAllMatchIndices
-   *          For each equal key series, the logical index into allMatchs.
-   * @param equalKeySeriesDuplicateCounts
-   *          For each equal key series, the number of duplicates or equal keys.
    * @param equalKeySeriesCount
    *          Number of single value matches.
-   * @param spills
-   *          A subset of the rows of the batch that are spills.
-   * @param spillHashMapResultIndices
-   *          For each entry in spills, the index into the hashMapResult.
    * @param spillCount
    *          Number of spills in spills.
    * @param hashTableResults
@@ -154,15 +142,16 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
    *          Number of entries in hashMapResults.
    *
    **/
-  protected int finishInnerBigOnly(VectorizedRowBatch batch,
-      int[] allMatchs, int allMatchCount,
-      long[] equalKeySeriesValueCounts, int[] equalKeySeriesAllMatchIndices,
-      int[] equalKeySeriesDuplicateCounts, int equalKeySeriesCount,
-      int[] spills, int[] spillHashMapResultIndices, int spillCount,
+  protected void finishInnerBigOnly(VectorizedRowBatch batch,
+      int allMatchCount, int equalKeySeriesCount, int spillCount,
       VectorMapJoinHashTableResult[] hashTableResults, int hashMapResultCount)
           throws HiveException, IOException {
 
-    int numSel = 0;
+    // Get rid of spills before we start modifying the batch.
+    if (spillCount > 0) {
+      spillHashMapBatch(batch, hashTableResults,
+          spills, spillHashMapResultIndices, spillCount);
+    }
 
     /*
      * Optimize by running value expressions only over the matched rows.
@@ -171,6 +160,7 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
       performValueExpressions(batch, allMatchs, allMatchCount);
     }
 
+    int numSel = 0;
     for (int i = 0; i < equalKeySeriesCount; i++) {
       long count = equalKeySeriesValueCounts[i];
       int allMatchesIndex = equalKeySeriesAllMatchIndices[i];
@@ -185,13 +175,8 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
             duplicateCount, count);
       }
     }
-
-    if (spillCount > 0) {
-      spillHashMapBatch(batch, hashTableResults,
-          spills, spillHashMapResultIndices, spillCount);
-    }
-
-    return numSel;
+    batch.size = numSel;
+    batch.selectedInUse = true;
   }
 
   /**
@@ -215,11 +200,11 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
       int[] allMatchs, int allMatchesIndex, int duplicateCount, int numSel)
           throws HiveException, IOException {
 
-    // LOG.info("generateHashMultiSetResultSingleValue enter...");
+    // LOG.debug("generateHashMultiSetResultSingleValue enter...");
 
     // Generate result within big table batch itself.
 
-    // LOG.info("generateHashMultiSetResultSingleValue with big table...");
+    // LOG.debug("generateHashMultiSetResultSingleValue with big table...");
 
     for (int i = 0; i < duplicateCount; i++) {
 
@@ -250,7 +235,7 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
       int[] allMatchs, int allMatchesIndex,
       int duplicateCount, long count) throws HiveException, IOException {
 
-    // LOG.info("generateHashMultiSetResultMultiValue allMatchesIndex " + allMatchesIndex + " duplicateCount " + duplicateCount + " count " + count);
+    // LOG.debug("generateHashMultiSetResultMultiValue allMatchesIndex " + allMatchesIndex + " duplicateCount " + duplicateCount + " count " + count);
 
     // TODO: Look at repeating optimizations...
 
@@ -309,11 +294,9 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
     return 0;
   }
 
-  protected int finishInnerBigOnlyRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
+  protected void finishInnerBigOnlyRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
       VectorMapJoinHashMultiSetResult hashMultiSetResult) throws HiveException, IOException {
 
-    int numSel = 0;
-
     switch (joinResult) {
     case MATCH:
 
@@ -325,19 +308,21 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
       }
 
       // Generate special repeated case.
-      numSel = generateHashMultiSetResultRepeatedAll(batch, hashMultiSetResult);
+      int numSel = generateHashMultiSetResultRepeatedAll(batch, hashMultiSetResult);
+      batch.size = numSel;
+      batch.selectedInUse = true;
       break;
 
     case SPILL:
       // Whole batch is spilled.
       spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMultiSetResult);
+      batch.size = 0;
       break;
 
     case NOMATCH:
       // No match for entire batch.
+      batch.size = 0;
       break;
     }
-
-    return numSel;
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
index 53a91d8..bb7efda 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
@@ -151,9 +151,6 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBig
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Single-Column Long specific declarations.
        */
@@ -198,7 +195,7 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBig
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
+        finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
       } else {
 
         /*
@@ -358,17 +355,11 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBig
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
         }
 
-        numSel = finishInnerBigOnly(batch,
-            allMatchs, allMatchCount,
-            equalKeySeriesValueCounts, equalKeySeriesAllMatchIndices,
-            equalKeySeriesDuplicateCounts, equalKeySeriesCount,
-            spills, spillHashMapResultIndices, spillCount,
+        finishInnerBigOnly(batch,
+            allMatchCount, equalKeySeriesCount, spillCount,
             (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
index 9553fa0..c36f668 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
@@ -156,9 +156,6 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInne
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Multi-Key specific declarations.
        */
@@ -210,7 +207,7 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInne
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
+        finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
       } else {
 
         /*
@@ -371,17 +368,11 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInne
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
         }
 
-        numSel = finishInnerBigOnly(batch,
-            allMatchs, allMatchCount,
-            equalKeySeriesValueCounts, equalKeySeriesAllMatchIndices,
-            equalKeySeriesDuplicateCounts, equalKeySeriesCount,
-            spills, spillHashMapResultIndices, spillCount,
+        finishInnerBigOnly(batch,
+            allMatchCount, equalKeySeriesCount, spillCount,
             (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
index 17d0b63..87a11c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
@@ -187,7 +187,7 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerB
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
+        finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
       } else {
 
         /*
@@ -347,17 +347,11 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerB
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
         }
 
-        numSel = finishInnerBigOnly(batch,
-            allMatchs, allMatchCount,
-            equalKeySeriesValueCounts, equalKeySeriesAllMatchIndices,
-            equalKeySeriesDuplicateCounts, equalKeySeriesCount,
-            spills, spillHashMapResultIndices, spillCount,
+        finishInnerBigOnly(batch,
+            allMatchCount, equalKeySeriesCount, spillCount,
             (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
index 3a5e4b2..ee1abd3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
@@ -147,38 +147,17 @@ public abstract class VectorMapJoinInnerGenerateResultOperator
    * @param batch
    *          The big table batch with any matching and any non matching rows both as
    *          selected in use.
-   * @param allMatchs
-   *          A subset of the rows of the batch that are matches.
    * @param allMatchCount
    *          Number of matches in allMatchs.
-   * @param equalKeySeriesHashMapResultIndices
-   *          For each equal key series, the index into the hashMapResult.
-   * @param equalKeySeriesAllMatchIndices
-   *          For each equal key series, the logical index into allMatchs.
-   * @param equalKeySeriesIsSingleValue
-   *          For each equal key series, whether there is 1 or multiple small table values.
-   * @param equalKeySeriesDuplicateCounts
-   *          For each equal key series, the number of duplicates or equal keys.
    * @param equalKeySeriesCount
    *          Number of single value matches.
-   * @param spills
-   *          A subset of the rows of the batch that are spills.
-   * @param spillHashMapResultIndices
-   *          For each entry in spills, the index into the hashMapResult.
    * @param spillCount
    *          Number of spills in spills.
-   * @param hashMapResults
-   *          The array of all hash map results for the batch.
    * @param hashMapResultCount
    *          Number of entries in hashMapResults.
    */
-  protected int finishInner(VectorizedRowBatch batch,
-      int[] allMatchs, int allMatchCount,
-      int[] equalKeySeriesHashMapResultIndices, int[] equalKeySeriesAllMatchIndices,
-      boolean[] equalKeySeriesIsSingleValue, int[] equalKeySeriesDuplicateCounts,
-      int equalKeySeriesCount,
-      int[] spills, int[] spillHashMapResultIndices, int spillCount,
-      VectorMapJoinHashMapResult[] hashMapResults, int hashMapResultCount)
+  protected void finishInner(VectorizedRowBatch batch,
+      int allMatchCount, int equalKeySeriesCount, int spillCount, int hashMapResultCount)
           throws HiveException, IOException {
 
     int numSel = 0;
@@ -211,10 +190,11 @@ public abstract class VectorMapJoinInnerGenerateResultOperator
           spills, spillHashMapResultIndices, spillCount);
     }
 
-    return numSel;
+    batch.size = numSel;
+    batch.selectedInUse = true;
   }
 
-  protected int finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
+  protected void finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
       VectorMapJoinHashTableResult hashMapResult) throws HiveException, IOException {
 
     int numSel = 0;
@@ -230,22 +210,19 @@ public abstract class VectorMapJoinInnerGenerateResultOperator
       }
 
       // Generate special repeated case.
-      numSel = generateHashMapResultRepeatedAll(batch, hashMapResults[0]);
+      generateHashMapResultRepeatedAll(batch, hashMapResults[0]);
       break;
 
     case SPILL:
       // Whole batch is spilled.
       spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMapResults[0]);
+      batch.size = 0;
       break;
 
     case NOMATCH:
       // No match for entire batch.
+      batch.size = 0;
       break;
     }
-    /*
-     * Common repeated join result processing.
-     */
-
-    return numSel;
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
index b77a93c..9005d00 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
@@ -149,9 +149,6 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateRe
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Single-Column Long specific declarations.
        */
@@ -196,7 +193,7 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateRe
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishInnerRepeated(batch, joinResult, hashMapResults[0]);
+        finishInnerRepeated(batch, joinResult, hashMapResults[0]);
       } else {
 
         /*
@@ -356,18 +353,10 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateRe
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
         }
 
-        numSel = finishInner(batch,
-            allMatchs, allMatchCount,
-            equalKeySeriesHashMapResultIndices, equalKeySeriesAllMatchIndices,
-            equalKeySeriesIsSingleValue, equalKeySeriesDuplicateCounts,
-            equalKeySeriesCount,
-            spills, spillHashMapResultIndices, spillCount,
-            hashMapResults, hashMapResultCount);
+        finishInner(batch,
+            allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
index 938506b..b13ded6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
@@ -153,9 +153,6 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenera
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Multi-Key specific declarations.
        */
@@ -207,7 +204,7 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenera
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishInnerRepeated(batch, joinResult, hashMapResults[0]);
+        finishInnerRepeated(batch, joinResult, hashMapResults[0]);
       } else {
 
         /*
@@ -279,7 +276,7 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenera
             haveSaveKey = true;
 
             /*
-             * Multi-Key specific save key and lookup.
+             * Multi-Key specific save key.
              */
 
             temp = saveKeyOutput;
@@ -368,18 +365,10 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenera
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
         }
 
-        numSel = finishInner(batch,
-            allMatchs, allMatchCount,
-            equalKeySeriesHashMapResultIndices, equalKeySeriesAllMatchIndices,
-            equalKeySeriesIsSingleValue, equalKeySeriesDuplicateCounts,
-            equalKeySeriesCount,
-            spills, spillHashMapResultIndices, spillCount,
-            hashMapResults, hashMapResultCount);
+        finishInner(batch,
+            allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
index f7dd8e2..9f10ff1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
@@ -140,9 +140,6 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Single-Column String specific declarations.
        */
@@ -185,7 +182,7 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishInnerRepeated(batch, joinResult, hashMapResults[0]);
+        finishInnerRepeated(batch, joinResult, hashMapResults[0]);
       } else {
 
         /*
@@ -345,18 +342,10 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
         }
 
-        numSel = finishInner(batch,
-            allMatchs, allMatchCount,
-            equalKeySeriesHashMapResultIndices, equalKeySeriesAllMatchIndices,
-            equalKeySeriesIsSingleValue, equalKeySeriesDuplicateCounts,
-            equalKeySeriesCount,
-            spills, spillHashMapResultIndices, spillCount,
-            hashMapResults, hashMapResultCount);
+        finishInner(batch,
+            allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
index 230f9fe..07393b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
@@ -111,26 +111,23 @@ public abstract class VectorMapJoinLeftSemiGenerateResultOperator
    * @param batch
    *          The big table batch with any matching and any non matching rows both as
    *          selected in use.
-   * @param allMatchs
-   *          A subset of the rows of the batch that are matches.
    * @param allMatchCount
    *          Number of matches in allMatchs.
-   * @param spills
-   *          A subset of the rows of the batch that are spills.
-   * @param spillHashMapResultIndices
-   *          For each entry in spills, the index into the hashTableResults.
    * @param spillCount
    *          Number of spills in spills.
    * @param hashTableResults
    *          The array of all hash table results for the batch. We need the
    *          VectorMapJoinHashTableResult for the spill information.
    */
-  protected int finishLeftSemi(VectorizedRowBatch batch,
-      int[] allMatchs, int allMatchCount,
-      int[] spills, int[] spillHashMapResultIndices, int spillCount,
+  protected void finishLeftSemi(VectorizedRowBatch batch,
+      int allMatchCount, int spillCount,
       VectorMapJoinHashTableResult[] hashTableResults) throws HiveException, IOException {
 
-    int numSel;
+    // Get rid of spills before we start modifying the batch.
+    if (spillCount > 0) {
+      spillHashMapBatch(batch, hashTableResults,
+          spills, spillHashMapResultIndices, spillCount);
+    }
 
     /*
      * Optimize by running value expressions only over the matched rows.
@@ -139,14 +136,9 @@ public abstract class VectorMapJoinLeftSemiGenerateResultOperator
       performValueExpressions(batch, allMatchs, allMatchCount);
     }
 
-    numSel = generateHashSetResults(batch, allMatchs, allMatchCount);
-
-    if (spillCount > 0) {
-      spillHashMapBatch(batch, hashTableResults,
-          spills, spillHashMapResultIndices, spillCount);
-    }
-
-    return numSel;
+    int numSel = generateHashSetResults(batch, allMatchs, allMatchCount);
+    batch.size = numSel;
+    batch.selectedInUse = true;
   }
 
   /**
@@ -199,11 +191,9 @@ public abstract class VectorMapJoinLeftSemiGenerateResultOperator
     return batch.size;
   }
 
-  protected int finishLeftSemiRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
+  protected void finishLeftSemiRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
       VectorMapJoinHashTableResult hashSetResult) throws HiveException, IOException {
 
-    int numSel = 0;
-
     switch (joinResult) {
     case MATCH:
 
@@ -215,19 +205,21 @@ public abstract class VectorMapJoinLeftSemiGenerateResultOperator
       }
 
       // Generate special repeated case.
-      numSel = generateHashSetResultRepeatedAll(batch);
+      int numSel = generateHashSetResultRepeatedAll(batch);
+      batch.size = numSel;
+      batch.selectedInUse = true;
       break;
 
     case SPILL:
       // Whole batch is spilled.
       spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashSetResult);
+      batch.size = 0;
       break;
 
     case NOMATCH:
       // No match for entire batch.
+      batch.size = 0;
       break;
     }
-
-    return numSel;
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
index 75aeefb..712978a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
@@ -151,9 +151,6 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGene
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Single-Column Long specific declarations.
        */
@@ -198,7 +195,7 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGene
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
+        finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
       } else {
 
         /*
@@ -348,15 +345,11 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGene
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
         }
 
-        numSel = finishLeftSemi(batch,
-            allMatchs, allMatchCount,
-            spills, spillHashMapResultIndices, spillCount,
+        finishLeftSemi(batch,
+            allMatchCount, spillCount,
             (VectorMapJoinHashTableResult[]) hashSetResults);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
index ea287f4..b941431 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
@@ -155,9 +155,6 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemi
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Multi-Key specific declarations.
        */
@@ -210,7 +207,7 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemi
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
+        finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
       } else {
 
         /*
@@ -291,6 +288,10 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemi
             saveKeyOutput = currentKeyOutput;
             currentKeyOutput = temp;
 
+            /*
+             * Multi-key specific lookup key.
+             */
+
             byte[] keyBytes = saveKeyOutput.getData();
             int keyLength = saveKeyOutput.getLength();
             saveJoinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[hashSetResultCount]);
@@ -360,15 +361,11 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemi
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
         }
 
-        numSel = finishLeftSemi(batch,
-            allMatchs, allMatchCount,
-            spills, spillHashMapResultIndices, spillCount,
+        finishLeftSemi(batch,
+            allMatchCount, spillCount,
             (VectorMapJoinHashTableResult[]) hashSetResults);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
index 116cb81..9ff1141 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
@@ -142,9 +142,6 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
         }
       }
 
-      // We rebuild in-place the selected array with rows destine to be forwarded.
-      int numSel = 0;
-
       /*
        * Single-Column String specific declarations.
        */
@@ -187,7 +184,7 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
         if (LOG.isDebugEnabled()) {
           LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
         }
-        numSel = finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
+        finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
       } else {
 
         /*
@@ -263,6 +260,10 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
 
             saveKeyBatchIndex = batchIndex;
 
+            /*
+             * Single-Column String specific lookup key.
+             */
+
             byte[] keyBytes = vector[batchIndex];
             int keyStart = start[batchIndex];
             int keyLength = length[batchIndex];
@@ -333,15 +334,11 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
               " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
         }
 
-        numSel = finishLeftSemi(batch,
-            allMatchs, allMatchCount,
-            spills, spillHashMapResultIndices, spillCount,
+        finishLeftSemi(batch,
+            allMatchCount, spillCount,
             (VectorMapJoinHashTableResult[]) hashSetResults);
       }
 
-      batch.selectedInUse = true;
-      batch.size =  numSel;
-
       if (batch.size > 0) {
         // Forward any remaining selected rows.
         forwardBigTableBatch(batch);

http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
index 7ef5574..57814fd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
@@ -70,15 +70,34 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
   // generation.
   protected transient VectorMapJoinHashMapResult hashMapResults[];
 
-  // Pre-allocated member for storing any matching row indexes during a processOp call.
-  protected transient int[] matchs;
+  // Pre-allocated member for remembering the big table's selected array at the beginning of
+  // the process method before applying any filter.  For outer join we need to remember which
+  // rows did not match since they will appear the in outer join result with NULLs for the
+  // small table.
+  protected transient int[] inputSelected;
 
-  // Pre-allocated member for storing the mapping to the row batchIndex of the first of a series of
-  // equal keys that was looked up during a processOp call.
-  protected transient int[] matchHashMapResultIndices;
+  // Pre-allocated member for storing the (physical) batch index of matching row (single- or
+  // multi-small-table-valued) indexes during a process call.
+  protected transient int[] allMatchs;
 
-  // All matching and non-matching big table rows.
-  protected transient int[] nonSpills;
+  /*
+   *  Pre-allocated members for storing information equal key series for small-table matches.
+   *
+   *  ~HashMapResultIndices
+   *                Index into the hashMapResults array for the match.
+   *  ~AllMatchIndices
+   *                (Logical) indices into allMatchs to the first row of a match of a
+   *                possible series of duplicate keys.
+   *  ~IsSingleValue
+   *                Whether there is 1 or multiple small table values.
+   *  ~DuplicateCounts
+   *                The duplicate count for each matched key.
+   *
+   */
+  protected transient int[] equalKeySeriesHashMapResultIndices;
+  protected transient int[] equalKeySeriesAllMatchIndices;
+  protected transient boolean[] equalKeySeriesIsSingleValue;
+  protected transient int[] equalKeySeriesDuplicateCounts;
 
   // Pre-allocated member for storing the (physical) batch index of rows that need to be spilled.
   protected transient int[] spills;
@@ -86,8 +105,11 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
   // Pre-allocated member for storing index into the hashSetResults for each spilled row.
   protected transient int[] spillHashMapResultIndices;
 
-  // Pre-allocated member for storing any non-matching row indexes during a processOp call.
-  protected transient int[] scratch1;
+  // Pre-allocated member for storing any non-spills, non-matches, or merged row indexes during a
+  // process method call.
+  protected transient int[] nonSpills;
+  protected transient int[] noMatchs;
+  protected transient int[] merged;
 
   public VectorMapJoinOuterGenerateResultOperator() {
     super();
@@ -111,12 +133,23 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
     for (int i = 0; i < hashMapResults.length; i++) {
       hashMapResults[i] = baseHashMap.createHashMapResult();
     }
-    matchs = new int[batch.DEFAULT_SIZE];
-    matchHashMapResultIndices = new int[batch.DEFAULT_SIZE];
-    nonSpills = new int[batch.DEFAULT_SIZE];
+
+    inputSelected = new int[batch.DEFAULT_SIZE];
+
+    allMatchs = new int[batch.DEFAULT_SIZE];
+
+    equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE];
+    equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE];
+    equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE];
+    equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE];
+
     spills = new int[batch.DEFAULT_SIZE];
     spillHashMapResultIndices = new int[batch.DEFAULT_SIZE];
-    scratch1 = new int[batch.DEFAULT_SIZE];
+
+    nonSpills = new int[batch.DEFAULT_SIZE];
+    noMatchs = new int[batch.DEFAULT_SIZE];
+    merged = new int[batch.DEFAULT_SIZE];
+
   }
 
   //-----------------------------------------------------------------------------------------------
@@ -145,260 +178,372 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
   }
 
   /**
-   * Generate the outer join output results for one vectorized row batch.
-   *
-   * Any filter expressions will apply now since hash map lookup for outer join is complete.
+   * Apply the value expression to rows in the (original) input selected array.
    *
    * @param batch
-   *          The big table batch with any matching and any non matching rows both as
-   *          selected in use.
-   * @param matchs
-   *          A subset of the rows of the batch that are matches.
-   * @param matchHashMapResultIndices
-   *          For each entry in matches, the index into the hashMapResult.
-   * @param matchSize
-   *          Number of matches in matchs.
-   * @param nonSpills
-   *          The rows of the batch that are both matches and non-matches.
-   * @param nonspillCount
-   *          Number of rows in nonSpills.
-   * @param spills
-   *          A subset of the rows of the batch that are spills.
-   * @param spillHashMapResultIndices
-   *          For each entry in spills, the index into the hashMapResult.
-   * @param spillCount
-   *          Number of spills in spills.
-   * @param hashMapResults
-   *          The array of all hash map results for the batch.
-   * @param hashMapResultCount
-   *          Number of entries in hashMapResults.
-   * @param scratch1
-   *          Pre-allocated storage to internal use.
+   *          The vectorized row batch.
+   * @param inputSelectedInUse
+   *          Whether the (original) input batch is selectedInUse.
+   * @param inputLogicalSize
+   *          The (original) input batch size.
    */
-  public int finishOuter(VectorizedRowBatch batch,
-      int[] matchs, int[] matchHashMapResultIndices, int matchCount,
-      int[] nonSpills, int nonSpillCount,
-      int[] spills, int[] spillHashMapResultIndices, int spillCount,
-      VectorMapJoinHashMapResult[] hashMapResults, int hashMapResultCount,
-      int[] scratch1) throws IOException, HiveException {
-
-     int numSel = 0;
-
-    // At this point we have determined the matching rows only for the ON equality condition(s).
-    // Implicitly, non-matching rows are those in the selected array minus matchs.
+  private void doValueExprOnInputSelected(VectorizedRowBatch batch,
+      boolean inputSelectedInUse, int inputLogicalSize) {
 
-    // Next, for outer join, apply any ON predicates to filter down the matches.
-    if (matchCount > 0 && bigTableFilterExpressions.length > 0) {
+    int saveBatchSize = batch.size;
+    int[] saveSelected = batch.selected;
+    boolean saveSelectedInUse = batch.selectedInUse;
 
-      System.arraycopy(matchs, 0, batch.selected, 0, matchCount);
-      batch.size = matchCount;
+    batch.size = inputLogicalSize;
+    batch.selected = inputSelected;
+    batch.selectedInUse = inputSelectedInUse;
 
-      // Non matches will be removed from the selected array.
-      for (VectorExpression ve : bigTableFilterExpressions) {
+    if (bigTableValueExpressions != null) {
+      for(VectorExpression ve: bigTableValueExpressions) {
         ve.evaluate(batch);
       }
+    }
 
-      // LOG.info("finishOuter" +
-      //     " filtered batch.selected " + Arrays.toString(Arrays.copyOfRange(batch.selected, 0, batch.size)));
-
-      // Fixup the matchHashMapResultIndices array.
-      if (batch.size < matchCount) {
-        int numMatch = 0;
-        int[] selected = batch.selected;
-        for (int i = 0; i < batch.size; i++) {
-          if (selected[i] == matchs[numMatch]) {
-            matchHashMapResultIndices[numMatch] = matchHashMapResultIndices[i];
-            numMatch++;
-            if (numMatch == matchCount) {
-              break;
-            }
-          }
-        }
-        System.arraycopy(batch.selected, 0, matchs, 0, matchCount);
+    batch.size = saveBatchSize;
+    batch.selected = saveSelected;
+    batch.selectedInUse = saveSelectedInUse;
+  }
+
+  /**
+   * Apply the value expression to rows specified by a selected array.
+   *
+   * @param batch
+   *          The vectorized row batch.
+   * @param selected
+   *          The (physical) batch indices to apply the expression to.
+   * @param size
+   *          The size of selected.
+   */
+  private void doValueExpr(VectorizedRowBatch batch,
+      int[] selected, int size) {
+
+    int saveBatchSize = batch.size;
+    int[] saveSelected = batch.selected;
+    boolean saveSelectedInUse = batch.selectedInUse;
+
+    batch.size = size;
+    batch.selected = selected;
+    batch.selectedInUse = true;
+
+    if (bigTableValueExpressions != null) {
+      for(VectorExpression ve: bigTableValueExpressions) {
+        ve.evaluate(batch);
       }
     }
-    // LOG.info("finishOuter" +
-    //     " matchs[" + matchCount + "] " + intArrayToRangesString(matchs, matchCount) +
-    //     " matchHashMapResultIndices " + Arrays.toString(Arrays.copyOfRange(matchHashMapResultIndices, 0, matchCount)));
 
-    // Big table value expressions apply to ALL matching and non-matching rows.
-    if (bigTableValueExpressions != null) {
+    batch.size = saveBatchSize;
+    batch.selected = saveSelected;
+    batch.selectedInUse = saveSelectedInUse;
+  }
 
-      System.arraycopy(nonSpills, 0, batch.selected, 0, nonSpillCount);
-      batch.size = nonSpillCount;
+  /**
+   * Remove (subtract) members from the input selected array and produce the results into
+   * a difference array.
+   *
+   * @param inputSelectedInUse
+   *          Whether the (original) input batch is selectedInUse.
+   * @param inputLogicalSize
+   *          The (original) input batch size.
+   * @param remove
+   *          The indices to remove.  They must all be present in input selected array.
+   * @param removeSize
+   *          The size of remove.
+   * @param difference
+   *          The resulting difference -- the input selected array indices not in the
+   *          remove array.
+   * @return
+   *          The resulting size of the difference array.
+   * @throws HiveException 
+   */
+  private int subtractFromInputSelected(boolean inputSelectedInUse, int inputLogicalSize,
+      int[] remove, int removeSize, int[] difference) throws HiveException {
 
-      for (VectorExpression ve: bigTableValueExpressions) {
-        ve.evaluate(batch);
+    // if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
+    //   throw new HiveException("remove is not in sort order and unique");
+    // }
+
+   int differenceCount = 0;
+
+   // Determine which rows are left.
+   int removeIndex = 0;
+   if (inputSelectedInUse) {
+     for (int i = 0; i < inputLogicalSize; i++) {
+       int candidateIndex = inputSelected[i];
+       if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
+         removeIndex++;
+       } else {
+         difference[differenceCount++] = candidateIndex;
+       }
+     }
+   } else {
+     for (int candidateIndex = 0; candidateIndex < inputLogicalSize; candidateIndex++) {
+       if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
+         removeIndex++;
+       } else {
+         difference[differenceCount++] = candidateIndex;
+       }
+     }
+   }
+
+   if (removeIndex != removeSize) {
+     throw new HiveException("Not all batch indices removed");
+   }
+
+   // if (!verifyMonotonicallyIncreasing(difference, differenceCount)) {
+   //   throw new HiveException("difference is not in sort order and unique");
+   // }
+
+   return differenceCount;
+ }
+
+  /**
+   * Remove (subtract) members from an array and produce the results into
+   * a difference array.
+
+   * @param all
+   *          The selected array containing all members.
+   * @param allSize
+   *          The size of all.
+   * @param remove
+   *          The indices to remove.  They must all be present in input selected array.
+   * @param removeSize
+   *          The size of remove.
+   * @param difference
+   *          The resulting difference -- the all array indices not in the
+   *          remove array.
+   * @return
+   *          The resulting size of the difference array.
+   * @throws HiveException 
+   */
+  private int subtract(int[] all, int allSize,
+      int[] remove, int removeSize, int[] difference) throws HiveException {
+
+    // if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
+    //   throw new HiveException("remove is not in sort order and unique");
+    // }
+
+    int differenceCount = 0;
+
+    // Determine which rows are left.
+    int removeIndex = 0;
+    for (int i = 0; i < allSize; i++) {
+      int candidateIndex = all[i];
+      if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
+        removeIndex++;
+      } else {
+        difference[differenceCount++] = candidateIndex;
       }
     }
 
-    // Determine which rows are non matches by determining the delta between selected and
-    // matchs.
-    int[] noMatchs = scratch1;
-    int noMatchCount = 0;
-    if (matchCount < nonSpillCount) {
-      // Determine which rows are non matches.
-      int matchIndex = 0;
-      for (int i = 0; i < nonSpillCount; i++) {
-        int candidateIndex = nonSpills[i];
-        if (matchIndex < matchCount && candidateIndex == matchs[matchIndex]) {
-          matchIndex++;
+    if (removeIndex != removeSize) {
+      throw new HiveException("Not all batch indices removed");
+    }
+
+    return differenceCount;
+  }
+
+  /**
+   * Sort merge two select arrays so the resulting array is ordered by (batch) index.
+   *
+   * @param selected1
+   * @param selected1Count
+   * @param selected2
+   * @param selected2Count
+   * @param sortMerged
+   *          The resulting sort merge of selected1 and selected2.
+   * @return
+   *          The resulting size of the sortMerged array.
+   * @throws HiveException 
+   */
+  private int sortMerge(int[] selected1, int selected1Count,
+          int[] selected2, int selected2Count, int[] sortMerged) throws HiveException {
+
+    // if (!verifyMonotonicallyIncreasing(selected1, selected1Count)) {
+    //   throw new HiveException("selected1 is not in sort order and unique");
+    // }
+
+    // if (!verifyMonotonicallyIncreasing(selected2, selected2Count)) {
+    //   throw new HiveException("selected1 is not in sort order and unique");
+    // }
+
+
+    int sortMergeCount = 0;
+
+    int selected1Index = 0;
+    int selected2Index = 0;
+    for (int i = 0; i < selected1Count + selected2Count; i++) {
+      if (selected1Index < selected1Count && selected2Index < selected2Count) {
+        if (selected1[selected1Index] < selected2[selected2Index]) {
+          sortMerged[sortMergeCount++] = selected1[selected1Index++];
         } else {
-          noMatchs[noMatchCount++] = candidateIndex;
+          sortMerged[sortMergeCount++] = selected2[selected2Index++];
         }
+      } else if (selected1Index < selected1Count) {
+        sortMerged[sortMergeCount++] = selected1[selected1Index++];
+      } else {
+        sortMerged[sortMergeCount++] = selected2[selected2Index++];
       }
     }
-    // LOG.info("finishOuter" +
-    //     " noMatchs[" + noMatchCount + "] " + intArrayToRangesString(noMatchs, noMatchCount));
 
+    // if (!verifyMonotonicallyIncreasing(sortMerged, sortMergeCount)) {
+    //   throw new HiveException("sortMerged is not in sort order and unique");
+    // }
 
-    // When we generate results into the overflow batch, we may still end up with fewer rows
-    // in the big table batch.  So, nulSel and the batch's selected array will be rebuilt with
-    // just the big table rows that need to be forwarded, minus any rows processed with the
-    // overflow batch.
-    if (matchCount > 0) {
-      numSel = generateOuterHashMapMatchResults(batch,
-          matchs, matchHashMapResultIndices, matchCount,
-          hashMapResults, numSel);
-    }
+    return sortMergeCount;
+  }
 
-    if (noMatchCount > 0) {
-      numSel = generateOuterHashMapNoMatchResults(batch, noMatchs, noMatchCount, numSel);
-    }
+  /**
+   * Generate the outer join output results for one vectorized row batch.
+   *
+   * @param batch
+   *          The big table batch with any matching and any non matching rows both as
+   *          selected in use.
+   * @param allMatchCount
+   *          Number of matches in allMatchs.
+   * @param equalKeySeriesCount
+   *          Number of single value matches.
+   * @param atLeastOneNonMatch
+   *          Whether at least one row was a non-match.
+   * @param inputSelectedInUse
+   *          A copy of the batch's selectedInUse flag on input to the process method.
+   * @param inputLogicalSize
+   *          The batch's size on input to the process method.
+   * @param spillCount
+   *          Number of spills in spills.
+   * @param hashMapResultCount
+   *          Number of entries in hashMapResults.
+   */
+  public void finishOuter(VectorizedRowBatch batch,
+      int allMatchCount, int equalKeySeriesCount, boolean atLeastOneNonMatch,
+      boolean inputSelectedInUse, int inputLogicalSize,
+      int spillCount, int hashMapResultCount) throws IOException, HiveException {
 
+    // Get rid of spills before we start modifying the batch.
     if (spillCount > 0) {
       spillHashMapBatch(batch, (VectorMapJoinHashTableResult[]) hashMapResults,
           spills, spillHashMapResultIndices, spillCount);
     }
 
-    return numSel;
-  }
-
-   /**
-    * Generate the matching outer join output results for one row of a vectorized row batch into
-    * the overflow batch.
-    *
-    * @param batch
-    *          The big table batch.
-    * @param batchIndex
-    *          Index of the big table row.
-    * @param hashMapResult
-    *          The hash map result with the small table values.
-    */
-   private void copyOuterHashMapResultToOverflow(VectorizedRowBatch batch, int batchIndex,
-               VectorMapJoinHashMapResult hashMapResult) throws HiveException, IOException {
-
-     // if (hashMapResult.isCappedCountAvailable()) {
-     //   LOG.info("copyOuterHashMapResultToOverflow cappedCount " + hashMapResult.cappedCount());
-     // }
-     ByteSegmentRef byteSegmentRef = hashMapResult.first();
-     while (byteSegmentRef != null) {
-
-       // Copy the BigTable values into the overflow batch. Since the overflow batch may
-       // not get flushed here, we must copy by value.
-       if (bigTableRetainedVectorCopy != null) {
-         bigTableRetainedVectorCopy.copyByValue(batch, batchIndex,
-                                                overflowBatch, overflowBatch.size);
-       }
-
-       // Reference the keys we just copied above.
-       if (bigTableVectorCopyOuterKeys != null) {
-         bigTableVectorCopyOuterKeys.copyByReference(overflowBatch, overflowBatch.size,
-                                                     overflowBatch, overflowBatch.size);
-       }
-
-       if (smallTableVectorDeserializeRow != null) {
-
-         byte[] bytes = byteSegmentRef.getBytes();
-         int offset = (int) byteSegmentRef.getOffset();
-         int length = byteSegmentRef.getLength();
-         smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
-
-         smallTableVectorDeserializeRow.deserializeByValue(overflowBatch, overflowBatch.size);
-       }
+    int noMatchCount = 0;
+    if (spillCount > 0) {
 
-       ++overflowBatch.size;
-       if (overflowBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
-         forwardOverflow();
-       }
+      // Subtract the spills to get all match and non-match rows.
+      int nonSpillCount = subtractFromInputSelected(
+              inputSelectedInUse, inputLogicalSize, spills, spillCount, nonSpills);
 
-       byteSegmentRef = hashMapResult.next();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("finishOuter spillCount > 0" +
+            " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount));
+      }
+  
+      // Big table value expressions apply to ALL matching and non-matching rows.
+      if (bigTableValueExpressions != null) {
+  
+        doValueExpr(batch, nonSpills, nonSpillCount);
+  
       }
-     // LOG.info("copyOuterHashMapResultToOverflow overflowBatch.size " + overflowBatch.size);
+  
+      if (atLeastOneNonMatch) {
+        noMatchCount = subtract(nonSpills, nonSpillCount, allMatchs, allMatchCount,
+                noMatchs);
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("finishOuter spillCount > 0" +
+              " noMatchs " + intArrayToRangesString(noMatchs, noMatchCount));
+        }
 
-   }
+      }
+    } else {
 
-   /**
-    * Generate the matching outer join output results for one vectorized row batch.
-    *
-    * For each matching row specified by parameter, get the one or more small table values and
-    * form join results.
-    *
-    * (Note: Since all matching and non-matching rows are selected and output for outer joins,
-    * we cannot use selected as the matching rows).
-    *
-    * @param batch
-    *          The big table batch with any matching and any non matching rows both as
-    *          selected in use.
-    * @param matchs
-    *          A subset of the rows of the batch that are matches.
-    * @param matchHashMapResultIndices
-    *          For each entry in matches, the index into the hashMapResult.
-    * @param matchSize
-    *          Number of matches in matchs.
-    * @param hashMapResults
-    *          The array of all hash map results for the batch.
-    * @param numSel
-    *          The current count of rows in the rebuilding of the selected array.
-    *
-    * @return
-    *          The new count of selected rows.
-    */
-   protected int generateOuterHashMapMatchResults(VectorizedRowBatch batch,
-       int[] matchs, int[] matchHashMapResultIndices, int matchSize,
-       VectorMapJoinHashMapResult[] hashMapResults, int numSel)
-               throws IOException, HiveException {
+      // Run value expressions over original (whole) input batch.
+      doValueExprOnInputSelected(batch, inputSelectedInUse, inputLogicalSize);
 
-     int[] selected = batch.selected;
+      if (atLeastOneNonMatch) {
+        noMatchCount = subtractFromInputSelected(
+            inputSelectedInUse, inputLogicalSize, allMatchs, allMatchCount, noMatchs);
 
-     // Generate result within big table batch when single small table value.  Otherwise, copy
-     // to overflow batch.
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("finishOuter spillCount == 0" +
+              " noMatchs " + intArrayToRangesString(noMatchs, noMatchCount));
+        }
+      }
+    }
 
-     for (int i = 0; i < matchSize; i++) {
-       int batchIndex = matchs[i];
+    // When we generate results into the overflow batch, we may still end up with fewer rows
+    // in the big table batch.  So, nulSel and the batch's selected array will be rebuilt with
+    // just the big table rows that need to be forwarded, minus any rows processed with the
+    // overflow batch.
+    if (allMatchCount > 0) {
+
+      int numSel = 0;
+      for (int i = 0; i < equalKeySeriesCount; i++) {
+        int hashMapResultIndex = equalKeySeriesHashMapResultIndices[i];
+        VectorMapJoinHashMapResult hashMapResult = hashMapResults[hashMapResultIndex];
+        int allMatchesIndex = equalKeySeriesAllMatchIndices[i];
+        boolean isSingleValue = equalKeySeriesIsSingleValue[i];
+        int duplicateCount = equalKeySeriesDuplicateCounts[i];
+
+        if (isSingleValue) {
+          numSel = generateHashMapResultSingleValue(
+                      batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount, numSel);
+        } else {
+          generateHashMapResultMultiValue(
+              batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount);
+        }
+      }
 
-       int hashMapResultIndex = matchHashMapResultIndices[i];
-       VectorMapJoinHashMapResult hashMapResult = hashMapResults[hashMapResultIndex];
+      // The number of single value rows that were generated in the big table batch.
+      batch.size = numSel;
+      batch.selectedInUse = true;
 
-       if (!hashMapResult.isSingleRow()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("finishOuter allMatchCount > 0" +
+            " batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+      }
 
-         // Multiple small table rows require use of the overflow batch.
-         copyOuterHashMapResultToOverflow(batch, batchIndex, hashMapResult);
-       } else {
+    } else {
+      batch.size = 0;
+    }
 
-         // Generate join result in big table batch.
-         ByteSegmentRef byteSegmentRef = hashMapResult.first();
+    if (noMatchCount > 0) {
+      if (batch.size > 0) {
+
+        generateOuterNulls(batch, noMatchs, noMatchCount);
+  
+        // Merge noMatchs and (match) selected.
+        int mergeCount = sortMerge(
+                noMatchs, noMatchCount, batch.selected, batch.size, merged);
+    
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("finishOuter noMatchCount > 0 && batch.size > 0" +
+              " merged " + intArrayToRangesString(merged, mergeCount));
+        }
 
-         if (bigTableVectorCopyOuterKeys != null) {
-           bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex);
-         }
+        System.arraycopy(merged, 0, batch.selected, 0, mergeCount);
+        batch.size = mergeCount;
+        batch.selectedInUse = true;
+      } else {
 
-         if (smallTableVectorDeserializeRow != null) {
+        // We can use the whole batch for output of no matches.
 
-           byte[] bytes = byteSegmentRef.getBytes();
-           int offset = (int) byteSegmentRef.getOffset();
-           int length = byteSegmentRef.getLength();
-           smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
+        generateOuterNullsRepeatedAll(batch);
 
-           smallTableVectorDeserializeRow.deserializeByValue(batch, batchIndex);
-         }
+        System.arraycopy(noMatchs, 0, batch.selected, 0, noMatchCount);
+        batch.size = noMatchCount;
+        batch.selectedInUse = true;
 
-         // Remember this big table row was used for an output result.
-         selected[numSel++] = batchIndex;
-       }
-     }
-     return numSel;
-   }
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("finishOuter noMatchCount > 0 && batch.size == 0" +
+              " batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+        }
+      }
+    }
+  }
 
    /**
     * Generate the non matching outer join output results for one vectorized row batch.
@@ -412,72 +557,30 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
     *          A subset of the rows of the batch that are non matches.
     * @param noMatchSize
     *          Number of non matches in noMatchs.
-    * @param numSel
-    *          The current count of rows in the rebuilding of the selected array.
-    *
-    * @return
-    *          The new count of selected rows.
     */
-   protected int generateOuterHashMapNoMatchResults(VectorizedRowBatch batch, int[] noMatchs,
-       int noMatchSize, int numSel) throws IOException, HiveException {
-     int[] selected = batch.selected;
-
-     // Generate result within big table batch with null small table results, using isRepeated
-     // if possible.
+   protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs,
+       int noMatchSize) throws IOException, HiveException {
 
-     if (numSel == 0) {
+     // Set null information in the small table results area.
 
-       // There were 0 matching rows -- so we can use the isRepeated optimization for the non
-       // matching rows.
+     for (int i = 0; i < noMatchSize; i++) {
+       int batchIndex = noMatchs[i];
 
        // Mark any scratch small table scratch columns that would normally receive a copy of the
-       // key as null and repeating.
+       // key as null, too.
        for (int column : bigTableOuterKeyOutputVectorColumns) {
          ColumnVector colVector = batch.cols[column];
-         colVector.isRepeating = true;
          colVector.noNulls = false;
-         colVector.isNull[0] = true;
+         colVector.isNull[batchIndex] = true;
        }
 
-       // Small table values are set to null and repeating.
+       // Small table values are set to null.
        for (int column : smallTableOutputVectorColumns) {
          ColumnVector colVector = batch.cols[column];
-         colVector.isRepeating = true;
          colVector.noNulls = false;
-         colVector.isNull[0] = true;
-       }
-
-       // Rebuild the selected array.
-       for (int i = 0; i < noMatchSize; i++) {
-         int batchIndex = noMatchs[i];
-         selected[numSel++] = batchIndex;
-       }
-     } else {
-
-       // Set null information in the small table results area.
-
-       for (int i = 0; i < noMatchSize; i++) {
-         int batchIndex = noMatchs[i];
-
-         // Mark any scratch small table scratch columns that would normally receive a copy of the
-         // key as null, too.
-         for (int column : bigTableOuterKeyOutputVectorColumns) {
-           ColumnVector colVector = batch.cols[column];
-           colVector.noNulls = false;
-           colVector.isNull[batchIndex] = true;
-         }
-
-         // Small table values are set to null.
-         for (int column : smallTableOutputVectorColumns) {
-           ColumnVector colVector = batch.cols[column];
-           colVector.noNulls = false;
-           colVector.isNull[batchIndex] = true;
-         }
-
-         selected[numSel++] = batchIndex;
+         colVector.isNull[batchIndex] = true;
        }
      }
-     return numSel;
    }
 
   /**
@@ -492,65 +595,114 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
    *          The hash map lookup result for the repeated key.
    * @param hashMapResults
    *          The array of all hash map results for the batch.
+   * @param someRowsFilteredOut
+   *          Whether some rows of the repeated key batch were knocked out by the filter.
+   * @param inputSelectedInUse
+   *          A copy of the batch's selectedInUse flag on input to the process method.
+   * @param inputLogicalSize
+   *          The batch's size on input to the process method.
    * @param scratch1
    *          Pre-allocated storage to internal use.
+   * @param scratch2
+   *          Pre-allocated storage to internal use.
    */
-  public int finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
-      VectorMapJoinHashMapResult hashMapResult, int[] scratch1)
+  public void finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
+      VectorMapJoinHashMapResult hashMapResult, boolean someRowsFilteredOut,
+      boolean inputSelectedInUse, int inputLogicalSize)
           throws IOException, HiveException {
 
-    int numSel = 0;
+    // LOG.debug("finishOuterRepeated batch #" + batchCounter + " " + joinResult.name() + " batch.size " + batch.size + " someRowsFilteredOut " + someRowsFilteredOut);
 
-    if (joinResult == JoinUtil.JoinResult.MATCH && bigTableFilterExpressions.length > 0) {
+    switch (joinResult) {
+    case MATCH:
 
-      // Since it is repeated, the evaluation of the filter will knock the whole batch out.
-      // But since we are doing outer join, we want to keep non-matches.
+      // Rows we looked up as one repeated key are a match.  But filtered out rows
+      // need to be generated as non-matches, too.
 
-      // First, remember selected;
-      int[] rememberSelected = scratch1;
-      int rememberBatchSize = batch.size;
-      if (batch.selectedInUse) {
-        System.arraycopy(batch.selected, 0, rememberSelected, 0, batch.size);
-      }
+      if (someRowsFilteredOut) {
 
-      // Filter.
-      for (VectorExpression ve : bigTableFilterExpressions) {
-        ve.evaluate(batch);
-      }
+        // For the filtered out rows that didn't (logically) get looked up in the hash table,
+        // we need to generate no match results for those too...
 
-      // Convert a filter out to a non match.
-      if (batch.size == 0) {
-        joinResult = JoinUtil.JoinResult.NOMATCH;
-        if (batch.selectedInUse) {
-          System.arraycopy(rememberSelected, 0, batch.selected, 0, rememberBatchSize);
-          // LOG.info("finishOuterRepeated batch #" + batchCounter + " filter out converted to no matchs " +
-          //     Arrays.toString(Arrays.copyOfRange(batch.selected, 0, rememberBatchSize)));
-        } else {
-          // LOG.info("finishOuterRepeated batch #" + batchCounter + " filter out converted to no matchs batch size " +
-          //     rememberBatchSize);
-        }
-        batch.size = rememberBatchSize;
-      }
-    }
+        // Run value expressions over original (whole) input batch.
+        doValueExprOnInputSelected(batch, inputSelectedInUse, inputLogicalSize);
 
-    // LOG.info("finishOuterRepeated batch #" + batchCounter + " " + joinResult.name() + " batch.size " + batch.size);
-    switch (joinResult) {
-    case MATCH:
-      // Run our value expressions over whole batch.
-      if (bigTableValueExpressions != null) {
-        for(VectorExpression ve: bigTableValueExpressions) {
-          ve.evaluate(batch);
+        // Now calculate which rows were filtered out (they are logically no matches).
+
+        // Determine which rows are non matches by determining the delta between inputSelected and
+        // (current) batch selected.
+
+        int noMatchCount = subtractFromInputSelected(
+                inputSelectedInUse, inputLogicalSize, batch.selected, batch.size, noMatchs);
+
+        generateOuterNulls(batch, noMatchs, noMatchCount);
+
+        // Now generate the matchs.  Single small table values will be put into the big table
+        // batch and come back in matchs.  Any multiple small table value results will go into
+        // the overflow batch.
+        generateHashMapResultRepeatedAll(batch, hashMapResult);
+
+        // Merge noMatchs and (match) selected.
+        int mergeCount = sortMerge(
+                noMatchs, noMatchCount, batch.selected, batch.size, merged);
+
+        System.arraycopy(merged, 0, batch.selected, 0, mergeCount);
+        batch.size = mergeCount;
+        batch.selectedInUse = true;
+      } else {
+
+        // Just run our value expressions over input batch.
+
+        if (bigTableValueExpressions != null) {
+          for(VectorExpression ve: bigTableValueExpressions) {
+            ve.evaluate(batch);
+          }
         }
-      }
 
-      // Use a common method applicable for inner and outer.
-      numSel = generateHashMapResultRepeatedAll(batch, hashMapResult);
+        generateHashMapResultRepeatedAll(batch, hashMapResult);
+      }
       break;
+
     case SPILL:
-      // Whole batch is spilled.
+
+      // Rows we looked up as one repeated key need to spill.  But filtered out rows
+      // need to be generated as non-matches, too.
+
       spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMapResult);
+
+      // After using selected to generate spills, generate non-matches, if any.
+      if (someRowsFilteredOut) {
+
+        // Determine which rows are non matches by determining the delta between inputSelected and
+        // (current) batch selected.
+
+        int noMatchCount = subtractFromInputSelected(
+                inputSelectedInUse, inputLogicalSize, batch.selected, batch.size, noMatchs);
+
+        System.arraycopy(noMatchs, 0, batch.selected, 0, noMatchCount);
+        batch.size = noMatchCount;
+        batch.selectedInUse = true;
+
+        generateOuterNullsRepeatedAll(batch);
+      } else {
+        batch.size = 0;
+      }
+
       break;
+
     case NOMATCH:
+
+      if (someRowsFilteredOut) {
+
+        // When the repeated no match is due to filtering, we need to restore the
+        // selected information.
+
+        if (inputSelectedInUse) {
+          System.arraycopy(inputSelected, 0, batch.selected, 0, inputLogicalSize);
+        }
+        batch.size = inputLogicalSize;
+      }
+
       // Run our value expressions over whole batch.
       if (bigTableValueExpressions != null) {
         for(VectorExpression ve: bigTableValueExpressions) {
@@ -558,11 +710,9 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
         }
       }
 
-      numSel = generateOuterNullsRepeatedAll(batch);
+      generateOuterNullsRepeatedAll(batch);
       break;
     }
-
-    return numSel;
   }
 
   /**
@@ -573,24 +723,8 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
    *
    * @param batch
    *          The big table batch.
-   * @return
-   *          The new count of selected rows.
    */
-  protected int generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException {
-
-    int[] selected = batch.selected;
-    boolean selectedInUse = batch.selectedInUse;
-
-    // Generate result within big table batch using is repeated for null small table results.
-
-    if (batch.selectedInUse) {
-      // The selected array is already filled in as we want it.
-    } else {
-      for (int i = 0; i < batch.size; i++) {
-        selected[i] = i;
-      }
-      batch.selectedInUse = true;
-    }
+  protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException {
 
     for (int column : smallTableOutputVectorColumns) {
       ColumnVector colVector = batch.cols[column];
@@ -607,12 +741,5 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
       colVector.isNull[0] = true;
       colVector.isRepeating = true;
     }
-
-    // for (int i = 0; i < batch.size; i++) {
-    //   int bigTableIndex = selected[i];
-    //   VectorizedBatchUtil.debugDisplayOneRow(batch, bigTableIndex, taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator generate generateOuterNullsRepeatedAll batch");
-    // }
-
-    return batch.size;
   }
 }
\ No newline at end of file