You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2015/05/15 00:45:57 UTC
[1/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't
handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt
McCline via Gunther Hagleitner)
Repository: hive
Updated Branches:
refs/heads/master 3fa7489e2 -> 2b9f2f5e2
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_join_filters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_join_filters.q.out b/ql/src/test/results/clientpositive/vector_join_filters.q.out
new file mode 100644
index 0000000..48fc072
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_join_filters.q.out
@@ -0,0 +1,222 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+Warning: Map Join MAPJOIN[17][bigTable=a] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+Warning: Map Join MAPJOIN[17][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_join_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/vector_join_nulls.q.out
new file mode 100644
index 0000000..c1516f2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_join_nulls.q.out
@@ -0,0 +1,195 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[17][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509856
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542038
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543491
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509891
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out b/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
index 23d3f32..f9077c8 100644
--- a/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
@@ -1,6 +1,10 @@
-PREHOOK: query: drop table if exists TJOIN1
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table if exists TJOIN1
PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table if exists TJOIN1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table if exists TJOIN1
POSTHOOK: type: DROPTABLE
PREHOOK: query: drop table if exists TJOIN2
PREHOOK: type: DROPTABLE
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_outer_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/vector_outer_join5.q.out
new file mode 100644
index 0000000..bbe8ba1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_outer_join5.q.out
@@ -0,0 +1,1300 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sorted_mod_4
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sorted_mod_4
+PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sorted_mod_4
+PREHOOK: Output: default@sorted_mod_4
+POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sorted_mod_4
+POSTHOOK: Output: default@sorted_mod_4
+PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+PREHOOK: query: create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_table
+POSTHOOK: query: create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_table
+PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Output: default@small_table
+POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Output: default@small_table
+PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:st
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:st
+ TableScan
+ alias: st
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6876
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:sm
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:sm
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ filter predicates:
+ 0 {(_col1 = 2)}
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(_col1 = 2)}
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6058
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:sm
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:sm
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ filter predicates:
+ 0 {((UDFToInteger(_col0) pmod 4) = _col1)}
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {((UDFToInteger(_col0) pmod 4) = _col1)}
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6248
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:sm
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:sm
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ filter predicates:
+ 0 {(_col0 < 100)}
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(_col0 < 100)}
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6876
+PREHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-3 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:sm
+ Fetch Operator
+ limit: -1
+ $hdt$_0:$hdt$_2:s
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:sm
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 UDFToLong(_col1) (type: bigint)
+ 1 (_col0 pmod UDFToLong(8)) (type: bigint)
+ $hdt$_0:$hdt$_2:s
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 UDFToLong(_col1) (type: bigint)
+ 1 (_col0 pmod UDFToLong(8)) (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+3268334
+PREHOOK: query: create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mod_8_mod_4
+POSTHOOK: query: create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mod_8_mod_4
+PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Output: default@mod_8_mod_4
+POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Output: default@mod_8_mod_4
+PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+#### A masked pattern was here ####
+PREHOOK: query: create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_table2
+POSTHOOK: query: create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_table2
+PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table2
+PREHOOK: Output: default@small_table2
+POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table2
+POSTHOOK: Output: default@small_table2
+PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:st
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:st
+ TableScan
+ alias: st
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+39112
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:sm
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:sm
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ filter predicates:
+ 0 {(_col1 = 2)}
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(_col1 = 2)}
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+11171
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:sm
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:sm
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ filter predicates:
+ 0 {((_col0 pmod 4) = _col1)}
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {((_col0 pmod 4) = _col1)}
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+14371
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:sm
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:sm
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ filter predicates:
+ 0 {(_col0 < 3)}
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(_col0 < 3)}
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+17792
+PREHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-8 is a root stage
+ Stage-3 depends on stages: Stage-8
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:sm
+ Fetch Operator
+ limit: -1
+ $hdt$_0:$hdt$_2:s
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:sm
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 UDFToLong(_col1) (type: bigint)
+ 1 (_col0 pmod UDFToLong(8)) (type: bigint)
+ $hdt$_0:$hdt$_2:s
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 UDFToLong(_col1) (type: bigint)
+ 1 (_col0 pmod UDFToLong(8)) (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+6524438
[2/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't
handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt
McCline via Gunther Hagleitner)
Posted by gu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/vector_join30.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_join30.q.out b/ql/src/test/results/clientpositive/vector_join30.q.out
new file mode 100644
index 0000000..57f9aeb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_join30.q.out
@@ -0,0 +1,2194 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcsrc
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcsrc
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2
+ Stage-8 has a backup stage: Stage-2
+ Stage-5 depends on stages: Stage-8
+ Stage-3 depends on stages: Stage-2, Stage-5, Stage-6
+ Stage-9 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-9
+ Stage-2
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-9
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2
+ Stage-7 has a backup stage: Stage-2
+ Stage-5 depends on stages: Stage-7
+ Stage-3 depends on stages: Stage-2, Stage-5
+ Stage-2
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-7
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-6 depends on stages: Stage-1, Stage-4 , consists of Stage-7, Stage-2
+ Stage-7 has a backup stage: Stage-2
+ Stage-5 depends on stages: Stage-7
+ Stage-3 depends on stages: Stage-2, Stage-5
+ Stage-2
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-7
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-9 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-10, Stage-11, Stage-12, Stage-2
+ Stage-10 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-10
+ Stage-3 depends on stages: Stage-2, Stage-6, Stage-7, Stage-8
+ Stage-11 has a backup stage: Stage-2
+ Stage-7 depends on stages: Stage-11
+ Stage-12 has a backup stage: Stage-2
+ Stage-8 depends on stages: Stage-12
+ Stage-2
+ Stage-4 is a root stage
+ Stage-5 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-9
+ Conditional Operator
+
+ Stage: Stage-10
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ $INTNAME2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ $INTNAME2
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-11
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ $INTNAME2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ $INTNAME2
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-12
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-8 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-9, Stage-10, Stage-2
+ Stage-9 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-9
+ Stage-3 depends on stages: Stage-2, Stage-6, Stage-7
+ Stage-10 has a backup stage: Stage-2
+ Stage-7 depends on stages: Stage-10
+ Stage-2
+ Stage-4 is a root stage
+ Stage-5 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-9
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ $INTNAME2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ $INTNAME2
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Left Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-10
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ $INTNAME2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ $INTNAME2
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Left Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Left Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2
+ Stage-8 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-8
+ Stage-3 depends on stages: Stage-2, Stage-6
+ Stage-2
+ Stage-4 is a root stage
+ Stage-5 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ $INTNAME2
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ $INTNAME2
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ Left Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ Left Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2
+ Stage-8 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-8
+ Stage-3 depends on stages: Stage-2, Stage-6
+ Stage-2
+ Stage-4 is a root stage
+ Stage-5 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ Right Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ Right Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-8, Stage-2
+ Stage-8 has a backup stage: Stage-2
+ Stage-6 depends on stages: Stage-8
+ Stage-3 depends on stages: Stage-2, Stage-6
+ Stage-2
+ Stage-4 is a root stage
+ Stage-5 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME
+ Fetch Operator
+ limit: -1
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ Right Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ Right Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
[5/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't
handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt
McCline via Gunther Hagleitner)
Posted by gu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_outer_join5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_outer_join5.q b/ql/src/test/queries/clientpositive/vector_outer_join5.q
new file mode 100644
index 0000000..b7ee4a4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_outer_join5.q
@@ -0,0 +1,173 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+
+-- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint;
+
+ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS;
+ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS;
+
+create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100;
+
+ANALYZE TABLE small_table COMPUTE STATISTICS;
+ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS;
+
+explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1;
+
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1;
+
+explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1;
+
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1;
+
+explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1;
+
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1;
+
+explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1;
+
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1;
+
+explain
+select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1;
+
+select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1;
+
+
+create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null;
+
+ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS;
+ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS;
+
+create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100;
+
+ANALYZE TABLE small_table2 COMPUTE STATISTICS;
+ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS;
+
+explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1;
+
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1;
+
+explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1;
+
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1;
+
+explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1;
+
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1;
+
+explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1;
+
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1;
+
+explain
+select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1;
+
+select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out b/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out
index 3aa0e1a..3b37c72 100644
--- a/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out
+++ b/ql/src/test/results/clientpositive/tez/acid_vectorization_partition.q.out
@@ -38,23 +38,23 @@ POSTHOOK: Input: default@acid_vectorized_part
POSTHOOK: Input: default@acid_vectorized_part@ds=today
POSTHOOK: Input: default@acid_vectorized_part@ds=tomorrow
#### A masked pattern was here ####
--1073279343 oj1YrV5Wa today
-1073279343 oj1YrV5Wa tomorrow
--1073051226 A34p7oRr2WvUJNf today
+-1073279343 oj1YrV5Wa today
-1073051226 A34p7oRr2WvUJNf tomorrow
--1072910839 0iqrc5 today
+-1073051226 A34p7oRr2WvUJNf today
-1072910839 0iqrc5 tomorrow
--1072081801 dPkN74F7 today
+-1072910839 0iqrc5 today
-1072081801 dPkN74F7 tomorrow
--1072076362 2uLyD28144vklju213J1mr tomorrow
+-1072081801 dPkN74F7 today
-1072076362 2uLyD28144vklju213J1mr today
--1071480828 aw724t8c5558x2xneC624 today
+-1072076362 2uLyD28144vklju213J1mr tomorrow
-1071480828 aw724t8c5558x2xneC624 tomorrow
--1071363017 Anj0oF today
+-1071480828 aw724t8c5558x2xneC624 today
-1071363017 Anj0oF tomorrow
--1070883071 0ruyd6Y50JpdGRf6HqD tomorrow
+-1071363017 Anj0oF today
-1070883071 0ruyd6Y50JpdGRf6HqD today
--1070551679 iUR3Q today
+-1070883071 0ruyd6Y50JpdGRf6HqD tomorrow
-1070551679 iUR3Q tomorrow
--1069736047 k17Am8uPHWk02cEf1jet tomorrow
+-1070551679 iUR3Q today
-1069736047 k17Am8uPHWk02cEf1jet today
+-1069736047 k17Am8uPHWk02cEf1jet tomorrow
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_join30.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_join30.q.out b/ql/src/test/results/clientpositive/tez/vector_join30.q.out
new file mode 100644
index 0000000..2a14842
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_join30.q.out
@@ -0,0 +1,1367 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcsrc
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcsrc
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ input vertices:
+ 1 Reducer 5
+ Statistics: Num rows: 275 Data size: 48400 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ input vertices:
+ 1 Reducer 5
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE)
+ Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 4
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ input vertices:
+ 0 Reducer 2
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+103231310608
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ input vertices:
+ 1 Reducer 5
+ 2 Reducer 7
+ Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Reducer 5
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+ Reducer 7
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Left Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+ Reducer 8
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ Left Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+ Reducer 8
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ Right Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+ Reducer 8
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
+PREHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: orcsrc
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ Right Outer Join0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(hash(_col2,_col3))
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Reducer 6
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+ Reducer 8
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+POSTHOOK: query: FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcsrc
+#### A masked pattern was here ####
+348019368476
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_join_filters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_join_filters.q.out b/ql/src/test/results/clientpositive/tez/vector_join_filters.q.out
new file mode 100644
index 0000000..8cc9311
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_join_filters.q.out
@@ -0,0 +1,222 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+Warning: Map Join MAPJOIN[15][bigTable=a] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+Warning: Map Join MAPJOIN[15][bigTable=b] in task 'Map 2' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4937935
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3080335
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
[3/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't
handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt
McCline via Gunther Hagleitner)
Posted by gu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out b/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out
new file mode 100644
index 0000000..1e74446
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vectorized_timestamp_ints_casts.q.out
@@ -0,0 +1,234 @@
+PREHOOK: query: explain
+select
+-- to timestamp
+ cast (ctinyint as timestamp)
+ ,cast (csmallint as timestamp)
+ ,cast (cint as timestamp)
+ ,cast (cbigint as timestamp)
+ ,cast (cfloat as timestamp)
+ ,cast (cdouble as timestamp)
+ ,cast (cboolean1 as timestamp)
+ ,cast (cbigint * 0 as timestamp)
+ ,cast (ctimestamp1 as timestamp)
+ ,cast (cstring1 as timestamp)
+ ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+-- to timestamp
+ cast (ctinyint as timestamp)
+ ,cast (csmallint as timestamp)
+ ,cast (cint as timestamp)
+ ,cast (cbigint as timestamp)
+ ,cast (cfloat as timestamp)
+ ,cast (cdouble as timestamp)
+ ,cast (cboolean1 as timestamp)
+ ,cast (cbigint * 0 as timestamp)
+ ,cast (ctimestamp1 as timestamp)
+ ,cast (cstring1 as timestamp)
+ ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: alltypesorc
+ Filter Operator
+ predicate: ((cbigint % 250) = 0) (type: boolean)
+ Select Operator
+ expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ ListSink
+
+PREHOOK: query: select
+-- to timestamp
+ cast (ctinyint as timestamp)
+ ,cast (csmallint as timestamp)
+ ,cast (cint as timestamp)
+ ,cast (cbigint as timestamp)
+ ,cast (cfloat as timestamp)
+ ,cast (cdouble as timestamp)
+ ,cast (cboolean1 as timestamp)
+ ,cast (cbigint * 0 as timestamp)
+ ,cast (ctimestamp1 as timestamp)
+ ,cast (cstring1 as timestamp)
+ ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+-- to timestamp
+ cast (ctinyint as timestamp)
+ ,cast (csmallint as timestamp)
+ ,cast (cint as timestamp)
+ ,cast (cbigint as timestamp)
+ ,cast (cfloat as timestamp)
+ ,cast (cdouble as timestamp)
+ ,cast (cboolean1 as timestamp)
+ ,cast (cbigint * 0 as timestamp)
+ ,cast (ctimestamp1 as timestamp)
+ ,cast (cstring1 as timestamp)
+ ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+1969-12-31 15:59:59.964 1969-12-31 15:59:59.8 NULL 1969-12-08 10:43:03.25 1969-12-31 15:59:24 1969-12-31 15:56:40 NULL 1969-12-31 16:00:00 1969-12-31 15:59:45.748 NULL NULL
+1969-12-31 15:59:59.964 1969-12-31 15:59:59.8 NULL 1970-01-19 04:24:39 1969-12-31 15:59:24 1969-12-31 15:56:40 NULL 1969-12-31 16:00:00 1969-12-31 15:59:53.817 NULL NULL
+1969-12-31 15:59:59.97 1969-12-31 15:59:59.8 NULL 1970-01-17 05:10:52.25 1969-12-31 15:59:30 1969-12-31 15:56:40 NULL 1969-12-31 16:00:00 1969-12-31 16:00:12.935 NULL NULL
+1969-12-31 15:59:59.949 NULL 1970-01-09 14:53:20.971 1970-01-12 20:45:23.25 1969-12-31 15:59:09 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:08.451 NULL NULL
+1969-12-31 15:59:59.949 NULL 1970-01-09 07:39:13.882 1969-12-09 07:45:32.75 1969-12-31 15:59:09 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:08.451 NULL NULL
+1969-12-31 16:00:00.02 1969-12-31 16:00:15.601 NULL 1969-12-27 11:19:26.75 1969-12-31 16:00:20 1969-12-31 20:20:01 NULL 1969-12-31 16:00:00 1969-12-31 15:59:45.129 NULL NULL
+1969-12-31 15:59:59.962 1969-12-31 16:00:15.601 NULL 1969-12-10 03:41:51 1969-12-31 15:59:22 1969-12-31 20:20:01 NULL 1969-12-31 16:00:00 1969-12-31 15:59:58.614 NULL NULL
+1969-12-31 15:59:59.995 1969-12-31 16:00:15.601 NULL 1970-01-07 18:06:56 1969-12-31 15:59:55 1969-12-31 20:20:01 NULL 1969-12-31 16:00:00 1969-12-31 16:00:04.679 NULL NULL
+1969-12-31 16:00:00.048 1969-12-31 16:00:15.601 NULL 1969-12-22 11:03:59 1969-12-31 16:00:48 1969-12-31 20:20:01 NULL 1969-12-31 16:00:00 1969-12-31 15:59:50.235 NULL NULL
+1969-12-31 16:00:00.008 NULL 1969-12-24 00:12:58.862 1969-12-20 21:16:47.25 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:00.008 NULL 1969-12-30 11:24:23.566 1969-12-16 11:20:17.25 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:00.008 NULL 1970-01-09 23:39:39.664 1970-01-10 17:09:21.5 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:00.008 NULL 1969-12-23 21:59:27.689 1970-01-19 01:16:31.25 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:00.008 NULL 1970-01-10 23:29:48.972 1969-12-10 02:41:39 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:00.008 NULL 1970-01-11 10:34:27.246 1970-01-14 14:49:59.25 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 15:59:59.941 1969-12-31 15:59:52.804 NULL 1969-12-13 02:11:50 1969-12-31 15:59:01 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:13.15 NULL NULL
+1969-12-31 15:59:59.979 1969-12-31 15:59:52.804 NULL 1970-01-18 12:27:09 1969-12-31 15:59:39 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:55.9 NULL NULL
+1969-12-31 15:59:59.94 1969-12-31 15:59:52.804 NULL 1970-01-18 05:11:54.75 1969-12-31 15:59:00 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:52.408 NULL NULL
+1969-12-31 15:59:59.986 1969-12-31 15:59:52.804 NULL 1969-12-13 16:50:00.5 1969-12-31 15:59:46 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:11.065 NULL NULL
+1969-12-31 16:00:00.059 1969-12-31 15:59:52.804 NULL 1969-12-18 11:57:25.5 1969-12-31 16:00:59 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:10.956 NULL NULL
+1969-12-31 15:59:59.992 1969-12-31 15:59:52.804 NULL 1969-12-10 06:06:48.5 1969-12-31 15:59:52 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:03.136 NULL NULL
+1969-12-31 16:00:00.005 1969-12-31 15:59:52.804 NULL 1969-12-19 21:53:12.5 1969-12-31 16:00:05 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:10.973 NULL NULL
+1969-12-31 15:59:59.976 1969-12-31 15:59:52.804 NULL 1970-01-10 06:18:31 1969-12-31 15:59:36 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:53.145 NULL NULL
+1969-12-31 15:59:59.95 1969-12-31 15:59:52.804 NULL 1969-12-19 17:33:32.75 1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:54.733 NULL NULL
+1969-12-31 16:00:00.011 NULL 1969-12-30 22:03:04.018 1970-01-21 12:50:53.75 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL
+1969-12-31 16:00:00.011 NULL 1969-12-27 18:49:09.583 1970-01-14 22:35:27 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL
+PREHOOK: query: explain
+select
+-- to timestamp
+ cast (ctinyint as timestamp)
+ ,cast (csmallint as timestamp)
+ ,cast (cint as timestamp)
+ ,cast (cbigint as timestamp)
+ ,cast (cfloat as timestamp)
+ ,cast (cdouble as timestamp)
+ ,cast (cboolean1 as timestamp)
+ ,cast (cbigint * 0 as timestamp)
+ ,cast (ctimestamp1 as timestamp)
+ ,cast (cstring1 as timestamp)
+ ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+-- to timestamp
+ cast (ctinyint as timestamp)
+ ,cast (csmallint as timestamp)
+ ,cast (cint as timestamp)
+ ,cast (cbigint as timestamp)
+ ,cast (cfloat as timestamp)
+ ,cast (cdouble as timestamp)
+ ,cast (cboolean1 as timestamp)
+ ,cast (cbigint * 0 as timestamp)
+ ,cast (ctimestamp1 as timestamp)
+ ,cast (cstring1 as timestamp)
+ ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: alltypesorc
+ Filter Operator
+ predicate: ((cbigint % 250) = 0) (type: boolean)
+ Select Operator
+ expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ ListSink
+
+PREHOOK: query: select
+-- to timestamp
+ cast (ctinyint as timestamp)
+ ,cast (csmallint as timestamp)
+ ,cast (cint as timestamp)
+ ,cast (cbigint as timestamp)
+ ,cast (cfloat as timestamp)
+ ,cast (cdouble as timestamp)
+ ,cast (cboolean1 as timestamp)
+ ,cast (cbigint * 0 as timestamp)
+ ,cast (ctimestamp1 as timestamp)
+ ,cast (cstring1 as timestamp)
+ ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+-- to timestamp
+ cast (ctinyint as timestamp)
+ ,cast (csmallint as timestamp)
+ ,cast (cint as timestamp)
+ ,cast (cbigint as timestamp)
+ ,cast (cfloat as timestamp)
+ ,cast (cdouble as timestamp)
+ ,cast (cboolean1 as timestamp)
+ ,cast (cbigint * 0 as timestamp)
+ ,cast (ctimestamp1 as timestamp)
+ ,cast (cstring1 as timestamp)
+ ,cast (substr(cstring1, 1, 1) as timestamp)
+from alltypesorc
+-- limit output to a reasonably small number of rows
+where cbigint % 250 = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+1969-12-31 15:59:24 1969-12-31 15:56:40 NULL 1906-06-05 13:34:10 1969-12-31 15:59:24 1969-12-31 15:56:40 NULL 1969-12-31 16:00:00 1969-12-31 15:59:45.748 NULL NULL
+1969-12-31 15:59:24 1969-12-31 15:56:40 NULL 2020-09-11 19:50:00 1969-12-31 15:59:24 1969-12-31 15:56:40 NULL 1969-12-31 16:00:00 1969-12-31 15:59:53.817 NULL NULL
+1969-12-31 15:59:30 1969-12-31 15:56:40 NULL 2015-04-23 22:10:50 1969-12-31 15:59:30 1969-12-31 15:56:40 NULL 1969-12-31 16:00:00 1969-12-31 16:00:12.935 NULL NULL
+1969-12-31 15:59:09 NULL 1994-07-07 10:09:31 2003-05-25 21:27:30 1969-12-31 15:59:09 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:08.451 NULL NULL
+1969-12-31 15:59:09 NULL 1993-09-08 22:51:22 1908-10-29 07:05:50 1969-12-31 15:59:09 NULL 1969-12-31 16:00:01 1969-12-31 16:00:00 1969-12-31 16:00:08.451 NULL NULL
+1969-12-31 16:00:20 1969-12-31 20:20:01 NULL 1958-07-07 21:05:50 1969-12-31 16:00:20 1969-12-31 20:20:01 NULL 1969-12-31 16:00:00 1969-12-31 15:59:45.129 NULL NULL
+1969-12-31 15:59:22 1969-12-31 20:20:01 NULL 1911-02-07 01:30:00 1969-12-31 15:59:22 1969-12-31 20:20:01 NULL 1969-12-31 16:00:00 1969-12-31 15:59:58.614 NULL NULL
+1969-12-31 15:59:55 1969-12-31 20:20:01 NULL 1989-05-28 20:33:20 1969-12-31 15:59:55 1969-12-31 20:20:01 NULL 1969-12-31 16:00:00 1969-12-31 16:00:04.679 NULL NULL
+1969-12-31 16:00:48 1969-12-31 20:20:01 NULL 1944-10-18 03:23:20 1969-12-31 16:00:48 1969-12-31 20:20:01 NULL 1969-12-31 16:00:00 1969-12-31 15:59:50.235 NULL NULL
+1969-12-31 16:00:08 NULL 1949-01-13 00:21:02 1940-06-26 15:47:30 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:08 NULL 1966-09-27 07:32:46 1928-05-26 10:07:30 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:08 NULL 1995-07-07 22:01:04 1997-07-05 20:58:20 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:08 NULL 1948-10-12 08:01:29 2020-05-04 04:20:50 1969-12-31 16:00:08 NULL 1969-12-31 16:00:00 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:08 NULL 1998-03-27 00:56:12 1910-12-27 06:10:00 1969-12-31 16:00:08 NULL 1969-12-31 16:00:01 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 16:00:08 NULL 1999-07-01 15:14:06 2008-03-13 02:07:30 1969-12-31 16:00:08 NULL 1969-12-31 16:00:01 1969-12-31 16:00:00 1969-12-31 16:00:15.892 NULL NULL
+1969-12-31 15:59:01 1969-12-31 14:00:04 NULL 1919-02-22 13:13:20 1969-12-31 15:59:01 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:13.15 NULL NULL
+1969-12-31 15:59:39 1969-12-31 14:00:04 NULL 2018-11-16 20:30:00 1969-12-31 15:59:39 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:55.9 NULL NULL
+1969-12-31 15:59:00 1969-12-31 14:00:04 NULL 2018-01-18 14:32:30 1969-12-31 15:59:00 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:52.408 NULL NULL
+1969-12-31 15:59:46 1969-12-31 14:00:04 NULL 1920-10-24 09:28:20 1969-12-31 15:59:46 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:11.065 NULL NULL
+1969-12-31 16:00:59 1969-12-31 14:00:04 NULL 1933-12-12 05:05:00 1969-12-31 16:00:59 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:10.956 NULL NULL
+1969-12-31 15:59:52 1969-12-31 14:00:04 NULL 1911-05-18 17:28:20 1969-12-31 15:59:52 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:03.136 NULL NULL
+1969-12-31 16:00:05 1969-12-31 14:00:04 NULL 1937-10-25 22:48:20 1969-12-31 16:00:05 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 16:00:10.973 NULL NULL
+1969-12-31 15:59:36 1969-12-31 14:00:04 NULL 1996-04-09 21:36:40 1969-12-31 15:59:36 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:53.145 NULL NULL
+1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1937-04-28 15:05:50 1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:54.733 NULL NULL
+1969-12-31 16:00:11 NULL 1967-12-14 19:06:58 2027-02-19 08:15:50 1969-12-31 16:00:11 NULL 1969-12-31 16:00:01 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL
+1969-12-31 16:00:11 NULL 1959-05-16 04:19:43 2009-01-30 06:50:00 1969-12-31 16:00:11 NULL 1969-12-31 16:00:01 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL
[4/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't
handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt
McCline via Gunther Hagleitner)
Posted by gu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
new file mode 100644
index 0000000..2243072
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_join_nulls.q.out
@@ -0,0 +1,195 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@myinput1_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@myinput1_txt
+PREHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@myinput1_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@myinput1
+POSTHOOK: query: CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@myinput1_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@myinput1
+Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+13630578
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509856
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3078400
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542038
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543491
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4542003
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4509891
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3079923
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3113558
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+3112070
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out b/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
index 929194e..6c781e3 100644
--- a/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_left_outer_join2.q.out
@@ -1,6 +1,10 @@
-PREHOOK: query: drop table if exists TJOIN1
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table if exists TJOIN1
PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table if exists TJOIN1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+drop table if exists TJOIN1
POSTHOOK: type: DROPTABLE
PREHOOK: query: drop table if exists TJOIN2
PREHOOK: type: DROPTABLE
@@ -228,8 +232,8 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
-1 20 25 NULL
0 10 15 NULL
+1 20 25 NULL
2 NULL 50 NULL
PREHOOK: query: explain
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
@@ -387,8 +391,8 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
-1 20 25 NULL
0 10 15 NULL
+1 20 25 NULL
2 NULL 50 NULL
PREHOOK: query: explain
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
@@ -466,10 +470,9 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
+0 10 15 NULL
1 20 25 NULL
2 NULL 50 NULL
-0 10 15 BB
-0 10 15 FF
PREHOOK: query: explain
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
PREHOOK: type: QUERY
@@ -547,7 +550,6 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@tjoin1
POSTHOOK: Input: default@tjoin2
#### A masked pattern was here ####
-2 NULL 50 NULL
+0 10 15 NULL
1 20 25 NULL
-0 10 15 BB
-0 10 15 FF
+2 NULL 50 NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out b/ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out
new file mode 100644
index 0000000..591e165
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_left_outer_join3.q.out
@@ -0,0 +1,222 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table vsmb_bucket_1(RNUM int, C1 int, C2 int)
+ CLUSTERED BY (C1)
+ SORTED BY (C1) INTO 1 BUCKETS
+ STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table vsmb_bucket_1(RNUM int, C1 int, C2 int)
+ CLUSTERED BY (C1)
+ SORTED BY (C1) INTO 1 BUCKETS
+ STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vsmb_bucket_1
+PREHOOK: query: create table vsmb_bucket_2(RNUM int, C1 int, C2 int)
+ CLUSTERED BY (C1)
+ SORTED BY (C1) INTO 1 BUCKETS
+ STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: query: create table vsmb_bucket_2(RNUM int, C1 int, C2 int)
+ CLUSTERED BY (C1)
+ SORTED BY (C1) INTO 1 BUCKETS
+ STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vsmb_bucket_2
+PREHOOK: query: create table vsmb_bucket_TXT_1(RNUM int, C1 int, C2 int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vsmb_bucket_TXT_1
+POSTHOOK: query: create table vsmb_bucket_TXT_1(RNUM int, C1 int, C2 int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vsmb_bucket_TXT_1
+PREHOOK: query: create table vsmb_bucket_TXT_2(RNUM int, C1 int, C2 int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vsmb_bucket_TXT_2
+POSTHOOK: query: create table vsmb_bucket_TXT_2(RNUM int, C1 int, C2 int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vsmb_bucket_TXT_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE vsmb_bucket_TXT_1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vsmb_bucket_txt_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE vsmb_bucket_TXT_1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vsmb_bucket_txt_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE vsmb_bucket_TXT_2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@vsmb_bucket_txt_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE vsmb_bucket_TXT_2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@vsmb_bucket_txt_2
+PREHOOK: query: insert into table vsmb_bucket_1 select * from vsmb_bucket_TXT_1 order by c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_txt_1
+PREHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: query: insert into table vsmb_bucket_1 select * from vsmb_bucket_TXT_1 order by c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_txt_1
+POSTHOOK: Output: default@vsmb_bucket_1
+POSTHOOK: Lineage: vsmb_bucket_1.c1 SIMPLE [(vsmb_bucket_txt_1)vsmb_bucket_txt_1.FieldSchema(name:c1, type:int, comment:null), ]
+POSTHOOK: Lineage: vsmb_bucket_1.c2 SIMPLE [(vsmb_bucket_txt_1)vsmb_bucket_txt_1.FieldSchema(name:c2, type:int, comment:null), ]
+POSTHOOK: Lineage: vsmb_bucket_1.rnum SIMPLE [(vsmb_bucket_txt_1)vsmb_bucket_txt_1.FieldSchema(name:rnum, type:int, comment:null), ]
+PREHOOK: query: insert into table vsmb_bucket_2 select * from vsmb_bucket_TXT_2 order by c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_txt_2
+PREHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: query: insert into table vsmb_bucket_2 select * from vsmb_bucket_TXT_2 order by c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_txt_2
+POSTHOOK: Output: default@vsmb_bucket_2
+POSTHOOK: Lineage: vsmb_bucket_2.c1 SIMPLE [(vsmb_bucket_txt_2)vsmb_bucket_txt_2.FieldSchema(name:c1, type:int, comment:null), ]
+POSTHOOK: Lineage: vsmb_bucket_2.c2 SIMPLE [(vsmb_bucket_txt_2)vsmb_bucket_txt_2.FieldSchema(name:c2, type:int, comment:null), ]
+POSTHOOK: Lineage: vsmb_bucket_2.rnum SIMPLE [(vsmb_bucket_txt_2)vsmb_bucket_txt_2.FieldSchema(name:rnum, type:int, comment:null), ]
+PREHOOK: query: explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tjoin2
+ Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Map Operator Tree:
+ TableScan
+ alias: tjoin1
+ Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(c2 > 15)}
+ 1
+ keys:
+ 0 c1 (type: int)
+ 1 c1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col8
+ Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+0 10 15 NULL
+1 20 25 NULL
+2 NULL 50 NULL
+PREHOOK: query: explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tjoin2
+ Statistics: Num rows: 4 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+ Map Operator Tree:
+ TableScan
+ alias: tjoin1
+ Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(c2 > 15)}
+ 1
+ keys:
+ 0 c1 (type: int)
+ 1 c1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col8
+ Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vsmb_bucket_1
+PREHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 from vsmb_bucket_1 tjoin1 left outer join vsmb_bucket_2 tjoin2 on tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vsmb_bucket_1
+POSTHOOK: Input: default@vsmb_bucket_2
+#### A masked pattern was here ####
+0 10 15 NULL
+1 20 25 NULL
+2 NULL 50 NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/results/clientpositive/tez/vector_outer_join5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_outer_join5.q.out b/ql/src/test/results/clientpositive/tez/vector_outer_join5.q.out
new file mode 100644
index 0000000..e77903a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join5.q.out
@@ -0,0 +1,1328 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sorted_mod_4
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table sorted_mod_4 stored as orc
+as select ctinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+order by ctinyint
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sorted_mod_4
+PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sorted_mod_4
+PREHOOK: Output: default@sorted_mod_4
+POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sorted_mod_4
+POSTHOOK: Output: default@sorted_mod_4
+PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+PREHOOK: query: create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_table
+POSTHOOK: query: create table small_table stored
+as orc as select ctinyint, cbigint from alltypesorc limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_table
+PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Output: default@small_table
+POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Output: default@small_table
+PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: st
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, st.*
+from sorted_mod_4 s
+left outer join small_table st
+on s.ctinyint = st.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6876
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(_col1 = 2)}
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6058
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {((UDFToInteger(_col0) pmod 4) = _col1)}
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6248
+PREHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(_col0 < 100)}
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint
+from sorted_mod_4 s
+left outer join small_table sm
+on s.ctinyint = sm.ctinyint and s.ctinyint < 100
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+6876
+PREHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 UDFToLong(_col1) (type: bigint)
+ 1 (_col0 pmod UDFToLong(8)) (type: bigint)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (_col0 pmod UDFToLong(8)) (type: bigint)
+ sort order: +
+ Map-reduce partition columns: (_col0 pmod UDFToLong(8)) (type: bigint)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 _col0 (type: tinyint)
+ Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table
+PREHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.*
+from sorted_mod_4 s
+left outer join small_table sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join sorted_mod_4 s2
+ on s2.ctinyint = s.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table
+POSTHOOK: Input: default@sorted_mod_4
+#### A masked pattern was here ####
+3268334
+PREHOOK: query: create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mod_8_mod_4
+POSTHOOK: query: create table mod_8_mod_4 stored as orc
+as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc
+where cint is not null and ctinyint is not null
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mod_8_mod_4
+PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Output: default@mod_8_mod_4
+POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Output: default@mod_8_mod_4
+PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+#### A masked pattern was here ####
+PREHOOK: query: create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_table2
+POSTHOOK: query: create table small_table2 stored
+as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_table2
+PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table2
+PREHOOK: Output: default@small_table2
+POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table2
+POSTHOOK: Output: default@small_table2
+PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: st
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, st.*
+from mod_8_mod_4 s
+left outer join small_table2 st
+on s.cmodtinyint = st.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+39112
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(_col1 = 2)}
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+11171
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {((_col0 pmod 4) = _col1)}
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+14371
+PREHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 3 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {(_col0 < 3)}
+ 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint
+from mod_8_mod_4 s
+left outer join small_table2 sm
+on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+17792
+PREHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int), cmodint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 UDFToLong(_col1) (type: bigint)
+ 1 (_col0 pmod UDFToLong(8)) (type: bigint)
+ outputColumnNames: _col0
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6663 Data size: 53310 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: sm
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cbigint (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: (_col0 pmod UDFToLong(8)) (type: bigint)
+ sort order: +
+ Map-reduce partition columns: (_col0 pmod UDFToLong(8)) (type: bigint)
+ Statistics: Num rows: 100 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cmodtinyint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6058 Data size: 48464 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 7329 Data size: 58641 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mod_8_mod_4
+PREHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.*
+from mod_8_mod_4 s
+left outer join small_table2 sm
+ on pmod(sm.cbigint, 8) = s.cmodint
+left outer join mod_8_mod_4 s2
+ on s2.cmodtinyint = s.cmodtinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mod_8_mod_4
+POSTHOOK: Input: default@small_table2
+#### A masked pattern was here ####
+6524438
[6/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't
handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt
McCline via Gunther Hagleitner)
Posted by gu...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
index 37ccf22..f971727 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java
@@ -24,7 +24,9 @@ import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -123,13 +125,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
batchCounter++;
- // Do the per-batch setup for an outer join.
-
- outerPerBatchSetup(batch);
-
- // For outer join, DO NOT apply filters yet. It is incorrect for outer join to
- // apply the filter before hash table matching.
-
final int inputLogicalSize = batch.size;
if (inputLogicalSize == 0) {
@@ -139,6 +134,44 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
return;
}
+ // Do the per-batch setup for an outer join.
+
+ outerPerBatchSetup(batch);
+
+ // For outer join, remember our input rows before ON expression filtering or before
+ // hash table matching so we can generate results for all rows (matching and non matching)
+ // later.
+ boolean inputSelectedInUse = batch.selectedInUse;
+ if (inputSelectedInUse) {
+ // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+ // throw new HiveException("batch.selected is not in sort order and unique");
+ // }
+ System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+ }
+
+ // Filtering for outer join just removes rows available for hash table matching.
+ boolean someRowsFilteredOut = false;
+ if (bigTableFilterExpressions.length > 0) {
+ // Since the input
+ for (VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(batch);
+ }
+ someRowsFilteredOut = (batch.size != inputLogicalSize);
+ if (LOG.isDebugEnabled()) {
+ if (batch.selectedInUse) {
+ if (inputSelectedInUse) {
+ LOG.debug(CLASS_NAME +
+ " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ } else {
+ LOG.debug(CLASS_NAME +
+ " inputLogicalSize " + inputLogicalSize +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ }
+ }
+ }
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
@@ -146,9 +179,6 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column Long specific declarations.
*/
@@ -178,12 +208,16 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
*/
JoinUtil.JoinResult joinResult;
- if (!joinColVector.noNulls && joinColVector.isNull[0]) {
- // Null key is no match for whole batch.
+ if (batch.size == 0) {
+ // Whole repeated key batch was filtered out.
+ joinResult = JoinUtil.JoinResult.NOMATCH;
+ } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+ // Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
// Handle *repeated* join key, if found.
long key = vector[0];
+ // LOG.debug(CLASS_NAME + " repeated key " + key);
if (useMinMax && (key < min || key > max)) {
// Out of range for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
@@ -199,7 +233,8 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+ finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+ inputSelectedInUse, inputLogicalSize);
} else {
/*
@@ -213,14 +248,13 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
int selected[] = batch.selected;
boolean selectedInUse = batch.selectedInUse;
- // For outer join we must apply the filter after match and cause some matches to become
- // non-matches, we do not track non-matches here. Instead we remember all non spilled rows
- // and compute non matches later in finishOuter.
int hashMapResultCount = 0;
- int matchCount = 0;
- int nonSpillCount = 0;
+ int allMatchCount = 0;
+ int equalKeySeriesCount = 0;
int spillCount = 0;
+ boolean atLeastOneNonMatch = someRowsFilteredOut;
+
/*
* Single-Column Long specific variables.
*/
@@ -232,9 +266,11 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
- for (int logical = 0; logical < inputLogicalSize; logical++) {
+ for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
/*
* Single-Column Long outer null detection.
*/
@@ -250,8 +286,8 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
// Let a current SPILL equal key series keep going, or
// Let a current NOMATCH keep not matching.
- // Remember non-matches for Outer Join.
- nonSpills[nonSpillCount++] = batchIndex;
+ atLeastOneNonMatch = true;
+
// LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
} else {
@@ -269,9 +305,12 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
// New key.
if (haveSaveKey) {
- // Move on with our count(s).
+ // Move on with our counts.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -300,41 +339,70 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
} else {
saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]);
}
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
- } else {
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
- }
- /*
- * Common outer join result processing.
- */
+ // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + currentKey + " " + saveJoinResult.name());
- switch (saveJoinResult) {
- case MATCH:
- matchs[matchCount] = batchIndex;
- matchHashMapResultIndices[matchCount] = hashMapResultCount;
- matchCount++;
- nonSpills[nonSpillCount++] = batchIndex;
- break;
-
- case SPILL:
- spills[spillCount] = batchIndex;
- spillHashMapResultIndices[spillCount] = hashMapResultCount;
- spillCount++;
- break;
-
- case NOMATCH:
- nonSpills[nonSpillCount++] = batchIndex;
- // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
- break;
+ /*
+ * Common outer join result processing.
+ */
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+ equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+ equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ atLeastOneNonMatch = true;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+ break;
+ }
+ } else {
+ // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+ // Series of equal keys.
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+ break;
+ }
}
+ // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+ // throw new HiveException("allMatchs is not in sort order and unique");
+ // }
}
}
if (haveSaveKey) {
- // Account for last equal key sequence.
+ // Update our counts for the last key.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -345,27 +413,26 @@ public class VectorMapJoinOuterLongOperator extends VectorMapJoinOuterGenerateRe
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter +
- " matchs " + intArrayToRangesString(matchs, matchCount) +
- " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
- " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+ " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+ " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+ " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+ " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+ " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+ " atLeastOneNonMatch " + atLeastOneNonMatch +
+ " inputSelectedInUse " + inputSelectedInUse +
+ " inputLogicalSize " + inputLogicalSize +
" spills " + intArrayToRangesString(spills, spillCount) +
" spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
// We will generate results for all matching and non-matching rows.
- // Note that scratch1 is undefined at this point -- it's preallocated storage.
- numSel = finishOuter(batch,
- matchs, matchHashMapResultIndices, matchCount,
- nonSpills, nonSpillCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount,
- scratch1);
+ finishOuter(batch,
+ allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+ inputSelectedInUse, inputLogicalSize,
+ spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
index 23a29f7..bea032a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java
@@ -128,13 +128,6 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
batchCounter++;
- // Do the per-batch setup for an outer join.
-
- outerPerBatchSetup(batch);
-
- // For outer join, DO NOT apply filters yet. It is incorrect for outer join to
- // apply the filter before hash table matching.
-
final int inputLogicalSize = batch.size;
if (inputLogicalSize == 0) {
@@ -144,6 +137,44 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
return;
}
+ // Do the per-batch setup for an outer join.
+
+ outerPerBatchSetup(batch);
+
+ // For outer join, remember our input rows before ON expression filtering or before
+ // hash table matching so we can generate results for all rows (matching and non matching)
+ // later.
+ boolean inputSelectedInUse = batch.selectedInUse;
+ if (inputSelectedInUse) {
+ // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+ // throw new HiveException("batch.selected is not in sort order and unique");
+ // }
+ System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+ }
+
+ // Filtering for outer join just removes rows available for hash table matching.
+ boolean someRowsFilteredOut = false;
+ if (bigTableFilterExpressions.length > 0) {
+ // Since the input
+ for (VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(batch);
+ }
+ someRowsFilteredOut = (batch.size != inputLogicalSize);
+ if (LOG.isDebugEnabled()) {
+ if (batch.selectedInUse) {
+ if (inputSelectedInUse) {
+ LOG.debug(CLASS_NAME +
+ " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ } else {
+ LOG.debug(CLASS_NAME +
+ " inputLogicalSize " + inputLogicalSize +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ }
+ }
+ }
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
@@ -151,9 +182,6 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Multi-Key specific declarations.
*/
@@ -199,8 +227,11 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
*/
JoinUtil.JoinResult joinResult;
- if (someKeyInputColumnIsNull) {
- // Any null key column is no match for whole batch.
+ if (batch.size == 0) {
+ // Whole repeated key batch was filtered out.
+ joinResult = JoinUtil.JoinResult.NOMATCH;
+ } else if (someKeyInputColumnIsNull) {
+ // Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
@@ -219,7 +250,8 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+ finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+ inputSelectedInUse, inputLogicalSize);
} else {
/*
@@ -233,14 +265,13 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
int selected[] = batch.selected;
boolean selectedInUse = batch.selectedInUse;
- // For outer join we must apply the filter after match and cause some matches to become
- // non-matches, we do not track non-matches here. Instead we remember all non spilled rows
- // and compute non matches later in finishOuter.
int hashMapResultCount = 0;
- int matchCount = 0;
- int nonSpillCount = 0;
+ int allMatchCount = 0;
+ int equalKeySeriesCount = 0;
int spillCount = 0;
+ boolean atLeastOneNonMatch = someRowsFilteredOut;
+
/*
* Multi-Key specific variables.
*/
@@ -252,9 +283,11 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
- for (int logical = 0; logical < inputLogicalSize; logical++) {
+ for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
/*
* Multi-Key outer null detection.
*/
@@ -272,8 +305,8 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
// Let a current SPILL equal key series keep going, or
// Let a current NOMATCH keep not matching.
- // Remember non-matches for Outer Join.
- nonSpills[nonSpillCount++] = batchIndex;
+ atLeastOneNonMatch = true;
+
// LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
} else {
@@ -292,9 +325,12 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
// New key.
if (haveSaveKey) {
- // Move on with our count(s).
+ // Move on with our counts.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -322,41 +358,68 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
byte[] keyBytes = saveKeyOutput.getData();
int keyLength = saveKeyOutput.getLength();
saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]);
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
- } else {
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
- }
- /*
- * Common outer join result processing.
- */
+ /*
+ * Common outer join result processing.
+ */
- switch (saveJoinResult) {
- case MATCH:
- matchs[matchCount] = batchIndex;
- matchHashMapResultIndices[matchCount] = hashMapResultCount;
- matchCount++;
- nonSpills[nonSpillCount++] = batchIndex;
- break;
-
- case SPILL:
- spills[spillCount] = batchIndex;
- spillHashMapResultIndices[spillCount] = hashMapResultCount;
- spillCount++;
- break;
-
- case NOMATCH:
- nonSpills[nonSpillCount++] = batchIndex;
- // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
- break;
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+ equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+ equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ atLeastOneNonMatch = true;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+ break;
+ }
+ } else {
+ // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+ // Series of equal keys.
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+ break;
+ }
}
+ // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+ // throw new HiveException("allMatchs is not in sort order and unique");
+ // }
}
}
if (haveSaveKey) {
- // Account for last equal key sequence.
+ // Update our counts for the last key.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -367,27 +430,26 @@ public class VectorMapJoinOuterMultiKeyOperator extends VectorMapJoinOuterGenera
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter +
- " matchs " + intArrayToRangesString(matchs, matchCount) +
- " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
- " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+ " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+ " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+ " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+ " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+ " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+ " atLeastOneNonMatch " + atLeastOneNonMatch +
+ " inputSelectedInUse " + inputSelectedInUse +
+ " inputLogicalSize " + inputLogicalSize +
" spills " + intArrayToRangesString(spills, spillCount) +
" spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
// We will generate results for all matching and non-matching rows.
- // Note that scratch1 is undefined at this point -- it's preallocated storage.
- numSel = finishOuter(batch,
- matchs, matchHashMapResultIndices, matchCount,
- nonSpills, nonSpillCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount,
- scratch1);
+ finishOuter(batch,
+ allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+ inputSelectedInUse, inputLogicalSize,
+ spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
index f0af3f6..49efe1a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java
@@ -115,13 +115,6 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
batchCounter++;
- // Do the per-batch setup for an outer join.
-
- outerPerBatchSetup(batch);
-
- // For outer join, DO NOT apply filters yet. It is incorrect for outer join to
- // apply the filter before hash table matching.
-
final int inputLogicalSize = batch.size;
if (inputLogicalSize == 0) {
@@ -131,6 +124,44 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
return;
}
+ // Do the per-batch setup for an outer join.
+
+ outerPerBatchSetup(batch);
+
+ // For outer join, remember our input rows before ON expression filtering or before
+ // hash table matching so we can generate results for all rows (matching and non matching)
+ // later.
+ boolean inputSelectedInUse = batch.selectedInUse;
+ if (inputSelectedInUse) {
+ // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
+ // throw new HiveException("batch.selected is not in sort order and unique");
+ // }
+ System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
+ }
+
+ // Filtering for outer join just removes rows available for hash table matching.
+ boolean someRowsFilteredOut = false;
+ if (bigTableFilterExpressions.length > 0) {
+ // Since the input
+ for (VectorExpression ve : bigTableFilterExpressions) {
+ ve.evaluate(batch);
+ }
+ someRowsFilteredOut = (batch.size != inputLogicalSize);
+ if (LOG.isDebugEnabled()) {
+ if (batch.selectedInUse) {
+ if (inputSelectedInUse) {
+ LOG.debug(CLASS_NAME +
+ " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ } else {
+ LOG.debug(CLASS_NAME +
+ " inputLogicalSize " + inputLogicalSize +
+ " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ }
+ }
+ }
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
@@ -138,9 +169,6 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column String specific declarations.
*/
@@ -172,8 +200,11 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
*/
JoinUtil.JoinResult joinResult;
- if (!joinColVector.noNulls && joinColVector.isNull[0]) {
- // Null key is no match for whole batch.
+ if (batch.size == 0) {
+ // Whole repeated key batch was filtered out.
+ joinResult = JoinUtil.JoinResult.NOMATCH;
+ } else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
+ // Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
// Handle *repeated* join key, if found.
@@ -190,7 +221,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishOuterRepeated(batch, joinResult, hashMapResults[0], scratch1);
+ finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut,
+ inputSelectedInUse, inputLogicalSize);
} else {
/*
@@ -204,14 +236,13 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
int selected[] = batch.selected;
boolean selectedInUse = batch.selectedInUse;
- // For outer join we must apply the filter after match and cause some matches to become
- // non-matches, we do not track non-matches here. Instead we remember all non spilled rows
- // and compute non matches later in finishOuter.
int hashMapResultCount = 0;
- int matchCount = 0;
- int nonSpillCount = 0;
+ int allMatchCount = 0;
+ int equalKeySeriesCount = 0;
int spillCount = 0;
+ boolean atLeastOneNonMatch = someRowsFilteredOut;
+
/*
* Single-Column String specific variables.
*/
@@ -223,9 +254,11 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
- for (int logical = 0; logical < inputLogicalSize; logical++) {
+ for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
+
/*
* Single-Column String outer null detection.
*/
@@ -241,8 +274,8 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
// Let a current SPILL equal key series keep going, or
// Let a current NOMATCH keep not matching.
- // Remember non-matches for Outer Join.
- nonSpills[nonSpillCount++] = batchIndex;
+ atLeastOneNonMatch = true;
+
// LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
} else {
@@ -262,9 +295,12 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
// New key.
if (haveSaveKey) {
- // Move on with our count(s).
+ // Move on with our counts.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -290,43 +326,69 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
byte[] keyBytes = vector[batchIndex];
int keyStart = start[batchIndex];
int keyLength = length[batchIndex];
-
saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]);
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + saveJoinResult.name());
- } else {
- // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveJoinResult.name());
- }
- /*
- * Common outer join result processing.
- */
+ /*
+ * Common outer join result processing.
+ */
- switch (saveJoinResult) {
- case MATCH:
- matchs[matchCount] = batchIndex;
- matchHashMapResultIndices[matchCount] = hashMapResultCount;
- matchCount++;
- nonSpills[nonSpillCount++] = batchIndex;
- break;
-
- case SPILL:
- spills[spillCount] = batchIndex;
- spillHashMapResultIndices[spillCount] = hashMapResultCount;
- spillCount++;
- break;
-
- case NOMATCH:
- nonSpills[nonSpillCount++] = batchIndex;
- // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
- break;
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
+ equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
+ equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ atLeastOneNonMatch = true;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
+ break;
+ }
+ } else {
+ // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name());
+
+ // Series of equal keys.
+
+ switch (saveJoinResult) {
+ case MATCH:
+ equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
+ allMatchs[allMatchCount++] = batchIndex;
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
+ break;
+
+ case SPILL:
+ spills[spillCount] = batchIndex;
+ spillHashMapResultIndices[spillCount] = hashMapResultCount;
+ spillCount++;
+ break;
+
+ case NOMATCH:
+ // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
+ break;
+ }
}
+ // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
+ // throw new HiveException("allMatchs is not in sort order and unique");
+ // }
}
}
if (haveSaveKey) {
- // Account for last equal key sequence.
+ // Update our counts for the last key.
switch (saveJoinResult) {
case MATCH:
+ hashMapResultCount++;
+ equalKeySeriesCount++;
+ break;
case SPILL:
hashMapResultCount++;
break;
@@ -337,27 +399,26 @@ public class VectorMapJoinOuterStringOperator extends VectorMapJoinOuterGenerate
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter +
- " matchs " + intArrayToRangesString(matchs, matchCount) +
- " matchHashMapResultIndices " + intArrayToRangesString(matchHashMapResultIndices, matchCount) +
- " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount) +
+ " allMatchs " + intArrayToRangesString(allMatchs,allMatchCount) +
+ " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
+ " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
+ " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
+ " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
+ " atLeastOneNonMatch " + atLeastOneNonMatch +
+ " inputSelectedInUse " + inputSelectedInUse +
+ " inputLogicalSize " + inputLogicalSize +
" spills " + intArrayToRangesString(spills, spillCount) +
" spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
// We will generate results for all matching and non-matching rows.
- // Note that scratch1 is undefined at this point -- it's preallocated storage.
- numSel = finishOuter(batch,
- matchs, matchHashMapResultIndices, matchCount,
- nonSpills, nonSpillCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount,
- scratch1);
+ finishOuter(batch,
+ allMatchCount, equalKeySeriesCount, atLeastOneNonMatch,
+ inputSelectedInUse, inputLogicalSize,
+ spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
index 1c91be6..32b60d0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinRowBytesContainer.java
@@ -91,7 +91,7 @@ public class VectorMapJoinRowBytesContainer {
}
tmpFile = File.createTempFile("BytesContainer", ".tmp", parentFile);
- LOG.info("BytesContainer created temp file " + tmpFile.getAbsolutePath());
+ LOG.debug("BytesContainer created temp file " + tmpFile.getAbsolutePath());
tmpFile.deleteOnExit();
fileOutputStream = new FileOutputStream(tmpFile);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index f9550c9..6afaec3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -54,13 +54,13 @@ public abstract class VectorMapJoinFastBytesHashMap
slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength);
- // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
keysAssigned++;
} else {
// Add another value.
- // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
slotTriples[tripleIndex + 2] = valueStore.addMore(slotTriples[tripleIndex + 2], valueBytes, 0, valueLength);
- // LOG.info("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
}
}
@@ -77,7 +77,7 @@ public abstract class VectorMapJoinFastBytesHashMap
if (valueRefWord == -1) {
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
- // LOG.info("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null));
+ // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null));
optimizedHashMapResult.set(valueStore, valueRefWord);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 9dcaf8f..dceb99c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -49,11 +49,11 @@ public abstract class VectorMapJoinFastBytesHashMultiSet
slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
slotTriples[tripleIndex + 1] = hashCode;
slotTriples[tripleIndex + 2] = 1; // Count.
- // LOG.info("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
keysAssigned++;
} else {
// Add another value.
- // LOG.info("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
slotTriples[tripleIndex + 2]++;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
index b6e6321..91d7fd6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java
@@ -79,13 +79,13 @@ public abstract class VectorMapJoinFastBytesHashTable
while (true) {
int tripleIndex = 3 * slot;
if (slotTriples[tripleIndex] == 0) {
- // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty");
+ // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty");
isNewKey = true;;
break;
}
if (hashCode == slotTriples[tripleIndex + 1] &&
keyStore.equalKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) {
- // LOG.info("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing");
+ // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing");
isNewKey = false;
break;
}
@@ -155,7 +155,7 @@ public abstract class VectorMapJoinFastBytesHashTable
}
// Use old value reference word.
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
newSlotTriples[newTripleIndex] = keyRef;
newSlotTriples[newTripleIndex + 1] = hashCode;
@@ -170,7 +170,7 @@ public abstract class VectorMapJoinFastBytesHashTable
largestNumberOfSteps = newLargestNumberOfSteps;
resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
metricExpands++;
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
+ // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
}
protected long findReadSlot(byte[] keyBytes, int keyStart, int keyLength, long hashCode) {
@@ -181,7 +181,7 @@ public abstract class VectorMapJoinFastBytesHashTable
int i = 0;
while (true) {
int tripleIndex = slot * 3;
- // LOG.info("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
+ // LOG.debug("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2]));
if (slotTriples[tripleIndex] != 0 && hashCode == slotTriples[tripleIndex + 1]) {
// Finally, verify the key bytes match.
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
index f2f42ee..9d95d05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java
@@ -112,7 +112,7 @@ public class VectorMapJoinFastKeyStore {
}
keyRefWord |= absoluteKeyOffset;
- // LOG.info("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord));
+ // LOG.debug("VectorMapJoinFastKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord));
return keyRefWord;
}
@@ -122,7 +122,7 @@ public class VectorMapJoinFastKeyStore {
(int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift);
boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn);
- // LOG.info("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord));
+ // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord));
if (isKeyLengthSmall && storedKeyLengthLength != keyLength) {
return false;
@@ -135,7 +135,7 @@ public class VectorMapJoinFastKeyStore {
// Read big value length we wrote with the value.
storedKeyLengthLength = writeBuffers.readVInt(readPos);
if (storedKeyLengthLength != keyLength) {
- // LOG.info("VectorMapJoinFastKeyStore equalKey no match big length");
+ // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length");
return false;
}
}
@@ -148,11 +148,11 @@ public class VectorMapJoinFastKeyStore {
for (int i = 0; i < keyLength; i++) {
if (currentBytes[currentStart + i] != keyBytes[keyStart + i]) {
- // LOG.info("VectorMapJoinFastKeyStore equalKey no match on bytes");
+ // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes");
return false;
}
}
- // LOG.info("VectorMapJoinFastKeyStore equalKey match on bytes");
+ // LOG.debug("VectorMapJoinFastKeyStore equalKey match on bytes");
return true;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
index d6ad028..4725f55 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java
@@ -68,7 +68,7 @@ public class VectorMapJoinFastLongHashMap
optimizedHashMapResult.forget();
long hashCode = VectorMapJoinFastLongHashUtil.hashKey(key);
- // LOG.info("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode);
+ // LOG.debug("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode);
long valueRef = findReadSlot(key, hashCode);
JoinUtil.JoinResult joinResult;
if (valueRef == -1) {
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index 2137fb7..17855eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -121,13 +121,13 @@ public abstract class VectorMapJoinFastLongHashTable
int pairIndex = 2 * slot;
long valueRef = slotPairs[pairIndex];
if (valueRef == 0) {
- // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
isNewKey = true;
break;
}
long tableKey = slotPairs[pairIndex + 1];
if (key == tableKey) {
- // LOG.info("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable add key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
isNewKey = false;
break;
}
@@ -145,7 +145,7 @@ public abstract class VectorMapJoinFastLongHashTable
// debugDumpKeyProbe(keyOffset, keyLength, hashCode, slot);
}
- // LOG.info("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode));
+ // LOG.debug("VectorMapJoinFastLongHashTable add slot " + slot + " hashCode " + Long.toHexString(hashCode));
assignSlot(slot, key, isNewKey, currentValue);
@@ -206,7 +206,7 @@ public abstract class VectorMapJoinFastLongHashTable
}
// Use old value reference word.
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")");
newSlotPairs[newPairIndex] = valueRef;
newSlotPairs[newPairIndex + 1] = tableKey;
@@ -220,7 +220,7 @@ public abstract class VectorMapJoinFastLongHashTable
largestNumberOfSteps = newLargestNumberOfSteps;
resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
metricExpands++;
- // LOG.info("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
+ // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands);
}
protected long findReadSlot(long key, long hashCode) {
@@ -235,20 +235,20 @@ public abstract class VectorMapJoinFastLongHashTable
long valueRef = slotPairs[pairIndex];
if (valueRef == 0) {
// Given that we do not delete, an empty slot means no match.
- // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")");
return -1;
}
long tableKey = slotPairs[pairIndex + 1];
if (key == tableKey) {
- // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")");
return slotPairs[pairIndex];
}
// Some other key (collision) - keep probing.
probeSlot += (++i);
if (i > largestNumberOfSteps) {
- // LOG.info("VectorMapJoinFastLongHashTable findReadSlot returning not found");
+ // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot returning not found");
// We know we never went that far when we were inserting.
- // LOG.info("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")");
+ // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")");
return -1;
}
slot = (int)(probeSlot & logicalHashBucketMask);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
index 373b5f4..4b1d6f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java
@@ -81,7 +81,7 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai
int newThreshold = HashMapWrapper.calculateTableSize(
keyCountAdj, threshold, loadFactor, keyCount);
- // LOG.info("VectorMapJoinFastTableContainer load newThreshold " + newThreshold);
+ // LOG.debug("VectorMapJoinFastTableContainer load newThreshold " + newThreshold);
VectorMapJoinFastHashTable = createHashTable(newThreshold);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
index caa705c..6491dc6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java
@@ -142,7 +142,7 @@ public class VectorMapJoinFastValueStore {
}
public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) {
- // LOG.info("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
+ // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord));
this.valueStore = valueStore;
this.valueRefWord = valueRefWord;
@@ -473,7 +473,7 @@ public class VectorMapJoinFastValueStore {
valueRefWord |= SmallValueLength.allBitsOnBitShifted;
}
- // LOG.info("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
+ // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord));
// The lower bits are the absolute value offset.
valueRefWord |= newAbsoluteOffset;
@@ -499,7 +499,7 @@ public class VectorMapJoinFastValueStore {
boolean isOldValueLast =
((oldValueRef & IsLastFlag.flagOnMask) != 0);
- // LOG.info("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
+ // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef));
/*
* Write information about the old value (which becomes our next) at the beginning
@@ -546,7 +546,7 @@ public class VectorMapJoinFastValueStore {
// The lower bits are the absolute value offset.
newValueRef |= newAbsoluteOffset;
- // LOG.info("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
+ // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef));
return newValueRef;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
index 60825ce..dc65eaa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
@@ -113,7 +113,7 @@ public class VectorMapJoinOptimizedLongCommon {
}
// byte[] bytes = Arrays.copyOf(currentKey.get(), currentKey.getLength());
- // LOG.info("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
+ // LOG.debug("VectorMapJoinOptimizedLongCommon adaptPutRow key " + key + " min " + min + " max " + max + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
}
@@ -145,7 +145,7 @@ public class VectorMapJoinOptimizedLongCommon {
}
// byte[] bytes = Arrays.copyOf(output.getData(), output.getLength());
- // LOG.info("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
+ // LOG.debug("VectorMapJoinOptimizedLongCommon serialize key " + key + " hashTableKeyType " + hashTableKeyType.name() + " hex " + Hex.encodeHexString(bytes));
serializedBytes.bytes = output.getData();
serializedBytes.offset = 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 096239e..656a5e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1069,11 +1069,21 @@ public class Vectorizer implements PhysicalPlanResolver {
private boolean validateMapJoinDesc(MapJoinDesc desc) {
byte posBigTable = (byte) desc.getPosBigTable();
List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
+ if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) {
+ LOG.info("Cannot vectorize map work filter expression");
+ return false;
+ }
List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
+ if (!validateExprNodeDesc(keyExprs)) {
+ LOG.info("Cannot vectorize map work key expression");
+ return false;
+ }
List<ExprNodeDesc> valueExprs = desc.getExprs().get(posBigTable);
- return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) &&
- validateExprNodeDesc(keyExprs) &&
- validateExprNodeDesc(valueExprs);
+ if (!validateExprNodeDesc(valueExprs)) {
+ LOG.info("Cannot vectorize map work value expression");
+ return false;
+ }
+ return true;
}
private boolean validateReduceSinkOperator(ReduceSinkOperator op) {
@@ -1089,6 +1099,7 @@ public class Vectorizer implements PhysicalPlanResolver {
for (ExprNodeDesc desc : descList) {
boolean ret = validateExprNodeDesc(desc);
if (!ret) {
+ LOG.info("Cannot vectorize select expression: " + desc.toString());
return false;
}
}
@@ -1110,10 +1121,12 @@ public class Vectorizer implements PhysicalPlanResolver {
}
boolean ret = validateExprNodeDesc(desc.getKeys());
if (!ret) {
+ LOG.info("Cannot vectorize groupby key expression");
return false;
}
ret = validateAggregationDesc(desc.getAggregators(), isReduce);
if (!ret) {
+ LOG.info("Cannot vectorize groupby aggregate expression");
return false;
}
if (isReduce) {
@@ -1248,10 +1261,13 @@ public class Vectorizer implements PhysicalPlanResolver {
}
private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduce) {
- if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) {
+ String udfName = aggDesc.getGenericUDAFName().toLowerCase();
+ if (!supportedAggregationUdfs.contains(udfName)) {
+ LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported");
return false;
}
if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
+ LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported");
return false;
}
// See if we can vectorize the aggregation.
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_join30.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join30.q b/ql/src/test/queries/clientpositive/vector_join30.q
new file mode 100644
index 0000000..2275804
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join30.q
@@ -0,0 +1,160 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src;
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+LEFT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+explain
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
+
+FROM
+(SELECT orcsrc.* FROM orcsrc sort by key) x
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Y
+ON (x.key = Y.key)
+RIGHT OUTER JOIN
+(SELECT orcsrc.* FROM orcsrc sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value));
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_join_filters.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join_filters.q b/ql/src/test/queries/clientpositive/vector_join_filters.q
new file mode 100644
index 0000000..adf525c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join_filters.q
@@ -0,0 +1,38 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int);
+LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_txt;
+CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_join_nulls.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_join_nulls.q b/ql/src/test/queries/clientpositive/vector_join_nulls.q
new file mode 100644
index 0000000..6cfb7a8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_join_nulls.q
@@ -0,0 +1,33 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE myinput1_txt(key int, value int);
+LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE myinput1_txt;
+CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value and a.key=b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value;
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value);
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.value = c.value;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
index 098d002..62ad9ee 100644
--- a/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
+++ b/ql/src/test/queries/clientpositive/vector_left_outer_join2.q
@@ -3,6 +3,8 @@ set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;
+-- SORT_QUERY_RESULTS
+
drop table if exists TJOIN1;
drop table if exists TJOIN2;
create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc;
[7/7] hive git commit: HIVE-10565: Native Vector Map Join doesn't
handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt
McCline via Gunther Hagleitner)
Posted by gu...@apache.org.
HIVE-10565: Native Vector Map Join doesn't handle filtering and matching on LEFT OUTER JOIN repeated key correctly (Matt McCline via Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2b9f2f5e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2b9f2f5e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2b9f2f5e
Branch: refs/heads/master
Commit: 2b9f2f5e2574e6e64ce9496dfe9ff6e085036fb1
Parents: 3fa7489
Author: Gunther Hagleitner <gu...@apache.org>
Authored: Thu May 14 15:42:04 2015 -0700
Committer: Gunther Hagleitner <gu...@apache.org>
Committed: Thu May 14 15:42:04 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 10 +
.../ql/exec/vector/VectorizedBatchUtil.java | 5 +-
.../mapjoin/VectorMapJoinCommonOperator.java | 8 +-
.../VectorMapJoinGenerateResultOperator.java | 47 +-
...pJoinInnerBigOnlyGenerateResultOperator.java | 53 +-
.../VectorMapJoinInnerBigOnlyLongOperator.java | 15 +-
...ctorMapJoinInnerBigOnlyMultiKeyOperator.java | 15 +-
...VectorMapJoinInnerBigOnlyStringOperator.java | 12 +-
...ectorMapJoinInnerGenerateResultOperator.java | 39 +-
.../mapjoin/VectorMapJoinInnerLongOperator.java | 17 +-
.../VectorMapJoinInnerMultiKeyOperator.java | 19 +-
.../VectorMapJoinInnerStringOperator.java | 17 +-
...orMapJoinLeftSemiGenerateResultOperator.java | 40 +-
.../VectorMapJoinLeftSemiLongOperator.java | 13 +-
.../VectorMapJoinLeftSemiMultiKeyOperator.java | 17 +-
.../VectorMapJoinLeftSemiStringOperator.java | 17 +-
...ectorMapJoinOuterGenerateResultOperator.java | 805 ++++---
.../mapjoin/VectorMapJoinOuterLongOperator.java | 189 +-
.../VectorMapJoinOuterMultiKeyOperator.java | 184 +-
.../VectorMapJoinOuterStringOperator.java | 185 +-
.../mapjoin/VectorMapJoinRowBytesContainer.java | 2 +-
.../fast/VectorMapJoinFastBytesHashMap.java | 8 +-
.../VectorMapJoinFastBytesHashMultiSet.java | 4 +-
.../fast/VectorMapJoinFastBytesHashTable.java | 10 +-
.../mapjoin/fast/VectorMapJoinFastKeyStore.java | 10 +-
.../fast/VectorMapJoinFastLongHashMap.java | 2 +-
.../fast/VectorMapJoinFastLongHashTable.java | 18 +-
.../fast/VectorMapJoinFastTableContainer.java | 2 +-
.../fast/VectorMapJoinFastValueStore.java | 8 +-
.../VectorMapJoinOptimizedLongCommon.java | 4 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 24 +-
.../test/queries/clientpositive/vector_join30.q | 160 ++
.../clientpositive/vector_join_filters.q | 38 +
.../queries/clientpositive/vector_join_nulls.q | 33 +
.../clientpositive/vector_left_outer_join2.q | 2 +
.../queries/clientpositive/vector_outer_join5.q | 173 ++
.../tez/acid_vectorization_partition.q.out | 20 +-
.../clientpositive/tez/vector_join30.q.out | 1367 +++++++++++
.../tez/vector_join_filters.q.out | 222 ++
.../clientpositive/tez/vector_join_nulls.q.out | 195 ++
.../tez/vector_left_outer_join2.q.out | 20 +-
.../tez/vector_left_outer_join3.q.out | 222 ++
.../clientpositive/tez/vector_outer_join5.q.out | 1328 +++++++++++
.../tez/vectorized_timestamp_ints_casts.q.out | 234 ++
.../results/clientpositive/vector_join30.q.out | 2194 ++++++++++++++++++
.../clientpositive/vector_join_filters.q.out | 222 ++
.../clientpositive/vector_join_nulls.q.out | 195 ++
.../vector_left_outer_join2.q.out | 8 +-
.../clientpositive/vector_outer_join5.q.out | 1300 +++++++++++
49 files changed, 8936 insertions(+), 796 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f9c9351..c79c36c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -220,8 +220,12 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
vector_groupby_3.q,\
vector_groupby_reduce.q,\
vector_if_expr.q,\
+ vector_inner_join.q,\
vector_interval_1.q,\
vector_interval_2.q,\
+ vector_join30.q,\
+ vector_join_filters.q,\
+ vector_join_nulls.q,\
vector_left_outer_join.q,\
vector_left_outer_join2.q,\
vector_leftsemi_mapjoin.q,\
@@ -230,6 +234,12 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
vector_multi_insert.q,\
vector_non_string_partition.q,\
vector_orderby_5.q,\
+ vector_outer_join0.q,\
+ vector_outer_join1.q,\
+ vector_outer_join2.q,\
+ vector_outer_join3.q,\
+ vector_outer_join4.q,\
+ vector_outer_join5.q,\
vector_partition_diff_num_cols.q,\
vector_partitioned_date_time.q,\
vector_reduce_groupby_decimal.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index dcea8ae..4a16b4c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -645,8 +645,7 @@ public class VectorizedBatchUtil {
public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, String prefix) {
StringBuffer sb = new StringBuffer();
sb.append(prefix + " row " + index + " ");
- for (int i = 0; i < batch.projectionSize; i++) {
- int column = batch.projectedColumns[i];
+ for (int column = 0; column < batch.cols.length; column++) {
ColumnVector colVector = batch.cols[column];
if (colVector == null) {
sb.append("(null colVector " + column + ")");
@@ -666,7 +665,7 @@ public class VectorizedBatchUtil {
if (bytes == null) {
sb.append("(Unexpected null bytes with start " + start + " length " + length + ")");
} else {
- sb.append(displayBytes(bytes, start, length));
+ sb.append("bytes: '" + displayBytes(bytes, start, length) + "'");
}
} else if (colVector instanceof DecimalColumnVector) {
sb.append(((DecimalColumnVector) colVector).vector[index].toString());
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index a9082eb..af78776 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -470,8 +470,8 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns));
LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns));
- LOG.info(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection));
- LOG.info(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputTypeNames " + Arrays.toString(outputTypeNames));
+ LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection));
+ LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputTypeNames " + Arrays.toString(outputTypeNames));
}
setupVOutContext(conf.getOutputColumnNames());
@@ -503,7 +503,7 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
*/
protected void setupVOutContext(List<String> outputColumnNames) {
if (LOG.isDebugEnabled()) {
- LOG.info(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames);
+ LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames);
}
if (outputColumnNames.size() != outputProjection.length) {
throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeNames) + " length mismatch");
@@ -729,9 +729,9 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
* Common one time setup by native vectorized map join operator's processOp.
*/
protected void commonSetup(VectorizedRowBatch batch) throws HiveException {
- LOG.info("VectorMapJoinInnerCommonOperator commonSetup begin...");
if (LOG.isDebugEnabled()) {
+ LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin...");
displayBatchColumns(batch, "batch");
displayBatchColumns(overflowBatch, "overflowBatch");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
index 860ebb5..32c126c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
@@ -373,10 +373,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
* The big table batch.
* @param hashMapResult
* The hash map results for the repeated key.
- * @return
- * The new count of selected rows.
*/
- protected int generateHashMapResultRepeatedAll(VectorizedRowBatch batch,
+ protected void generateHashMapResultRepeatedAll(VectorizedRowBatch batch,
VectorMapJoinHashMapResult hashMapResult) throws IOException, HiveException {
int[] selected = batch.selected;
@@ -400,7 +398,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
batch.selected, 0, batch.size);
}
- return numSel;
+ batch.size = numSel;
}
//-----------------------------------------------------------------------------------------------
@@ -462,7 +460,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
// int length = output.getLength() - offset;
rowBytesContainer.finishRow();
-// LOG.info("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length);
+// LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length);
}
protected void spillHashMapBatch(VectorizedRowBatch batch,
@@ -514,14 +512,18 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
smallTable);
needHashTableSetup = true;
- LOG.info(CLASS_NAME + " reloadHashTable!");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(CLASS_NAME + " reloadHashTable!");
+ }
}
@Override
protected void reProcessBigTable(int partitionId)
throws HiveException {
- LOG.info(CLASS_NAME + " reProcessBigTable enter...");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(CLASS_NAME + " reProcessBigTable enter...");
+ }
if (spillReplayBatch == null) {
// The process method was not called -- no big table rows.
@@ -544,14 +546,14 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
int offset = bigTable.currentOffset();
int length = bigTable.currentLength();
-// LOG.info(CLASS_NAME + " reProcessBigTable serialized row #" + rowCount + ", offset " + offset + ", length " + length);
+// LOG.debug(CLASS_NAME + " reProcessBigTable serialized row #" + rowCount + ", offset " + offset + ", length " + length);
bigTableVectorDeserializeRow.setBytes(bytes, offset, length);
bigTableVectorDeserializeRow.deserializeByValue(spillReplayBatch, spillReplayBatch.size);
spillReplayBatch.size++;
if (spillReplayBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
- LOG.info("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
+ // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
process(spillReplayBatch, posBigTable); // call process once we have a full batch
spillReplayBatch.reset();
batchCount++;
@@ -559,7 +561,7 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
}
// Process the row batch that has less than DEFAULT_SIZE rows
if (spillReplayBatch.size > 0) {
- LOG.info("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
+ // LOG.debug("reProcessBigTable going to call process with spillReplayBatch.size " + spillReplayBatch.size + " rows");
process(spillReplayBatch, posBigTable);
spillReplayBatch.reset();
batchCount++;
@@ -570,7 +572,9 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
throw new HiveException(e);
}
- LOG.info(CLASS_NAME + " reProcessBigTable exit! " + rowCount + " row processed and " + batchCount + " batches processed");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(CLASS_NAME + " reProcessBigTable exit! " + rowCount + " row processed and " + batchCount + " batches processed");
+ }
}
@@ -632,7 +636,9 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
if (!aborted && overflowBatch.size > 0) {
forwardOverflow();
}
- LOG.info("VectorMapJoinInnerLongOperator closeOp " + batchCounter + " batches processed");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("VectorMapJoinInnerLongOperator closeOp " + batchCounter + " batches processed");
+ }
}
//-----------------------------------------------------------------------------------------------
@@ -641,6 +647,23 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
* Debug.
*/
+ public boolean verifyMonotonicallyIncreasing(int[] selected, int size) {
+
+ if (size == 0) {
+ return true;
+ }
+ int prevBatchIndex = selected[0];
+
+ for (int i = 1; i < size; i++) {
+ int batchIndex = selected[i];
+ if (batchIndex <= prevBatchIndex) {
+ return false;
+ }
+ prevBatchIndex = batchIndex;
+ }
+ return true;
+ }
+
public static String intArrayToRangesString(int selection[], int size) {
if (size == 0) {
return "[]";
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
index 3132531..f18b982 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java
@@ -129,22 +129,10 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
* @param batch
* The big table batch with any matching and any non matching rows both as
* selected in use.
- * @param allMatchs
- * A subset of the rows of the batch that are matches.
* @param allMatchCount
* Number of matches in allMatchs.
- * @param equalKeySeriesValueCounts
- * For each equal key series, whether the number of (empty) small table values.
- * @param equalKeySeriesAllMatchIndices
- * For each equal key series, the logical index into allMatchs.
- * @param equalKeySeriesDuplicateCounts
- * For each equal key series, the number of duplicates or equal keys.
* @param equalKeySeriesCount
* Number of single value matches.
- * @param spills
- * A subset of the rows of the batch that are spills.
- * @param spillHashMapResultIndices
- * For each entry in spills, the index into the hashMapResult.
* @param spillCount
* Number of spills in spills.
* @param hashTableResults
@@ -154,15 +142,16 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
* Number of entries in hashMapResults.
*
**/
- protected int finishInnerBigOnly(VectorizedRowBatch batch,
- int[] allMatchs, int allMatchCount,
- long[] equalKeySeriesValueCounts, int[] equalKeySeriesAllMatchIndices,
- int[] equalKeySeriesDuplicateCounts, int equalKeySeriesCount,
- int[] spills, int[] spillHashMapResultIndices, int spillCount,
+ protected void finishInnerBigOnly(VectorizedRowBatch batch,
+ int allMatchCount, int equalKeySeriesCount, int spillCount,
VectorMapJoinHashTableResult[] hashTableResults, int hashMapResultCount)
throws HiveException, IOException {
- int numSel = 0;
+ // Get rid of spills before we start modifying the batch.
+ if (spillCount > 0) {
+ spillHashMapBatch(batch, hashTableResults,
+ spills, spillHashMapResultIndices, spillCount);
+ }
/*
* Optimize by running value expressions only over the matched rows.
@@ -171,6 +160,7 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
performValueExpressions(batch, allMatchs, allMatchCount);
}
+ int numSel = 0;
for (int i = 0; i < equalKeySeriesCount; i++) {
long count = equalKeySeriesValueCounts[i];
int allMatchesIndex = equalKeySeriesAllMatchIndices[i];
@@ -185,13 +175,8 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
duplicateCount, count);
}
}
-
- if (spillCount > 0) {
- spillHashMapBatch(batch, hashTableResults,
- spills, spillHashMapResultIndices, spillCount);
- }
-
- return numSel;
+ batch.size = numSel;
+ batch.selectedInUse = true;
}
/**
@@ -215,11 +200,11 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
int[] allMatchs, int allMatchesIndex, int duplicateCount, int numSel)
throws HiveException, IOException {
- // LOG.info("generateHashMultiSetResultSingleValue enter...");
+ // LOG.debug("generateHashMultiSetResultSingleValue enter...");
// Generate result within big table batch itself.
- // LOG.info("generateHashMultiSetResultSingleValue with big table...");
+ // LOG.debug("generateHashMultiSetResultSingleValue with big table...");
for (int i = 0; i < duplicateCount; i++) {
@@ -250,7 +235,7 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
int[] allMatchs, int allMatchesIndex,
int duplicateCount, long count) throws HiveException, IOException {
- // LOG.info("generateHashMultiSetResultMultiValue allMatchesIndex " + allMatchesIndex + " duplicateCount " + duplicateCount + " count " + count);
+ // LOG.debug("generateHashMultiSetResultMultiValue allMatchesIndex " + allMatchesIndex + " duplicateCount " + duplicateCount + " count " + count);
// TODO: Look at repeating optimizations...
@@ -309,11 +294,9 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
return 0;
}
- protected int finishInnerBigOnlyRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
+ protected void finishInnerBigOnlyRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
VectorMapJoinHashMultiSetResult hashMultiSetResult) throws HiveException, IOException {
- int numSel = 0;
-
switch (joinResult) {
case MATCH:
@@ -325,19 +308,21 @@ public abstract class VectorMapJoinInnerBigOnlyGenerateResultOperator
}
// Generate special repeated case.
- numSel = generateHashMultiSetResultRepeatedAll(batch, hashMultiSetResult);
+ int numSel = generateHashMultiSetResultRepeatedAll(batch, hashMultiSetResult);
+ batch.size = numSel;
+ batch.selectedInUse = true;
break;
case SPILL:
// Whole batch is spilled.
spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMultiSetResult);
+ batch.size = 0;
break;
case NOMATCH:
// No match for entire batch.
+ batch.size = 0;
break;
}
-
- return numSel;
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
index 53a91d8..bb7efda 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java
@@ -151,9 +151,6 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBig
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column Long specific declarations.
*/
@@ -198,7 +195,7 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBig
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
+ finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
} else {
/*
@@ -358,17 +355,11 @@ public class VectorMapJoinInnerBigOnlyLongOperator extends VectorMapJoinInnerBig
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
}
- numSel = finishInnerBigOnly(batch,
- allMatchs, allMatchCount,
- equalKeySeriesValueCounts, equalKeySeriesAllMatchIndices,
- equalKeySeriesDuplicateCounts, equalKeySeriesCount,
- spills, spillHashMapResultIndices, spillCount,
+ finishInnerBigOnly(batch,
+ allMatchCount, equalKeySeriesCount, spillCount,
(VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
index 9553fa0..c36f668 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
@@ -156,9 +156,6 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInne
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Multi-Key specific declarations.
*/
@@ -210,7 +207,7 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInne
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
+ finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
} else {
/*
@@ -371,17 +368,11 @@ public class VectorMapJoinInnerBigOnlyMultiKeyOperator extends VectorMapJoinInne
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
}
- numSel = finishInnerBigOnly(batch,
- allMatchs, allMatchCount,
- equalKeySeriesValueCounts, equalKeySeriesAllMatchIndices,
- equalKeySeriesDuplicateCounts, equalKeySeriesCount,
- spills, spillHashMapResultIndices, spillCount,
+ finishInnerBigOnly(batch,
+ allMatchCount, equalKeySeriesCount, spillCount,
(VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
index 17d0b63..87a11c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java
@@ -187,7 +187,7 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerB
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
+ finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
} else {
/*
@@ -347,17 +347,11 @@ public class VectorMapJoinInnerBigOnlyStringOperator extends VectorMapJoinInnerB
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
}
- numSel = finishInnerBigOnly(batch,
- allMatchs, allMatchCount,
- equalKeySeriesValueCounts, equalKeySeriesAllMatchIndices,
- equalKeySeriesDuplicateCounts, equalKeySeriesCount,
- spills, spillHashMapResultIndices, spillCount,
+ finishInnerBigOnly(batch,
+ allMatchCount, equalKeySeriesCount, spillCount,
(VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
index 3a5e4b2..ee1abd3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java
@@ -147,38 +147,17 @@ public abstract class VectorMapJoinInnerGenerateResultOperator
* @param batch
* The big table batch with any matching and any non matching rows both as
* selected in use.
- * @param allMatchs
- * A subset of the rows of the batch that are matches.
* @param allMatchCount
* Number of matches in allMatchs.
- * @param equalKeySeriesHashMapResultIndices
- * For each equal key series, the index into the hashMapResult.
- * @param equalKeySeriesAllMatchIndices
- * For each equal key series, the logical index into allMatchs.
- * @param equalKeySeriesIsSingleValue
- * For each equal key series, whether there is 1 or multiple small table values.
- * @param equalKeySeriesDuplicateCounts
- * For each equal key series, the number of duplicates or equal keys.
* @param equalKeySeriesCount
* Number of single value matches.
- * @param spills
- * A subset of the rows of the batch that are spills.
- * @param spillHashMapResultIndices
- * For each entry in spills, the index into the hashMapResult.
* @param spillCount
* Number of spills in spills.
- * @param hashMapResults
- * The array of all hash map results for the batch.
* @param hashMapResultCount
* Number of entries in hashMapResults.
*/
- protected int finishInner(VectorizedRowBatch batch,
- int[] allMatchs, int allMatchCount,
- int[] equalKeySeriesHashMapResultIndices, int[] equalKeySeriesAllMatchIndices,
- boolean[] equalKeySeriesIsSingleValue, int[] equalKeySeriesDuplicateCounts,
- int equalKeySeriesCount,
- int[] spills, int[] spillHashMapResultIndices, int spillCount,
- VectorMapJoinHashMapResult[] hashMapResults, int hashMapResultCount)
+ protected void finishInner(VectorizedRowBatch batch,
+ int allMatchCount, int equalKeySeriesCount, int spillCount, int hashMapResultCount)
throws HiveException, IOException {
int numSel = 0;
@@ -211,10 +190,11 @@ public abstract class VectorMapJoinInnerGenerateResultOperator
spills, spillHashMapResultIndices, spillCount);
}
- return numSel;
+ batch.size = numSel;
+ batch.selectedInUse = true;
}
- protected int finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
+ protected void finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
VectorMapJoinHashTableResult hashMapResult) throws HiveException, IOException {
int numSel = 0;
@@ -230,22 +210,19 @@ public abstract class VectorMapJoinInnerGenerateResultOperator
}
// Generate special repeated case.
- numSel = generateHashMapResultRepeatedAll(batch, hashMapResults[0]);
+ generateHashMapResultRepeatedAll(batch, hashMapResults[0]);
break;
case SPILL:
// Whole batch is spilled.
spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMapResults[0]);
+ batch.size = 0;
break;
case NOMATCH:
// No match for entire batch.
+ batch.size = 0;
break;
}
- /*
- * Common repeated join result processing.
- */
-
- return numSel;
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
index b77a93c..9005d00 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java
@@ -149,9 +149,6 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateRe
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column Long specific declarations.
*/
@@ -196,7 +193,7 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateRe
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishInnerRepeated(batch, joinResult, hashMapResults[0]);
+ finishInnerRepeated(batch, joinResult, hashMapResults[0]);
} else {
/*
@@ -356,18 +353,10 @@ public class VectorMapJoinInnerLongOperator extends VectorMapJoinInnerGenerateRe
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
- numSel = finishInner(batch,
- allMatchs, allMatchCount,
- equalKeySeriesHashMapResultIndices, equalKeySeriesAllMatchIndices,
- equalKeySeriesIsSingleValue, equalKeySeriesDuplicateCounts,
- equalKeySeriesCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount);
+ finishInner(batch,
+ allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
index 938506b..b13ded6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java
@@ -153,9 +153,6 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenera
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Multi-Key specific declarations.
*/
@@ -207,7 +204,7 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenera
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishInnerRepeated(batch, joinResult, hashMapResults[0]);
+ finishInnerRepeated(batch, joinResult, hashMapResults[0]);
} else {
/*
@@ -279,7 +276,7 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenera
haveSaveKey = true;
/*
- * Multi-Key specific save key and lookup.
+ * Multi-Key specific save key.
*/
temp = saveKeyOutput;
@@ -368,18 +365,10 @@ public class VectorMapJoinInnerMultiKeyOperator extends VectorMapJoinInnerGenera
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
- numSel = finishInner(batch,
- allMatchs, allMatchCount,
- equalKeySeriesHashMapResultIndices, equalKeySeriesAllMatchIndices,
- equalKeySeriesIsSingleValue, equalKeySeriesDuplicateCounts,
- equalKeySeriesCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount);
+ finishInner(batch,
+ allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
index f7dd8e2..9f10ff1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java
@@ -140,9 +140,6 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column String specific declarations.
*/
@@ -185,7 +182,7 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishInnerRepeated(batch, joinResult, hashMapResults[0]);
+ finishInnerRepeated(batch, joinResult, hashMapResults[0]);
} else {
/*
@@ -345,18 +342,10 @@ public class VectorMapJoinInnerStringOperator extends VectorMapJoinInnerGenerate
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
- numSel = finishInner(batch,
- allMatchs, allMatchCount,
- equalKeySeriesHashMapResultIndices, equalKeySeriesAllMatchIndices,
- equalKeySeriesIsSingleValue, equalKeySeriesDuplicateCounts,
- equalKeySeriesCount,
- spills, spillHashMapResultIndices, spillCount,
- hashMapResults, hashMapResultCount);
+ finishInner(batch,
+ allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
index 230f9fe..07393b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java
@@ -111,26 +111,23 @@ public abstract class VectorMapJoinLeftSemiGenerateResultOperator
* @param batch
* The big table batch with any matching and any non matching rows both as
* selected in use.
- * @param allMatchs
- * A subset of the rows of the batch that are matches.
* @param allMatchCount
* Number of matches in allMatchs.
- * @param spills
- * A subset of the rows of the batch that are spills.
- * @param spillHashMapResultIndices
- * For each entry in spills, the index into the hashTableResults.
* @param spillCount
* Number of spills in spills.
* @param hashTableResults
* The array of all hash table results for the batch. We need the
* VectorMapJoinHashTableResult for the spill information.
*/
- protected int finishLeftSemi(VectorizedRowBatch batch,
- int[] allMatchs, int allMatchCount,
- int[] spills, int[] spillHashMapResultIndices, int spillCount,
+ protected void finishLeftSemi(VectorizedRowBatch batch,
+ int allMatchCount, int spillCount,
VectorMapJoinHashTableResult[] hashTableResults) throws HiveException, IOException {
- int numSel;
+ // Get rid of spills before we start modifying the batch.
+ if (spillCount > 0) {
+ spillHashMapBatch(batch, hashTableResults,
+ spills, spillHashMapResultIndices, spillCount);
+ }
/*
* Optimize by running value expressions only over the matched rows.
@@ -139,14 +136,9 @@ public abstract class VectorMapJoinLeftSemiGenerateResultOperator
performValueExpressions(batch, allMatchs, allMatchCount);
}
- numSel = generateHashSetResults(batch, allMatchs, allMatchCount);
-
- if (spillCount > 0) {
- spillHashMapBatch(batch, hashTableResults,
- spills, spillHashMapResultIndices, spillCount);
- }
-
- return numSel;
+ int numSel = generateHashSetResults(batch, allMatchs, allMatchCount);
+ batch.size = numSel;
+ batch.selectedInUse = true;
}
/**
@@ -199,11 +191,9 @@ public abstract class VectorMapJoinLeftSemiGenerateResultOperator
return batch.size;
}
- protected int finishLeftSemiRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
+ protected void finishLeftSemiRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
VectorMapJoinHashTableResult hashSetResult) throws HiveException, IOException {
- int numSel = 0;
-
switch (joinResult) {
case MATCH:
@@ -215,19 +205,21 @@ public abstract class VectorMapJoinLeftSemiGenerateResultOperator
}
// Generate special repeated case.
- numSel = generateHashSetResultRepeatedAll(batch);
+ int numSel = generateHashSetResultRepeatedAll(batch);
+ batch.size = numSel;
+ batch.selectedInUse = true;
break;
case SPILL:
// Whole batch is spilled.
spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashSetResult);
+ batch.size = 0;
break;
case NOMATCH:
// No match for entire batch.
+ batch.size = 0;
break;
}
-
- return numSel;
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
index 75aeefb..712978a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java
@@ -151,9 +151,6 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGene
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column Long specific declarations.
*/
@@ -198,7 +195,7 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGene
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
+ finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
} else {
/*
@@ -348,15 +345,11 @@ public class VectorMapJoinLeftSemiLongOperator extends VectorMapJoinLeftSemiGene
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
}
- numSel = finishLeftSemi(batch,
- allMatchs, allMatchCount,
- spills, spillHashMapResultIndices, spillCount,
+ finishLeftSemi(batch,
+ allMatchCount, spillCount,
(VectorMapJoinHashTableResult[]) hashSetResults);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
index ea287f4..b941431 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java
@@ -155,9 +155,6 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemi
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Multi-Key specific declarations.
*/
@@ -210,7 +207,7 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemi
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
+ finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
} else {
/*
@@ -291,6 +288,10 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemi
saveKeyOutput = currentKeyOutput;
currentKeyOutput = temp;
+ /*
+ * Multi-key specific lookup key.
+ */
+
byte[] keyBytes = saveKeyOutput.getData();
int keyLength = saveKeyOutput.getLength();
saveJoinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[hashSetResultCount]);
@@ -360,15 +361,11 @@ public class VectorMapJoinLeftSemiMultiKeyOperator extends VectorMapJoinLeftSemi
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
}
- numSel = finishLeftSemi(batch,
- allMatchs, allMatchCount,
- spills, spillHashMapResultIndices, spillCount,
+ finishLeftSemi(batch,
+ allMatchCount, spillCount,
(VectorMapJoinHashTableResult[]) hashSetResults);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
index 116cb81..9ff1141 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
@@ -142,9 +142,6 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
}
}
- // We rebuild in-place the selected array with rows destine to be forwarded.
- int numSel = 0;
-
/*
* Single-Column String specific declarations.
*/
@@ -187,7 +184,7 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
- numSel = finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
+ finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
} else {
/*
@@ -263,6 +260,10 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
saveKeyBatchIndex = batchIndex;
+ /*
+ * Single-Column String specific lookup key.
+ */
+
byte[] keyBytes = vector[batchIndex];
int keyStart = start[batchIndex];
int keyLength = length[batchIndex];
@@ -333,15 +334,11 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe
" hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
}
- numSel = finishLeftSemi(batch,
- allMatchs, allMatchCount,
- spills, spillHashMapResultIndices, spillCount,
+ finishLeftSemi(batch,
+ allMatchCount, spillCount,
(VectorMapJoinHashTableResult[]) hashSetResults);
}
- batch.selectedInUse = true;
- batch.size = numSel;
-
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
http://git-wip-us.apache.org/repos/asf/hive/blob/2b9f2f5e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
index 7ef5574..57814fd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
@@ -70,15 +70,34 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
// generation.
protected transient VectorMapJoinHashMapResult hashMapResults[];
- // Pre-allocated member for storing any matching row indexes during a processOp call.
- protected transient int[] matchs;
+ // Pre-allocated member for remembering the big table's selected array at the beginning of
+ // the process method before applying any filter. For outer join we need to remember which
+ // rows did not match since they will appear the in outer join result with NULLs for the
+ // small table.
+ protected transient int[] inputSelected;
- // Pre-allocated member for storing the mapping to the row batchIndex of the first of a series of
- // equal keys that was looked up during a processOp call.
- protected transient int[] matchHashMapResultIndices;
+ // Pre-allocated member for storing the (physical) batch index of matching row (single- or
+ // multi-small-table-valued) indexes during a process call.
+ protected transient int[] allMatchs;
- // All matching and non-matching big table rows.
- protected transient int[] nonSpills;
+ /*
+ * Pre-allocated members for storing information equal key series for small-table matches.
+ *
+ * ~HashMapResultIndices
+ * Index into the hashMapResults array for the match.
+ * ~AllMatchIndices
+ * (Logical) indices into allMatchs to the first row of a match of a
+ * possible series of duplicate keys.
+ * ~IsSingleValue
+ * Whether there is 1 or multiple small table values.
+ * ~DuplicateCounts
+ * The duplicate count for each matched key.
+ *
+ */
+ protected transient int[] equalKeySeriesHashMapResultIndices;
+ protected transient int[] equalKeySeriesAllMatchIndices;
+ protected transient boolean[] equalKeySeriesIsSingleValue;
+ protected transient int[] equalKeySeriesDuplicateCounts;
// Pre-allocated member for storing the (physical) batch index of rows that need to be spilled.
protected transient int[] spills;
@@ -86,8 +105,11 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
// Pre-allocated member for storing index into the hashSetResults for each spilled row.
protected transient int[] spillHashMapResultIndices;
- // Pre-allocated member for storing any non-matching row indexes during a processOp call.
- protected transient int[] scratch1;
+ // Pre-allocated member for storing any non-spills, non-matches, or merged row indexes during a
+ // process method call.
+ protected transient int[] nonSpills;
+ protected transient int[] noMatchs;
+ protected transient int[] merged;
public VectorMapJoinOuterGenerateResultOperator() {
super();
@@ -111,12 +133,23 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
for (int i = 0; i < hashMapResults.length; i++) {
hashMapResults[i] = baseHashMap.createHashMapResult();
}
- matchs = new int[batch.DEFAULT_SIZE];
- matchHashMapResultIndices = new int[batch.DEFAULT_SIZE];
- nonSpills = new int[batch.DEFAULT_SIZE];
+
+ inputSelected = new int[batch.DEFAULT_SIZE];
+
+ allMatchs = new int[batch.DEFAULT_SIZE];
+
+ equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE];
+ equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE];
+ equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE];
+ equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE];
+
spills = new int[batch.DEFAULT_SIZE];
spillHashMapResultIndices = new int[batch.DEFAULT_SIZE];
- scratch1 = new int[batch.DEFAULT_SIZE];
+
+ nonSpills = new int[batch.DEFAULT_SIZE];
+ noMatchs = new int[batch.DEFAULT_SIZE];
+ merged = new int[batch.DEFAULT_SIZE];
+
}
//-----------------------------------------------------------------------------------------------
@@ -145,260 +178,372 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
}
/**
- * Generate the outer join output results for one vectorized row batch.
- *
- * Any filter expressions will apply now since hash map lookup for outer join is complete.
+ * Apply the value expression to rows in the (original) input selected array.
*
* @param batch
- * The big table batch with any matching and any non matching rows both as
- * selected in use.
- * @param matchs
- * A subset of the rows of the batch that are matches.
- * @param matchHashMapResultIndices
- * For each entry in matches, the index into the hashMapResult.
- * @param matchSize
- * Number of matches in matchs.
- * @param nonSpills
- * The rows of the batch that are both matches and non-matches.
- * @param nonspillCount
- * Number of rows in nonSpills.
- * @param spills
- * A subset of the rows of the batch that are spills.
- * @param spillHashMapResultIndices
- * For each entry in spills, the index into the hashMapResult.
- * @param spillCount
- * Number of spills in spills.
- * @param hashMapResults
- * The array of all hash map results for the batch.
- * @param hashMapResultCount
- * Number of entries in hashMapResults.
- * @param scratch1
- * Pre-allocated storage to internal use.
+ * The vectorized row batch.
+ * @param inputSelectedInUse
+ * Whether the (original) input batch is selectedInUse.
+ * @param inputLogicalSize
+ * The (original) input batch size.
*/
- public int finishOuter(VectorizedRowBatch batch,
- int[] matchs, int[] matchHashMapResultIndices, int matchCount,
- int[] nonSpills, int nonSpillCount,
- int[] spills, int[] spillHashMapResultIndices, int spillCount,
- VectorMapJoinHashMapResult[] hashMapResults, int hashMapResultCount,
- int[] scratch1) throws IOException, HiveException {
-
- int numSel = 0;
-
- // At this point we have determined the matching rows only for the ON equality condition(s).
- // Implicitly, non-matching rows are those in the selected array minus matchs.
+ private void doValueExprOnInputSelected(VectorizedRowBatch batch,
+ boolean inputSelectedInUse, int inputLogicalSize) {
- // Next, for outer join, apply any ON predicates to filter down the matches.
- if (matchCount > 0 && bigTableFilterExpressions.length > 0) {
+ int saveBatchSize = batch.size;
+ int[] saveSelected = batch.selected;
+ boolean saveSelectedInUse = batch.selectedInUse;
- System.arraycopy(matchs, 0, batch.selected, 0, matchCount);
- batch.size = matchCount;
+ batch.size = inputLogicalSize;
+ batch.selected = inputSelected;
+ batch.selectedInUse = inputSelectedInUse;
- // Non matches will be removed from the selected array.
- for (VectorExpression ve : bigTableFilterExpressions) {
+ if (bigTableValueExpressions != null) {
+ for(VectorExpression ve: bigTableValueExpressions) {
ve.evaluate(batch);
}
+ }
- // LOG.info("finishOuter" +
- // " filtered batch.selected " + Arrays.toString(Arrays.copyOfRange(batch.selected, 0, batch.size)));
-
- // Fixup the matchHashMapResultIndices array.
- if (batch.size < matchCount) {
- int numMatch = 0;
- int[] selected = batch.selected;
- for (int i = 0; i < batch.size; i++) {
- if (selected[i] == matchs[numMatch]) {
- matchHashMapResultIndices[numMatch] = matchHashMapResultIndices[i];
- numMatch++;
- if (numMatch == matchCount) {
- break;
- }
- }
- }
- System.arraycopy(batch.selected, 0, matchs, 0, matchCount);
+ batch.size = saveBatchSize;
+ batch.selected = saveSelected;
+ batch.selectedInUse = saveSelectedInUse;
+ }
+
+ /**
+ * Apply the value expression to rows specified by a selected array.
+ *
+ * @param batch
+ * The vectorized row batch.
+ * @param selected
+ * The (physical) batch indices to apply the expression to.
+ * @param size
+ * The size of selected.
+ */
+ private void doValueExpr(VectorizedRowBatch batch,
+ int[] selected, int size) {
+
+ int saveBatchSize = batch.size;
+ int[] saveSelected = batch.selected;
+ boolean saveSelectedInUse = batch.selectedInUse;
+
+ batch.size = size;
+ batch.selected = selected;
+ batch.selectedInUse = true;
+
+ if (bigTableValueExpressions != null) {
+ for(VectorExpression ve: bigTableValueExpressions) {
+ ve.evaluate(batch);
}
}
- // LOG.info("finishOuter" +
- // " matchs[" + matchCount + "] " + intArrayToRangesString(matchs, matchCount) +
- // " matchHashMapResultIndices " + Arrays.toString(Arrays.copyOfRange(matchHashMapResultIndices, 0, matchCount)));
- // Big table value expressions apply to ALL matching and non-matching rows.
- if (bigTableValueExpressions != null) {
+ batch.size = saveBatchSize;
+ batch.selected = saveSelected;
+ batch.selectedInUse = saveSelectedInUse;
+ }
- System.arraycopy(nonSpills, 0, batch.selected, 0, nonSpillCount);
- batch.size = nonSpillCount;
+ /**
+ * Remove (subtract) members from the input selected array and produce the results into
+ * a difference array.
+ *
+ * @param inputSelectedInUse
+ * Whether the (original) input batch is selectedInUse.
+ * @param inputLogicalSize
+ * The (original) input batch size.
+ * @param remove
+ * The indices to remove. They must all be present in input selected array.
+ * @param removeSize
+ * The size of remove.
+ * @param difference
+ * The resulting difference -- the input selected array indices not in the
+ * remove array.
+ * @return
+ * The resulting size of the difference array.
+ * @throws HiveException
+ */
+ private int subtractFromInputSelected(boolean inputSelectedInUse, int inputLogicalSize,
+ int[] remove, int removeSize, int[] difference) throws HiveException {
- for (VectorExpression ve: bigTableValueExpressions) {
- ve.evaluate(batch);
+ // if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
+ // throw new HiveException("remove is not in sort order and unique");
+ // }
+
+ int differenceCount = 0;
+
+ // Determine which rows are left.
+ int removeIndex = 0;
+ if (inputSelectedInUse) {
+ for (int i = 0; i < inputLogicalSize; i++) {
+ int candidateIndex = inputSelected[i];
+ if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
+ removeIndex++;
+ } else {
+ difference[differenceCount++] = candidateIndex;
+ }
+ }
+ } else {
+ for (int candidateIndex = 0; candidateIndex < inputLogicalSize; candidateIndex++) {
+ if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
+ removeIndex++;
+ } else {
+ difference[differenceCount++] = candidateIndex;
+ }
+ }
+ }
+
+ if (removeIndex != removeSize) {
+ throw new HiveException("Not all batch indices removed");
+ }
+
+ // if (!verifyMonotonicallyIncreasing(difference, differenceCount)) {
+ // throw new HiveException("difference is not in sort order and unique");
+ // }
+
+ return differenceCount;
+ }
+
+ /**
+ * Remove (subtract) members from an array and produce the results into
+ * a difference array.
+
+ * @param all
+ * The selected array containing all members.
+ * @param allSize
+ * The size of all.
+ * @param remove
+ * The indices to remove. They must all be present in input selected array.
+ * @param removeSize
+ * The size of remove.
+ * @param difference
+ * The resulting difference -- the all array indices not in the
+ * remove array.
+ * @return
+ * The resulting size of the difference array.
+ * @throws HiveException
+ */
+ private int subtract(int[] all, int allSize,
+ int[] remove, int removeSize, int[] difference) throws HiveException {
+
+ // if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
+ // throw new HiveException("remove is not in sort order and unique");
+ // }
+
+ int differenceCount = 0;
+
+ // Determine which rows are left.
+ int removeIndex = 0;
+ for (int i = 0; i < allSize; i++) {
+ int candidateIndex = all[i];
+ if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
+ removeIndex++;
+ } else {
+ difference[differenceCount++] = candidateIndex;
}
}
- // Determine which rows are non matches by determining the delta between selected and
- // matchs.
- int[] noMatchs = scratch1;
- int noMatchCount = 0;
- if (matchCount < nonSpillCount) {
- // Determine which rows are non matches.
- int matchIndex = 0;
- for (int i = 0; i < nonSpillCount; i++) {
- int candidateIndex = nonSpills[i];
- if (matchIndex < matchCount && candidateIndex == matchs[matchIndex]) {
- matchIndex++;
+ if (removeIndex != removeSize) {
+ throw new HiveException("Not all batch indices removed");
+ }
+
+ return differenceCount;
+ }
+
+ /**
+ * Sort merge two select arrays so the resulting array is ordered by (batch) index.
+ *
+ * @param selected1
+ * @param selected1Count
+ * @param selected2
+ * @param selected2Count
+ * @param sortMerged
+ * The resulting sort merge of selected1 and selected2.
+ * @return
+ * The resulting size of the sortMerged array.
+ * @throws HiveException
+ */
+ private int sortMerge(int[] selected1, int selected1Count,
+ int[] selected2, int selected2Count, int[] sortMerged) throws HiveException {
+
+ // if (!verifyMonotonicallyIncreasing(selected1, selected1Count)) {
+ // throw new HiveException("selected1 is not in sort order and unique");
+ // }
+
+ // if (!verifyMonotonicallyIncreasing(selected2, selected2Count)) {
+ // throw new HiveException("selected1 is not in sort order and unique");
+ // }
+
+
+ int sortMergeCount = 0;
+
+ int selected1Index = 0;
+ int selected2Index = 0;
+ for (int i = 0; i < selected1Count + selected2Count; i++) {
+ if (selected1Index < selected1Count && selected2Index < selected2Count) {
+ if (selected1[selected1Index] < selected2[selected2Index]) {
+ sortMerged[sortMergeCount++] = selected1[selected1Index++];
} else {
- noMatchs[noMatchCount++] = candidateIndex;
+ sortMerged[sortMergeCount++] = selected2[selected2Index++];
}
+ } else if (selected1Index < selected1Count) {
+ sortMerged[sortMergeCount++] = selected1[selected1Index++];
+ } else {
+ sortMerged[sortMergeCount++] = selected2[selected2Index++];
}
}
- // LOG.info("finishOuter" +
- // " noMatchs[" + noMatchCount + "] " + intArrayToRangesString(noMatchs, noMatchCount));
+ // if (!verifyMonotonicallyIncreasing(sortMerged, sortMergeCount)) {
+ // throw new HiveException("sortMerged is not in sort order and unique");
+ // }
- // When we generate results into the overflow batch, we may still end up with fewer rows
- // in the big table batch. So, nulSel and the batch's selected array will be rebuilt with
- // just the big table rows that need to be forwarded, minus any rows processed with the
- // overflow batch.
- if (matchCount > 0) {
- numSel = generateOuterHashMapMatchResults(batch,
- matchs, matchHashMapResultIndices, matchCount,
- hashMapResults, numSel);
- }
+ return sortMergeCount;
+ }
- if (noMatchCount > 0) {
- numSel = generateOuterHashMapNoMatchResults(batch, noMatchs, noMatchCount, numSel);
- }
+ /**
+ * Generate the outer join output results for one vectorized row batch.
+ *
+ * @param batch
+ * The big table batch with any matching and any non matching rows both as
+ * selected in use.
+ * @param allMatchCount
+ * Number of matches in allMatchs.
+ * @param equalKeySeriesCount
+ * Number of single value matches.
+ * @param atLeastOneNonMatch
+ * Whether at least one row was a non-match.
+ * @param inputSelectedInUse
+ * A copy of the batch's selectedInUse flag on input to the process method.
+ * @param inputLogicalSize
+ * The batch's size on input to the process method.
+ * @param spillCount
+ * Number of spills in spills.
+ * @param hashMapResultCount
+ * Number of entries in hashMapResults.
+ */
+ public void finishOuter(VectorizedRowBatch batch,
+ int allMatchCount, int equalKeySeriesCount, boolean atLeastOneNonMatch,
+ boolean inputSelectedInUse, int inputLogicalSize,
+ int spillCount, int hashMapResultCount) throws IOException, HiveException {
+ // Get rid of spills before we start modifying the batch.
if (spillCount > 0) {
spillHashMapBatch(batch, (VectorMapJoinHashTableResult[]) hashMapResults,
spills, spillHashMapResultIndices, spillCount);
}
- return numSel;
- }
-
- /**
- * Generate the matching outer join output results for one row of a vectorized row batch into
- * the overflow batch.
- *
- * @param batch
- * The big table batch.
- * @param batchIndex
- * Index of the big table row.
- * @param hashMapResult
- * The hash map result with the small table values.
- */
- private void copyOuterHashMapResultToOverflow(VectorizedRowBatch batch, int batchIndex,
- VectorMapJoinHashMapResult hashMapResult) throws HiveException, IOException {
-
- // if (hashMapResult.isCappedCountAvailable()) {
- // LOG.info("copyOuterHashMapResultToOverflow cappedCount " + hashMapResult.cappedCount());
- // }
- ByteSegmentRef byteSegmentRef = hashMapResult.first();
- while (byteSegmentRef != null) {
-
- // Copy the BigTable values into the overflow batch. Since the overflow batch may
- // not get flushed here, we must copy by value.
- if (bigTableRetainedVectorCopy != null) {
- bigTableRetainedVectorCopy.copyByValue(batch, batchIndex,
- overflowBatch, overflowBatch.size);
- }
-
- // Reference the keys we just copied above.
- if (bigTableVectorCopyOuterKeys != null) {
- bigTableVectorCopyOuterKeys.copyByReference(overflowBatch, overflowBatch.size,
- overflowBatch, overflowBatch.size);
- }
-
- if (smallTableVectorDeserializeRow != null) {
-
- byte[] bytes = byteSegmentRef.getBytes();
- int offset = (int) byteSegmentRef.getOffset();
- int length = byteSegmentRef.getLength();
- smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
-
- smallTableVectorDeserializeRow.deserializeByValue(overflowBatch, overflowBatch.size);
- }
+ int noMatchCount = 0;
+ if (spillCount > 0) {
- ++overflowBatch.size;
- if (overflowBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
- forwardOverflow();
- }
+ // Subtract the spills to get all match and non-match rows.
+ int nonSpillCount = subtractFromInputSelected(
+ inputSelectedInUse, inputLogicalSize, spills, spillCount, nonSpills);
- byteSegmentRef = hashMapResult.next();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("finishOuter spillCount > 0" +
+ " nonSpills " + intArrayToRangesString(nonSpills, nonSpillCount));
+ }
+
+ // Big table value expressions apply to ALL matching and non-matching rows.
+ if (bigTableValueExpressions != null) {
+
+ doValueExpr(batch, nonSpills, nonSpillCount);
+
}
- // LOG.info("copyOuterHashMapResultToOverflow overflowBatch.size " + overflowBatch.size);
+
+ if (atLeastOneNonMatch) {
+ noMatchCount = subtract(nonSpills, nonSpillCount, allMatchs, allMatchCount,
+ noMatchs);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("finishOuter spillCount > 0" +
+ " noMatchs " + intArrayToRangesString(noMatchs, noMatchCount));
+ }
- }
+ }
+ } else {
- /**
- * Generate the matching outer join output results for one vectorized row batch.
- *
- * For each matching row specified by parameter, get the one or more small table values and
- * form join results.
- *
- * (Note: Since all matching and non-matching rows are selected and output for outer joins,
- * we cannot use selected as the matching rows).
- *
- * @param batch
- * The big table batch with any matching and any non matching rows both as
- * selected in use.
- * @param matchs
- * A subset of the rows of the batch that are matches.
- * @param matchHashMapResultIndices
- * For each entry in matches, the index into the hashMapResult.
- * @param matchSize
- * Number of matches in matchs.
- * @param hashMapResults
- * The array of all hash map results for the batch.
- * @param numSel
- * The current count of rows in the rebuilding of the selected array.
- *
- * @return
- * The new count of selected rows.
- */
- protected int generateOuterHashMapMatchResults(VectorizedRowBatch batch,
- int[] matchs, int[] matchHashMapResultIndices, int matchSize,
- VectorMapJoinHashMapResult[] hashMapResults, int numSel)
- throws IOException, HiveException {
+ // Run value expressions over original (whole) input batch.
+ doValueExprOnInputSelected(batch, inputSelectedInUse, inputLogicalSize);
- int[] selected = batch.selected;
+ if (atLeastOneNonMatch) {
+ noMatchCount = subtractFromInputSelected(
+ inputSelectedInUse, inputLogicalSize, allMatchs, allMatchCount, noMatchs);
- // Generate result within big table batch when single small table value. Otherwise, copy
- // to overflow batch.
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("finishOuter spillCount == 0" +
+ " noMatchs " + intArrayToRangesString(noMatchs, noMatchCount));
+ }
+ }
+ }
- for (int i = 0; i < matchSize; i++) {
- int batchIndex = matchs[i];
+ // When we generate results into the overflow batch, we may still end up with fewer rows
+ // in the big table batch. So, nulSel and the batch's selected array will be rebuilt with
+ // just the big table rows that need to be forwarded, minus any rows processed with the
+ // overflow batch.
+ if (allMatchCount > 0) {
+
+ int numSel = 0;
+ for (int i = 0; i < equalKeySeriesCount; i++) {
+ int hashMapResultIndex = equalKeySeriesHashMapResultIndices[i];
+ VectorMapJoinHashMapResult hashMapResult = hashMapResults[hashMapResultIndex];
+ int allMatchesIndex = equalKeySeriesAllMatchIndices[i];
+ boolean isSingleValue = equalKeySeriesIsSingleValue[i];
+ int duplicateCount = equalKeySeriesDuplicateCounts[i];
+
+ if (isSingleValue) {
+ numSel = generateHashMapResultSingleValue(
+ batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount, numSel);
+ } else {
+ generateHashMapResultMultiValue(
+ batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount);
+ }
+ }
- int hashMapResultIndex = matchHashMapResultIndices[i];
- VectorMapJoinHashMapResult hashMapResult = hashMapResults[hashMapResultIndex];
+ // The number of single value rows that were generated in the big table batch.
+ batch.size = numSel;
+ batch.selectedInUse = true;
- if (!hashMapResult.isSingleRow()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("finishOuter allMatchCount > 0" +
+ " batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ }
- // Multiple small table rows require use of the overflow batch.
- copyOuterHashMapResultToOverflow(batch, batchIndex, hashMapResult);
- } else {
+ } else {
+ batch.size = 0;
+ }
- // Generate join result in big table batch.
- ByteSegmentRef byteSegmentRef = hashMapResult.first();
+ if (noMatchCount > 0) {
+ if (batch.size > 0) {
+
+ generateOuterNulls(batch, noMatchs, noMatchCount);
+
+ // Merge noMatchs and (match) selected.
+ int mergeCount = sortMerge(
+ noMatchs, noMatchCount, batch.selected, batch.size, merged);
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("finishOuter noMatchCount > 0 && batch.size > 0" +
+ " merged " + intArrayToRangesString(merged, mergeCount));
+ }
- if (bigTableVectorCopyOuterKeys != null) {
- bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex);
- }
+ System.arraycopy(merged, 0, batch.selected, 0, mergeCount);
+ batch.size = mergeCount;
+ batch.selectedInUse = true;
+ } else {
- if (smallTableVectorDeserializeRow != null) {
+ // We can use the whole batch for output of no matches.
- byte[] bytes = byteSegmentRef.getBytes();
- int offset = (int) byteSegmentRef.getOffset();
- int length = byteSegmentRef.getLength();
- smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
+ generateOuterNullsRepeatedAll(batch);
- smallTableVectorDeserializeRow.deserializeByValue(batch, batchIndex);
- }
+ System.arraycopy(noMatchs, 0, batch.selected, 0, noMatchCount);
+ batch.size = noMatchCount;
+ batch.selectedInUse = true;
- // Remember this big table row was used for an output result.
- selected[numSel++] = batchIndex;
- }
- }
- return numSel;
- }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("finishOuter noMatchCount > 0 && batch.size == 0" +
+ " batch.selected " + intArrayToRangesString(batch.selected, batch.size));
+ }
+ }
+ }
+ }
/**
* Generate the non matching outer join output results for one vectorized row batch.
@@ -412,72 +557,30 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
* A subset of the rows of the batch that are non matches.
* @param noMatchSize
* Number of non matches in noMatchs.
- * @param numSel
- * The current count of rows in the rebuilding of the selected array.
- *
- * @return
- * The new count of selected rows.
*/
- protected int generateOuterHashMapNoMatchResults(VectorizedRowBatch batch, int[] noMatchs,
- int noMatchSize, int numSel) throws IOException, HiveException {
- int[] selected = batch.selected;
-
- // Generate result within big table batch with null small table results, using isRepeated
- // if possible.
+ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs,
+ int noMatchSize) throws IOException, HiveException {
- if (numSel == 0) {
+ // Set null information in the small table results area.
- // There were 0 matching rows -- so we can use the isRepeated optimization for the non
- // matching rows.
+ for (int i = 0; i < noMatchSize; i++) {
+ int batchIndex = noMatchs[i];
// Mark any scratch small table scratch columns that would normally receive a copy of the
- // key as null and repeating.
+ // key as null, too.
for (int column : bigTableOuterKeyOutputVectorColumns) {
ColumnVector colVector = batch.cols[column];
- colVector.isRepeating = true;
colVector.noNulls = false;
- colVector.isNull[0] = true;
+ colVector.isNull[batchIndex] = true;
}
- // Small table values are set to null and repeating.
+ // Small table values are set to null.
for (int column : smallTableOutputVectorColumns) {
ColumnVector colVector = batch.cols[column];
- colVector.isRepeating = true;
colVector.noNulls = false;
- colVector.isNull[0] = true;
- }
-
- // Rebuild the selected array.
- for (int i = 0; i < noMatchSize; i++) {
- int batchIndex = noMatchs[i];
- selected[numSel++] = batchIndex;
- }
- } else {
-
- // Set null information in the small table results area.
-
- for (int i = 0; i < noMatchSize; i++) {
- int batchIndex = noMatchs[i];
-
- // Mark any scratch small table scratch columns that would normally receive a copy of the
- // key as null, too.
- for (int column : bigTableOuterKeyOutputVectorColumns) {
- ColumnVector colVector = batch.cols[column];
- colVector.noNulls = false;
- colVector.isNull[batchIndex] = true;
- }
-
- // Small table values are set to null.
- for (int column : smallTableOutputVectorColumns) {
- ColumnVector colVector = batch.cols[column];
- colVector.noNulls = false;
- colVector.isNull[batchIndex] = true;
- }
-
- selected[numSel++] = batchIndex;
+ colVector.isNull[batchIndex] = true;
}
}
- return numSel;
}
/**
@@ -492,65 +595,114 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
* The hash map lookup result for the repeated key.
* @param hashMapResults
* The array of all hash map results for the batch.
+ * @param someRowsFilteredOut
+ * Whether some rows of the repeated key batch were knocked out by the filter.
+ * @param inputSelectedInUse
+ * A copy of the batch's selectedInUse flag on input to the process method.
+ * @param inputLogicalSize
+ * The batch's size on input to the process method.
* @param scratch1
* Pre-allocated storage to internal use.
+ * @param scratch2
+ * Pre-allocated storage to internal use.
*/
- public int finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
- VectorMapJoinHashMapResult hashMapResult, int[] scratch1)
+ public void finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
+ VectorMapJoinHashMapResult hashMapResult, boolean someRowsFilteredOut,
+ boolean inputSelectedInUse, int inputLogicalSize)
throws IOException, HiveException {
- int numSel = 0;
+ // LOG.debug("finishOuterRepeated batch #" + batchCounter + " " + joinResult.name() + " batch.size " + batch.size + " someRowsFilteredOut " + someRowsFilteredOut);
- if (joinResult == JoinUtil.JoinResult.MATCH && bigTableFilterExpressions.length > 0) {
+ switch (joinResult) {
+ case MATCH:
- // Since it is repeated, the evaluation of the filter will knock the whole batch out.
- // But since we are doing outer join, we want to keep non-matches.
+ // Rows we looked up as one repeated key are a match. But filtered out rows
+ // need to be generated as non-matches, too.
- // First, remember selected;
- int[] rememberSelected = scratch1;
- int rememberBatchSize = batch.size;
- if (batch.selectedInUse) {
- System.arraycopy(batch.selected, 0, rememberSelected, 0, batch.size);
- }
+ if (someRowsFilteredOut) {
- // Filter.
- for (VectorExpression ve : bigTableFilterExpressions) {
- ve.evaluate(batch);
- }
+ // For the filtered out rows that didn't (logically) get looked up in the hash table,
+ // we need to generate no match results for those too...
- // Convert a filter out to a non match.
- if (batch.size == 0) {
- joinResult = JoinUtil.JoinResult.NOMATCH;
- if (batch.selectedInUse) {
- System.arraycopy(rememberSelected, 0, batch.selected, 0, rememberBatchSize);
- // LOG.info("finishOuterRepeated batch #" + batchCounter + " filter out converted to no matchs " +
- // Arrays.toString(Arrays.copyOfRange(batch.selected, 0, rememberBatchSize)));
- } else {
- // LOG.info("finishOuterRepeated batch #" + batchCounter + " filter out converted to no matchs batch size " +
- // rememberBatchSize);
- }
- batch.size = rememberBatchSize;
- }
- }
+ // Run value expressions over original (whole) input batch.
+ doValueExprOnInputSelected(batch, inputSelectedInUse, inputLogicalSize);
- // LOG.info("finishOuterRepeated batch #" + batchCounter + " " + joinResult.name() + " batch.size " + batch.size);
- switch (joinResult) {
- case MATCH:
- // Run our value expressions over whole batch.
- if (bigTableValueExpressions != null) {
- for(VectorExpression ve: bigTableValueExpressions) {
- ve.evaluate(batch);
+ // Now calculate which rows were filtered out (they are logically no matches).
+
+ // Determine which rows are non matches by determining the delta between inputSelected and
+ // (current) batch selected.
+
+ int noMatchCount = subtractFromInputSelected(
+ inputSelectedInUse, inputLogicalSize, batch.selected, batch.size, noMatchs);
+
+ generateOuterNulls(batch, noMatchs, noMatchCount);
+
+ // Now generate the matchs. Single small table values will be put into the big table
+ // batch and come back in matchs. Any multiple small table value results will go into
+ // the overflow batch.
+ generateHashMapResultRepeatedAll(batch, hashMapResult);
+
+ // Merge noMatchs and (match) selected.
+ int mergeCount = sortMerge(
+ noMatchs, noMatchCount, batch.selected, batch.size, merged);
+
+ System.arraycopy(merged, 0, batch.selected, 0, mergeCount);
+ batch.size = mergeCount;
+ batch.selectedInUse = true;
+ } else {
+
+ // Just run our value expressions over input batch.
+
+ if (bigTableValueExpressions != null) {
+ for(VectorExpression ve: bigTableValueExpressions) {
+ ve.evaluate(batch);
+ }
}
- }
- // Use a common method applicable for inner and outer.
- numSel = generateHashMapResultRepeatedAll(batch, hashMapResult);
+ generateHashMapResultRepeatedAll(batch, hashMapResult);
+ }
break;
+
case SPILL:
- // Whole batch is spilled.
+
+ // Rows we looked up as one repeated key need to spill. But filtered out rows
+ // need to be generated as non-matches, too.
+
spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMapResult);
+
+ // After using selected to generate spills, generate non-matches, if any.
+ if (someRowsFilteredOut) {
+
+ // Determine which rows are non matches by determining the delta between inputSelected and
+ // (current) batch selected.
+
+ int noMatchCount = subtractFromInputSelected(
+ inputSelectedInUse, inputLogicalSize, batch.selected, batch.size, noMatchs);
+
+ System.arraycopy(noMatchs, 0, batch.selected, 0, noMatchCount);
+ batch.size = noMatchCount;
+ batch.selectedInUse = true;
+
+ generateOuterNullsRepeatedAll(batch);
+ } else {
+ batch.size = 0;
+ }
+
break;
+
case NOMATCH:
+
+ if (someRowsFilteredOut) {
+
+ // When the repeated no match is due to filtering, we need to restore the
+ // selected information.
+
+ if (inputSelectedInUse) {
+ System.arraycopy(inputSelected, 0, batch.selected, 0, inputLogicalSize);
+ }
+ batch.size = inputLogicalSize;
+ }
+
// Run our value expressions over whole batch.
if (bigTableValueExpressions != null) {
for(VectorExpression ve: bigTableValueExpressions) {
@@ -558,11 +710,9 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
}
}
- numSel = generateOuterNullsRepeatedAll(batch);
+ generateOuterNullsRepeatedAll(batch);
break;
}
-
- return numSel;
}
/**
@@ -573,24 +723,8 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
*
* @param batch
* The big table batch.
- * @return
- * The new count of selected rows.
*/
- protected int generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException {
-
- int[] selected = batch.selected;
- boolean selectedInUse = batch.selectedInUse;
-
- // Generate result within big table batch using is repeated for null small table results.
-
- if (batch.selectedInUse) {
- // The selected array is already filled in as we want it.
- } else {
- for (int i = 0; i < batch.size; i++) {
- selected[i] = i;
- }
- batch.selectedInUse = true;
- }
+ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException {
for (int column : smallTableOutputVectorColumns) {
ColumnVector colVector = batch.cols[column];
@@ -607,12 +741,5 @@ public abstract class VectorMapJoinOuterGenerateResultOperator
colVector.isNull[0] = true;
colVector.isRepeating = true;
}
-
- // for (int i = 0; i < batch.size; i++) {
- // int bigTableIndex = selected[i];
- // VectorizedBatchUtil.debugDisplayOneRow(batch, bigTableIndex, taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator generate generateOuterNullsRepeatedAll batch");
- // }
-
- return batch.size;
}
}
\ No newline at end of file