You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/12/02 19:27:48 UTC
hive git commit: HIVE-15327: Outerjoin might produce wrong result
depending on joinEmitInterval value (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 4e0754681 -> 7065407e6
HIVE-15327: Outerjoin might produce wrong result depending on joinEmitInterval value (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7065407e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7065407e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7065407e
Branch: refs/heads/master
Commit: 7065407e63b5dfca6e775ce2120e7c66333578c6
Parents: 4e07546
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Dec 1 11:48:56 2016 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Dec 2 12:46:37 2016 +0000
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 2 +
.../hadoop/hive/ql/exec/JoinOperator.java | 3 +-
.../queries/clientpositive/join_emit_interval.q | 31 +++
.../clientpositive/mapjoin_emit_interval.q | 32 +++
.../clientpositive/join_emit_interval.q.out | 231 +++++++++++++++++
.../llap/join_emit_interval.q.out | 259 +++++++++++++++++++
.../llap/mapjoin_emit_interval.q.out | 247 ++++++++++++++++++
7 files changed, 804 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e4910e4..27ece51 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -180,6 +180,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
insert_values_tmp_table.q,\
join0.q,\
join1.q,\
+ join_emit_interval.q,\
join_nullsafe.q,\
leftsemijoin.q,\
limit_pushdown.q,\
@@ -509,6 +510,7 @@ minillaplocal.query.files=acid_globallimit.q,\
load_dyn_part5.q,\
lvj_mapjoin.q,\
mapjoin_decimal.q,\
+ mapjoin_emit_interval.q,\
mergejoin_3way.q,\
mrr.q,\
multiMapJoin1.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
index 82056c4..0282763 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
@@ -101,7 +101,8 @@ public class JoinOperator extends CommonJoinOperator<JoinDesc> implements Serial
// Are we consuming too much memory
if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) &&
!hasLeftSemiJoin) {
- if (sz == joinEmitInterval && !hasFilter(alias)) {
+ if (sz == joinEmitInterval && !hasFilter(condn[alias-1].getLeft()) &&
+ !hasFilter(condn[alias-1].getRight())) {
// The input is sorted by alias, so if we are already in the last join
// operand,
// we can emit some results now.
http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/queries/clientpositive/join_emit_interval.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_emit_interval.q b/ql/src/test/queries/clientpositive/join_emit_interval.q
new file mode 100644
index 0000000..c59d97d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_emit_interval.q
@@ -0,0 +1,31 @@
+set hive.strict.checks.cartesian.product=false;
+set hive.join.emit.interval=1;
+
+CREATE TABLE test1 (key INT, value INT, col_1 STRING);
+INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+ (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car');
+
+CREATE TABLE test2 (key INT, value INT, col_2 STRING);
+INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+ (104, 3, 'Fli'), (105, NULL, 'None');
+
+
+-- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102);
+
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102);
+
+-- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102);
+
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102);
http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/queries/clientpositive/mapjoin_emit_interval.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mapjoin_emit_interval.q b/ql/src/test/queries/clientpositive/mapjoin_emit_interval.q
new file mode 100644
index 0000000..fe2a32a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mapjoin_emit_interval.q
@@ -0,0 +1,32 @@
+set hive.auto.convert.join=true;
+set hive.strict.checks.cartesian.product=false;
+set hive.join.emit.interval=1;
+
+CREATE TABLE test1 (key INT, value INT, col_1 STRING);
+INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+ (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car');
+
+CREATE TABLE test2 (key INT, value INT, col_2 STRING);
+INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+ (104, 3, 'Fli'), (105, NULL, 'None');
+
+
+-- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102);
+
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102);
+
+-- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102);
+
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102);
http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/results/clientpositive/join_emit_interval.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_emit_interval.q.out b/ql/src/test/results/clientpositive/join_emit_interval.q.out
new file mode 100644
index 0000000..263ba04
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join_emit_interval.q.out
@@ -0,0 +1,231 @@
+PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+ (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test1
+POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+ (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test1
+POSTHOOK: Lineage: test1.col_1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test1.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test1.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+ (104, 3, 'Fli'), (105, NULL, 'None')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@test2
+POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+ (104, 3, 'Fli'), (105, NULL, 'None')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@test2
+POSTHOOK: Lineage: test2.col_2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test2.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test2.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test1
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string)
+ TableScan
+ alias: test2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {VALUE._col0 BETWEEN 100 AND 102}
+ 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+98 NULL None NULL NULL NULL
+NULL NULL None NULL NULL NULL
+99 0 Alice NULL NULL NULL
+100 1 Bob NULL NULL NULL
+101 2 Car 103 2 Ema
+101 2 Car 102 2 Del
+99 2 Mat NULL NULL NULL
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: test1
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ TableScan
+ alias: test2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {VALUE._col0 BETWEEN 100 AND 102}
+ 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+101 2 Car 105 NULL None
+101 2 Car 104 3 Fli
+101 2 Car 103 2 Ema
+101 2 Car 102 2 Del
+100 1 Bob 105 NULL None
+100 1 Bob 104 3 Fli
+100 1 Bob 103 2 Ema
+100 1 Bob 102 2 Del
+99 2 Mat NULL NULL NULL
+99 0 Alice NULL NULL NULL
+98 NULL None NULL NULL NULL
+NULL NULL None NULL NULL NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out b/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out
new file mode 100644
index 0000000..8158efe
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out
@@ -0,0 +1,259 @@
+PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+ (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test1
+POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+ (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test1
+POSTHOOK: Lineage: test1.col_1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test1.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test1.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+ (104, 3, 'Fli'), (105, NULL, 'None')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@test2
+POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+ (104, 3, 'Fli'), (105, NULL, 'None')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@test2
+POSTHOOK: Lineage: test2.col_2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test2.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test2.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test1
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: test2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {VALUE._col0 BETWEEN 100 AND 102}
+ 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+NULL NULL None NULL NULL NULL
+98 NULL None NULL NULL NULL
+99 0 Alice NULL NULL NULL
+100 1 Bob NULL NULL NULL
+99 2 Mat NULL NULL NULL
+101 2 Car 102 2 Del
+101 2 Car 103 2 Ema
+Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test1
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: test2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {VALUE._col0 BETWEEN 100 AND 102}
+ 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+NULL NULL None NULL NULL NULL
+98 NULL None NULL NULL NULL
+99 0 Alice NULL NULL NULL
+99 2 Mat NULL NULL NULL
+100 1 Bob 102 2 Del
+100 1 Bob 103 2 Ema
+100 1 Bob 104 3 Fli
+100 1 Bob 105 NULL None
+101 2 Car 102 2 Del
+101 2 Car 103 2 Ema
+101 2 Car 104 3 Fli
+101 2 Car 105 NULL None
http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out
new file mode 100644
index 0000000..9059dac
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out
@@ -0,0 +1,247 @@
+PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+ (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test1
+POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+ (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test1
+POSTHOOK: Lineage: test1.col_1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test1.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test1.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+ (104, 3, 'Fli'), (105, NULL, 'None')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@test2
+POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+ (104, 3, 'Fli'), (105, NULL, 'None')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@test2
+POSTHOOK: Lineage: test2.col_2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: test2.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: test2.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test1
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {_col0 BETWEEN 100 AND 102}
+ 1
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: test2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+NULL NULL None NULL NULL NULL
+98 NULL None NULL NULL NULL
+99 0 Alice NULL NULL NULL
+99 2 Mat NULL NULL NULL
+100 1 Bob NULL NULL NULL
+101 2 Car 102 2 Del
+101 2 Car 103 2 Ema
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test1
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ filter predicates:
+ 0 {_col0 BETWEEN 100 AND 102}
+ 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: test2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int), col_2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+NULL NULL None NULL NULL NULL
+98 NULL None NULL NULL NULL
+99 0 Alice NULL NULL NULL
+99 2 Mat NULL NULL NULL
+100 1 Bob 102 2 Del
+100 1 Bob 105 NULL None
+100 1 Bob 104 3 Fli
+100 1 Bob 103 2 Ema
+101 2 Car 102 2 Del
+101 2 Car 105 NULL None
+101 2 Car 104 3 Fli
+101 2 Car 103 2 Ema