You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/12/29 20:52:28 UTC
hive git commit: HIVE-15493: Wrong result for LEFT outer join in Tez
using MapJoinOperator (Jesus Camacho Rodriguez, reviewed by Pengcheng Xiong)
Repository: hive
Updated Branches:
refs/heads/master fb548b27c -> 4486f2a94
HIVE-15493: Wrong result for LEFT outer join in Tez using MapJoinOperator (Jesus Camacho Rodriguez, reviewed by Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4486f2a9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4486f2a9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4486f2a9
Branch: refs/heads/master
Commit: 4486f2a94746c035094001e127807c8d2e82b49a
Parents: fb548b2
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Dec 22 00:43:18 2016 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Dec 29 20:42:45 2016 +0000
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 12 --
ql/src/test/queries/clientpositive/mapjoin3.q | 55 +++++
.../results/clientpositive/llap/mapjoin3.q.out | 200 +++++++++++++++++++
.../test/results/clientpositive/mapjoin3.q.out | 196 ++++++++++++++++++
5 files changed, 452 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 623e7c1..1cebc70 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -185,6 +185,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
load_dyn_part3.q,\
lvj_mapjoin.q,\
mapjoin2.q,\
+ mapjoin3.q,\
mapjoin_decimal.q,\
mapjoin_mapjoin.q,\
mapjoin46.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 71d34eb..863b153 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8062,18 +8062,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
index[i] = kindex;
continue;
}
- int vindex;
- if (exprBack == null) {
- vindex = -1;
- } else if (ExprNodeDescUtils.isConstant(exprBack)) {
- vindex = reduceValuesBack.indexOf(exprBack);
- } else {
- vindex = ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
- }
- if (vindex >= 0) {
- index[i] = -vindex - 1;
- continue;
- }
index[i] = -reduceValues.size() - 1;
String outputColName = getColumnInternalName(reduceValues.size());
http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/ql/src/test/queries/clientpositive/mapjoin3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mapjoin3.q b/ql/src/test/queries/clientpositive/mapjoin3.q
new file mode 100644
index 0000000..0a4a5d3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mapjoin3.q
@@ -0,0 +1,55 @@
+set hive.auto.convert.join=true;
+
+DROP TABLE IF EXISTS test_1;
+CREATE TABLE test_1
+(
+member BIGINT
+, age VARCHAR (100)
+)
+STORED AS TEXTFILE
+;
+DROP TABLE IF EXISTS test_2;
+CREATE TABLE test_2
+(
+member BIGINT
+)
+STORED AS TEXTFILE
+;
+INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40');
+INSERT INTO test_2 VALUES (1), (2), (3);
+
+EXPLAIN
+SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+;
+
+SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/ql/src/test/results/clientpositive/llap/mapjoin3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/mapjoin3.q.out
new file mode 100644
index 0000000..cf1bebb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/mapjoin3.q.out
@@ -0,0 +1,200 @@
+PREHOOK: query: DROP TABLE IF EXISTS test_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test_1
+(
+member BIGINT
+, age VARCHAR (100)
+)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_1
+POSTHOOK: query: CREATE TABLE test_1
+(
+member BIGINT
+, age VARCHAR (100)
+)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_1
+PREHOOK: query: DROP TABLE IF EXISTS test_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test_2
+(
+member BIGINT
+)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_2
+POSTHOOK: query: CREATE TABLE test_2
+(
+member BIGINT
+)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_2
+PREHOOK: query: INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_1
+POSTHOOK: query: INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_1
+POSTHOOK: Lineage: test_1.age EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: test_1.member EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test_2 VALUES (1), (2), (3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_2
+POSTHOOK: query: INSERT INTO test_2 VALUES (1), (2), (3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_2
+POSTHOOK: Lineage: test_2.member EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: EXPLAIN
+SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: member (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col2, _col3
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), _col2 (type: varchar(100)), _col3 (type: varchar(100))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: test_1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: member (type: bigint), age (type: varchar(100)), age (type: varchar(100))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: varchar(100)), _col2 (type: varchar(100))
+ Execution mode: llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_1
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_1
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1 20 20
+2 30 30
+3 40 40
http://git-wip-us.apache.org/repos/asf/hive/blob/4486f2a9/ql/src/test/results/clientpositive/mapjoin3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mapjoin3.q.out b/ql/src/test/results/clientpositive/mapjoin3.q.out
new file mode 100644
index 0000000..e0aa2d3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/mapjoin3.q.out
@@ -0,0 +1,196 @@
+PREHOOK: query: DROP TABLE IF EXISTS test_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test_1
+(
+member BIGINT
+, age VARCHAR (100)
+)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_1
+POSTHOOK: query: CREATE TABLE test_1
+(
+member BIGINT
+, age VARCHAR (100)
+)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_1
+PREHOOK: query: DROP TABLE IF EXISTS test_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS test_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE test_2
+(
+member BIGINT
+)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_2
+POSTHOOK: query: CREATE TABLE test_2
+(
+member BIGINT
+)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_2
+PREHOOK: query: INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_1
+POSTHOOK: query: INSERT INTO test_1 VALUES (1, '20'), (2, '30'), (3, '40')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_1
+POSTHOOK: Lineage: test_1.age EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: test_1.member EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT INTO test_2 VALUES (1), (2), (3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_2
+POSTHOOK: query: INSERT INTO test_2 VALUES (1), (2), (3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@test_2
+POSTHOOK: Lineage: test_2.member EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: EXPLAIN
+SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_1:test_1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_1:test_1
+ TableScan
+ alias: test_1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: member (type: bigint), age (type: varchar(100)), age (type: varchar(100))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t2
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: member (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), _col2 (type: varchar(100)), _col3 (type: varchar(100))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_1
+PREHOOK: Input: default@test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+t2.member
+, t1.age_1
+, t1.age_2
+FROM
+test_2 t2
+LEFT JOIN (
+SELECT
+member
+, age as age_1
+, age as age_2
+FROM
+test_1
+) t1
+ON t2.member = t1.member
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_1
+POSTHOOK: Input: default@test_2
+#### A masked pattern was here ####
+1 20 20
+2 30 30
+3 40 40