You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/07/17 17:05:33 UTC
hive git commit: HIVE-13191: DummyTable map joins mix up columns
between tables (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 97649669d -> dbb228e0e
HIVE-13191: DummyTable map joins mix up columns between tables (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dbb228e0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dbb228e0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dbb228e0
Branch: refs/heads/master
Commit: dbb228e0e58b845b7a4a4bd031a20058ca531dc6
Parents: 9764966
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Sun Jul 17 10:54:45 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Sun Jul 17 17:58:28 2016 +0100
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 18 +++-
.../hadoop/hive/ql/plan/ExprNodeDescUtils.java | 6 +-
ql/src/test/queries/clientpositive/mapjoin2.q | 8 ++
.../test/results/clientpositive/mapjoin2.q.out | 36 ++++++++
.../results/clientpositive/tez/mapjoin2.q.out | 92 ++++++++++++++++++++
6 files changed, 158 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/dbb228e0/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 73fcb03..07d55bc 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -151,6 +151,7 @@ minitez.query.files.shared=acid_globallimit.q,\
load_dyn_part1.q,\
load_dyn_part2.q,\
load_dyn_part3.q,\
+ mapjoin2.q,\
mapjoin_mapjoin.q,\
mapreduce1.q,\
mapreduce2.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/dbb228e0/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index a6d1fb8..7fafe5e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7947,7 +7947,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// backtrack can be null when input is script operator
ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, child);
- int kindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
+ int kindex;
+ if (exprBack == null) {
+ kindex = -1;
+ } else if (ExprNodeDescUtils.isConstant(exprBack)) {
+ kindex = reduceKeysBack.indexOf(exprBack);
+ } else {
+ kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
+ }
if (kindex >= 0) {
ColumnInfo newColInfo = new ColumnInfo(colInfo);
newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
@@ -7959,7 +7966,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
index[i] = kindex;
continue;
}
- int vindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
+ int vindex;
+ if (exprBack == null) {
+ vindex = -1;
+ } else if (ExprNodeDescUtils.isConstant(exprBack)) {
+ vindex = reduceValuesBack.indexOf(exprBack);
+ } else {
+ vindex = ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
+ }
if (vindex >= 0) {
index[i] = -vindex - 1;
continue;
http://git-wip-us.apache.org/repos/asf/hive/blob/dbb228e0/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 2b7b0c3..4c699a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -234,6 +234,10 @@ public class ExprNodeDescUtils {
if (parent == null) {
return source;
}
+ if (!foldExpr && isConstant(source)) {
+ //constant, just return
+ return source;
+ }
if (source instanceof ExprNodeGenericFuncDesc) {
// all children expression should be resolved
ExprNodeGenericFuncDesc function = (ExprNodeGenericFuncDesc) source.clone();
@@ -268,7 +272,7 @@ public class ExprNodeDescUtils {
field.setDesc(fieldDesc);
return field;
}
- // constant or null expr, just return
+ // just return
return source;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/dbb228e0/ql/src/test/queries/clientpositive/mapjoin2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mapjoin2.q b/ql/src/test/queries/clientpositive/mapjoin2.q
index fcc3bca..068ecbe 100644
--- a/ql/src/test/queries/clientpositive/mapjoin2.q
+++ b/ql/src/test/queries/clientpositive/mapjoin2.q
@@ -10,3 +10,11 @@ select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a
select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n;
select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n;
+
+select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
+
+select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
+
+select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
+
+select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/dbb228e0/ql/src/test/results/clientpositive/mapjoin2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mapjoin2.q.out b/ql/src/test/results/clientpositive/mapjoin2.q.out
index 7bf3e6a..5ae1ac7 100644
--- a/ql/src/test/results/clientpositive/mapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/mapjoin2.q.out
@@ -54,3 +54,39 @@ POSTHOOK: Input: default@tbl
#### A masked pattern was here ####
false false true true
true true false false
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11 1 1 0 0
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11 1 1 0 0
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11 1 1 0 0
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11 1 1 0 0
http://git-wip-us.apache.org/repos/asf/hive/blob/dbb228e0/ql/src/test/results/clientpositive/tez/mapjoin2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/mapjoin2.q.out b/ql/src/test/results/clientpositive/tez/mapjoin2.q.out
new file mode 100644
index 0000000..5ae1ac7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/mapjoin2.q.out
@@ -0,0 +1,92 @@
+PREHOOK: query: create table tbl (n bigint, t string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl
+POSTHOOK: query: create table tbl (n bigint, t string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl
+PREHOOK: query: insert into tbl values (1, 'one')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@tbl
+POSTHOOK: query: insert into tbl values (1, 'one')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@tbl
+POSTHOOK: Lineage: tbl.n EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tbl.t SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into tbl values(2, 'two')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@tbl
+POSTHOOK: query: insert into tbl values(2, 'two')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@tbl
+POSTHOOK: Lineage: tbl.n EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tbl.t SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a left outer join (select * from tbl where 1 = 2) b on a.n = b.n
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a left outer join (select * from tbl where 1 = 2) b on a.n = b.n
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl
+#### A masked pattern was here ####
+1 one true true
+PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl where 2 = 1) a right outer join (select * from tbl where n = 2) b on a.n = b.n
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl
+#### A masked pattern was here ####
+true true 2 two
+PREHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl
+#### A masked pattern was here ####
+POSTHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl
+#### A masked pattern was here ####
+false false true true
+true true false false
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11 1 1 0 0
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11 1 1 0 0
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11 1 1 0 0
+PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+11 1 1 0 0