You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2015/12/04 20:52:46 UTC
hive git commit: HIVE-11372: Join with BETWEEN predicate comparing
integer types returns no rows when ORC format is used (Matt McCline,
reviewed by Prasanth J)
Repository: hive
Updated Branches:
refs/heads/master 7f1aea3cd -> 3c8b9c27b
HIVE-11372: Join with BETWEEN predicate comparing integer types returns no rows when ORC format is used (Matt McCline, reviewed by Prasanth J)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3c8b9c27
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3c8b9c27
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3c8b9c27
Branch: refs/heads/master
Commit: 3c8b9c27b18758b2b982ce3a65214bfb0e27314a
Parents: 7f1aea3
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Dec 4 11:52:15 2015 -0800
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Dec 4 11:52:35 2015 -0800
----------------------------------------------------------------------
data/files/TINT | 5 +
data/files/TSINT | 5 +
.../test/resources/testconfiguration.properties | 1 +
.../ql/exec/vector/VectorizationContext.java | 6 +
.../ql/exec/vector/VectorizedBatchUtil.java | 2 +-
.../clientpositive/vector_between_columns.q | 29 ++++
.../tez/vector_between_columns.q.out | 155 ++++++++++++++++++
.../clientpositive/vector_between_columns.q.out | 157 +++++++++++++++++++
8 files changed, 359 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/data/files/TINT
----------------------------------------------------------------------
diff --git a/data/files/TINT b/data/files/TINT
new file mode 100644
index 0000000..1aeb377
--- /dev/null
+++ b/data/files/TINT
@@ -0,0 +1,5 @@
+0|\N
+1|-1
+2|0
+3|1
+4|10
http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/data/files/TSINT
----------------------------------------------------------------------
diff --git a/data/files/TSINT b/data/files/TSINT
new file mode 100644
index 0000000..1aeb377
--- /dev/null
+++ b/data/files/TSINT
@@ -0,0 +1,5 @@
+0|\N
+1|-1
+2|0
+3|1
+4|10
http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 643eee6..2819d97 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -228,6 +228,7 @@ minitez.query.files.shared=acid_globallimit.q,\
vector_aggregate_without_gby.q,\
vector_auto_smb_mapjoin_14.q,\
vector_between_in.q,\
+ vector_between_columns.q,\
vector_binary_join_groupby.q,\
vector_bround.q,\
vector_bucket.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 95a4b9d..7e95244 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1913,6 +1913,12 @@ public class VectorizationContext {
return null;
}
+ // We currently support BETWEEN only when both range bounds are constants (scalars); columns or other non-constant bounds are not supported.
+ if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) ||
+ !(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
+ return null;
+ }
+
boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
ExprNodeDesc colExpr = childExpr.get(1);
http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index d75d185..4d2430f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -685,7 +685,7 @@ public class VectorizedBatchUtil {
LOG.info(sb.toString());
}
- public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) throws HiveException {
+ public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) {
for (int i = 0; i < batch.size; i++) {
int index = (batch.selectedInUse ? batch.selected[i] : i);
debugDisplayOneRow(batch, index, prefix);
http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/test/queries/clientpositive/vector_between_columns.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_between_columns.q b/ql/src/test/queries/clientpositive/vector_between_columns.q
new file mode 100644
index 0000000..4c83d0a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_between_columns.q
@@ -0,0 +1,29 @@
+set hive.cli.print.header=true;
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
+set hive.mapred.mode=nonstrict;
+
+-- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
+
+create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
+
+load data local inpath '../../data/files/TSINT' into table TSINT_txt;
+
+load data local inpath '../../data/files/TINT' into table TINT_txt;
+
+create table TSINT stored as orc AS SELECT * FROM TSINT_txt;
+
+create table TINT stored as orc AS SELECT * FROM TINT_txt;
+
+-- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants. We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint;
+
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint;
http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
new file mode 100644
index 0000000..972d694
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
@@ -0,0 +1,155 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TSINT_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TSINT_txt
+PREHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TINT_txt
+POSTHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TINT_txt
+PREHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tsint_txt
+POSTHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tsint_txt
+PREHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tint_txt
+POSTHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tint_txt
+PREHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tsint_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TSINT
+POSTHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tsint_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TSINT
+tsint_txt.rnum tsint_txt.csint
+PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tint_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TINT
+POSTHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tint_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TINT
+tint_txt.rnum tint_txt.cint
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants. We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: type: QUERY
+POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants. We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tint
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rnum (type: int), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean)
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: tsint
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rnum (type: int), csint (type: smallint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: smallint)
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tint
+PREHOOK: Input: default@tsint
+#### A masked pattern was here ####
+POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tint
+POSTHOOK: Input: default@tsint
+#### A masked pattern was here ####
+tint.rnum tsint.rnum
+1 1
+2 2
+3 3
+4 4
http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/test/results/clientpositive/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_between_columns.q.out b/ql/src/test/results/clientpositive/vector_between_columns.q.out
new file mode 100644
index 0000000..4837aba
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_between_columns.q.out
@@ -0,0 +1,157 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TSINT_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TSINT_txt
+PREHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TINT_txt
+POSTHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TINT_txt
+PREHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tsint_txt
+POSTHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tsint_txt
+PREHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tint_txt
+POSTHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tint_txt
+PREHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tsint_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TSINT
+POSTHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tsint_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TSINT
+tsint_txt.rnum tsint_txt.csint
+PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tint_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TINT
+POSTHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tint_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TINT
+tint_txt.rnum tint_txt.cint
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants. We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: type: QUERY
+POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants. We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:tint
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:tint
+ TableScan
+ alias: tint
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rnum (type: int), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tsint
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rnum (type: int), csint (type: smallint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean)
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tint
+PREHOOK: Input: default@tsint
+#### A masked pattern was here ####
+POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tint
+POSTHOOK: Input: default@tsint
+#### A masked pattern was here ####
+tint.rnum tsint.rnum
+1 1
+2 2
+3 3
+4 4