You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2015/12/04 20:52:46 UTC

hive git commit: HIVE-11372 join with between predicate comparing integer types returns no rows when ORC format used (Matt McCline, reviewed by Prasanth J)

Repository: hive
Updated Branches:
  refs/heads/master 7f1aea3cd -> 3c8b9c27b


HIVE-11372 join with between predicate comparing integer types returns no rows when ORC format used (Matt McCline, reviewed by Prasanth J)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3c8b9c27
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3c8b9c27
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3c8b9c27

Branch: refs/heads/master
Commit: 3c8b9c27b18758b2b982ce3a65214bfb0e27314a
Parents: 7f1aea3
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Dec 4 11:52:15 2015 -0800
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Dec 4 11:52:35 2015 -0800

----------------------------------------------------------------------
 data/files/TINT                                 |   5 +
 data/files/TSINT                                |   5 +
 .../test/resources/testconfiguration.properties |   1 +
 .../ql/exec/vector/VectorizationContext.java    |   6 +
 .../ql/exec/vector/VectorizedBatchUtil.java     |   2 +-
 .../clientpositive/vector_between_columns.q     |  29 ++++
 .../tez/vector_between_columns.q.out            | 155 ++++++++++++++++++
 .../clientpositive/vector_between_columns.q.out | 157 +++++++++++++++++++
 8 files changed, 359 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/data/files/TINT
----------------------------------------------------------------------
diff --git a/data/files/TINT b/data/files/TINT
new file mode 100644
index 0000000..1aeb377
--- /dev/null
+++ b/data/files/TINT
@@ -0,0 +1,5 @@
+0|\N
+1|-1
+2|0
+3|1
+4|10

http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/data/files/TSINT
----------------------------------------------------------------------
diff --git a/data/files/TSINT b/data/files/TSINT
new file mode 100644
index 0000000..1aeb377
--- /dev/null
+++ b/data/files/TSINT
@@ -0,0 +1,5 @@
+0|\N
+1|-1
+2|0
+3|1
+4|10

http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 643eee6..2819d97 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -228,6 +228,7 @@ minitez.query.files.shared=acid_globallimit.q,\
   vector_aggregate_without_gby.q,\
   vector_auto_smb_mapjoin_14.q,\
   vector_between_in.q,\
+  vector_between_columns.q,\
   vector_binary_join_groupby.q,\
   vector_bround.q,\
   vector_bucket.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 95a4b9d..7e95244 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1913,6 +1913,12 @@ public class VectorizationContext {
       return null;
     }
 
+    // BETWEEN range bounds that are not constants (e.g. columns) are not currently supported.
+    if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) ||
+        !(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
+      return null;
+    }
+
     boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
     ExprNodeDesc colExpr = childExpr.get(1);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index d75d185..4d2430f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -685,7 +685,7 @@ public class VectorizedBatchUtil {
     LOG.info(sb.toString());
   }
 
-  public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) throws HiveException {
+  public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) {
     for (int i = 0; i < batch.size; i++) {
       int index = (batch.selectedInUse ? batch.selected[i] : i);
       debugDisplayOneRow(batch, index, prefix);

http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/test/queries/clientpositive/vector_between_columns.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_between_columns.q b/ql/src/test/queries/clientpositive/vector_between_columns.q
new file mode 100644
index 0000000..4c83d0a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_between_columns.q
@@ -0,0 +1,29 @@
+set hive.cli.print.header=true;
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
+set hive.mapred.mode=nonstrict;
+
+-- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
+
+create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
+
+load data local inpath '../../data/files/TSINT' into table TSINT_txt;
+
+load data local inpath '../../data/files/TINT' into table TINT_txt;
+
+create table TSINT stored as orc AS SELECT * FROM TSINT_txt;
+
+create table TINT stored as orc AS SELECT * FROM TINT_txt;
+
+-- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants.  We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint;
+
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint;

http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
new file mode 100644
index 0000000..972d694
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
@@ -0,0 +1,155 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TSINT_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TSINT_txt
+PREHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TINT_txt
+POSTHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TINT_txt
+PREHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tsint_txt
+POSTHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tsint_txt
+PREHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tint_txt
+POSTHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tint_txt
+PREHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tsint_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TSINT
+POSTHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tsint_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TSINT
+tsint_txt.rnum	tsint_txt.csint
+PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tint_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TINT
+POSTHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tint_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TINT
+tint_txt.rnum	tint_txt.cint
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants.  We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: type: QUERY
+POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants.  We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tint
+                  Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: rnum (type: int), cint (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 
+                        1 
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                      Filter Operator
+                        predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean)
+                        Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col0 (type: int), _col2 (type: int)
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: tsint
+                  Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: rnum (type: int), csint (type: smallint)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: int), _col1 (type: smallint)
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tint
+PREHOOK: Input: default@tsint
+#### A masked pattern was here ####
+POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tint
+POSTHOOK: Input: default@tsint
+#### A masked pattern was here ####
+tint.rnum	tsint.rnum
+1	1
+2	2
+3	3
+4	4

http://git-wip-us.apache.org/repos/asf/hive/blob/3c8b9c27/ql/src/test/results/clientpositive/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_between_columns.q.out b/ql/src/test/results/clientpositive/vector_between_columns.q.out
new file mode 100644
index 0000000..4837aba
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_between_columns.q.out
@@ -0,0 +1,157 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TSINT_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TSINT_txt
+PREHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TINT_txt
+POSTHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int )
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TINT_txt
+PREHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tsint_txt
+POSTHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tsint_txt
+PREHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tint_txt
+POSTHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tint_txt
+PREHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tsint_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TSINT
+POSTHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tsint_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TSINT
+tsint_txt.rnum	tsint_txt.csint
+PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tint_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TINT
+POSTHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tint_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TINT
+tint_txt.rnum	tint_txt.cint
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants.  We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: type: QUERY
+POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions
+-- are not constants.  We currently do not support the range expressions being columns.
+explain
+select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:tint 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:tint 
+          TableScan
+            alias: tint
+            Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rnum (type: int), cint (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 
+                  1 
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tsint
+            Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rnum (type: int), csint (type: smallint)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean)
+                  Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: int), _col2 (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tint
+PREHOOK: Input: default@tsint
+#### A masked pattern was here ####
+POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tint
+POSTHOOK: Input: default@tsint
+#### A masked pattern was here ####
+tint.rnum	tsint.rnum
+1	1
+2	2
+3	3
+4	4