You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sp...@apache.org on 2016/05/27 15:37:32 UTC
[11/48] hive git commit: HIVE-13826: Make VectorUDFAdaptor work for
GenericUDFBetween when used as FILTER (Matt McCline,
reviewed by Ashutosh Chauhan)
HIVE-13826: Make VectorUDFAdaptor work for GenericUDFBetween when used as FILTER (Matt McCline, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/71725869
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/71725869
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/71725869
Branch: refs/heads/java8
Commit: 7172586966739de0ba1659bf9abcea40d109b341
Parents: 9bebaf6
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu May 26 08:25:29 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu May 26 08:25:29 2016 -0700
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 22 +++-
.../clientpositive/vector_between_columns.q | 8 +-
.../tez/vector_between_columns.q.out | 116 +++++++++++++++++-
.../clientpositive/vector_between_columns.q.out | 117 ++++++++++++++++++-
4 files changed, 250 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/71725869/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 886e222..a76e31d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1965,8 +1965,17 @@ public class VectorizationContext {
private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, Mode mode)
throws HiveException {
- if (mode != Mode.PROJECTION) {
- return null;
+ boolean isFilter = false; // Assume.
+ if (mode == Mode.FILTER) {
+
+ // Is output type a BOOLEAN?
+ TypeInfo resultTypeInfo = expr.getTypeInfo();
+ if (resultTypeInfo.getCategory() == Category.PRIMITIVE &&
+ ((PrimitiveTypeInfo) resultTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
+ isFilter = true;
+ } else {
+ return null;
+ }
}
//GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
@@ -2032,7 +2041,14 @@ public class VectorizationContext {
for (Integer i : exprResultColumnNums) {
ocm.freeOutputColumn(i);
}
- return ve;
+
+ if (isFilter) {
+ SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputCol);
+ filterVectorExpr.setChildExpressions(new VectorExpression[] {ve});
+ return filterVectorExpr;
+ } else {
+ return ve;
+ }
}
public static boolean isStringFamily(String resultType) {
http://git-wip-us.apache.org/repos/asf/hive/blob/71725869/ql/src/test/queries/clientpositive/vector_between_columns.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_between_columns.q b/ql/src/test/queries/clientpositive/vector_between_columns.q
index ba38445..41f9243 100644
--- a/ql/src/test/queries/clientpositive/vector_between_columns.q
+++ b/ql/src/test/queries/clientpositive/vector_between_columns.q
@@ -7,8 +7,7 @@ set hive.mapred.mode=nonstrict;
-- SORT_QUERY_RESULTS
--
--- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
--- because the mode = FILTER is not supported yet.
+-- Verify the VectorUDFAdaptor to GenericUDFBetween works for PROJECTION and FILTER.
--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
@@ -25,6 +24,11 @@ create table TSINT stored as orc AS SELECT * FROM TSINT_txt;
create table TINT stored as orc AS SELECT * FROM TINT_txt;
+explain
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint;
+
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint;
+
explain
select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint;
http://git-wip-us.apache.org/repos/asf/hive/blob/71725869/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
index 8a9978b..939aab5 100644
--- a/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_between_columns.q.out
@@ -1,7 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
--
--- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
--- because the mode = FILTER is not supported yet.
+-- Verify the VectorUDFAdaptor to GenericUDFBetween works for PROJECTION and FILTER.
--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
@@ -10,8 +9,7 @@ PREHOOK: Output: database:default
PREHOOK: Output: default@TSINT_txt
POSTHOOK: query: -- SORT_QUERY_RESULTS
--
--- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
--- because the mode = FILTER is not supported yet.
+-- Verify the VectorUDFAdaptor to GenericUDFBetween works for PROJECTION and FILTER.
--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
@@ -70,6 +68,115 @@ POSTHOOK: Output: default@TINT
POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, type:int, comment:null), ]
POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ]
tint_txt.rnum tint_txt.cint
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: explain
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tint
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rnum (type: int), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: tsint
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rnum (type: int), csint (type: smallint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: smallint)
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tint
+PREHOOK: Input: default@tsint
+#### A masked pattern was here ####
+POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tint
+POSTHOOK: Input: default@tsint
+#### A masked pattern was here ####
+tint.rnum tsint.rnum tint.cint tsint.csint between_col
+0 0 NULL NULL NoOk
+0 1 NULL -1 NoOk
+0 2 NULL 0 NoOk
+0 3 NULL 1 NoOk
+0 4 NULL 10 NoOk
+1 0 -1 NULL NoOk
+1 1 -1 -1 Ok
+1 2 -1 0 NoOk
+1 3 -1 1 NoOk
+1 4 -1 10 NoOk
+2 0 0 NULL NoOk
+2 1 0 -1 NoOk
+2 2 0 0 Ok
+2 3 0 1 NoOk
+2 4 0 10 NoOk
+3 0 1 NULL NoOk
+3 1 1 -1 NoOk
+3 2 1 0 NoOk
+3 3 1 1 Ok
+3 4 1 10 NoOk
+4 0 10 NULL NoOk
+4 1 10 -1 NoOk
+4 2 10 0 NoOk
+4 3 10 1 NoOk
+4 4 10 10 Ok
Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: explain
select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
@@ -123,6 +230,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Map 2
Map Operator Tree:
TableScan
http://git-wip-us.apache.org/repos/asf/hive/blob/71725869/ql/src/test/results/clientpositive/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_between_columns.q.out b/ql/src/test/results/clientpositive/vector_between_columns.q.out
index 5143074..a1bd6c6 100644
--- a/ql/src/test/results/clientpositive/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/vector_between_columns.q.out
@@ -1,7 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
--
--- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
--- because the mode = FILTER is not supported yet.
+-- Verify the VectorUDFAdaptor to GenericUDFBetween works for PROJECTION and FILTER.
--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
@@ -10,8 +9,7 @@ PREHOOK: Output: database:default
PREHOOK: Output: default@TSINT_txt
POSTHOOK: query: -- SORT_QUERY_RESULTS
--
--- The following WILL NOT BE ABLE TO USE the VectorUDFAdaptor to GenericUDFBetween
--- because the mode = FILTER is not supported yet.
+-- Verify the VectorUDFAdaptor to GenericUDFBetween works for PROJECTION and FILTER.
--
create table if not exists TSINT_txt ( RNUM int , CSINT smallint )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'
@@ -70,6 +68,116 @@ POSTHOOK: Output: default@TINT
POSTHOOK: Lineage: tint.cint SIMPLE [(tint_txt)tint_txt.FieldSchema(name:cint, type:int, comment:null), ]
POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, type:int, comment:null), ]
tint_txt.rnum tint_txt.cint
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: explain
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:tint
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:tint
+ TableScan
+ alias: tint
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rnum (type: int), cint (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tsint
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: rnum (type: int), csint (type: smallint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tint
+PREHOOK: Input: default@tsint
+#### A masked pattern was here ####
+POSTHOOK: query: select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col from tint , tsint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tint
+POSTHOOK: Input: default@tsint
+#### A masked pattern was here ####
+tint.rnum tsint.rnum tint.cint tsint.csint between_col
+0 0 NULL NULL NoOk
+0 1 NULL -1 NoOk
+0 2 NULL 0 NoOk
+0 3 NULL 1 NoOk
+0 4 NULL 10 NoOk
+1 0 -1 NULL NoOk
+1 1 -1 -1 Ok
+1 2 -1 0 NoOk
+1 3 -1 1 NoOk
+1 4 -1 10 NoOk
+2 0 0 NULL NoOk
+2 1 0 -1 NoOk
+2 2 0 0 Ok
+2 3 0 1 NoOk
+2 4 0 10 NoOk
+3 0 1 NULL NoOk
+3 1 1 -1 NoOk
+3 2 1 0 NoOk
+3 3 1 1 Ok
+3 4 1 10 NoOk
+4 0 10 NULL NoOk
+4 1 10 -1 NoOk
+4 2 10 0 NoOk
+4 3 10 1 NoOk
+4 4 10 10 Ok
Warning: Map Join MAPJOIN[10][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
PREHOOK: query: explain
select tint.rnum, tsint.rnum, tint.cint, tsint.csint from tint , tsint where tint.cint between tsint.csint and tsint.csint
@@ -136,6 +244,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
Local Work:
Map Reduce Local Work