You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/05/07 08:41:32 UTC
hive git commit: HIVE-13659: An empty where condition leads to
vectorization exceptions instead of throwing a compile time error (Matt
McCline, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 1f1740673 -> 5e5eec4ba
HIVE-13659: An empty where condition leads to vectorization exceptions instead of throwing a compile time error (Matt McCline, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5e5eec4b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5e5eec4b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5e5eec4b
Branch: refs/heads/master
Commit: 5e5eec4baa7df0a992aad2ccdf932af615d1d38d
Parents: 1f17406
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat May 7 00:34:18 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat May 7 00:34:18 2016 -0700
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 27 +-
.../queries/clientpositive/vector_empty_where.q | 23 ++
.../clientpositive/vector_empty_where.q.out | 254 +++++++++++++++++++
3 files changed, 302 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5e5eec4b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 9558d31..d213731 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -408,8 +408,31 @@ public class VectorizationContext {
VectorExpression expr = null;
switch (mode) {
case FILTER:
- //Important: It will come here only if the column is being used as a boolean
- expr = new SelectColumnIsTrue(columnNum);
+ // Evaluate the column as a boolean, converting if necessary.
+ TypeInfo typeInfo = exprDesc.getTypeInfo();
+ if (typeInfo.getCategory() == Category.PRIMITIVE &&
+ ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
+ expr = new SelectColumnIsTrue(columnNum);
+ } else {
+ // The column is not a boolean: build an expression that converts it to boolean first.
+ ArrayList<ExprNodeDesc> exprAsList = new ArrayList<ExprNodeDesc>(1);
+ exprAsList.add(exprDesc);
+
+ // First try our cast method that will handle a few special cases.
+ VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList);
+ if (castToBooleanExpr == null) {
+
+ // The special-case cast did not apply; fall back to vectorizing the UDFToBoolean UDF.
+ castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList,
+ Mode.PROJECTION, null);
+ if (castToBooleanExpr == null) {
+ throw new HiveException("Cannot vectorize converting expression " +
+ exprDesc.getExprString() + " to boolean");
+ }
+ }
+ expr = new SelectColumnIsTrue(castToBooleanExpr.getOutputColumn());
+ expr.setChildExpressions(new VectorExpression[] {castToBooleanExpr});
+ }
break;
case PROJECTION:
expr = new IdentityExpression(columnNum, exprDesc.getTypeString());
http://git-wip-us.apache.org/repos/asf/hive/blob/5e5eec4b/ql/src/test/queries/clientpositive/vector_empty_where.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_empty_where.q b/ql/src/test/queries/clientpositive/vector_empty_where.q
new file mode 100644
index 0000000..0543a65
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_empty_where.q
@@ -0,0 +1,23 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+-- HIVE-
+explain
+select count (distinct cint) from alltypesorc where cstring1;
+
+select count (distinct cint) from alltypesorc where cstring1;
+
+explain
+select count (distinct cint) from alltypesorc where cint;
+
+select count (distinct cint) from alltypesorc where cint;
+
+explain
+select count (distinct cint) from alltypesorc where cfloat;
+
+select count (distinct cint) from alltypesorc where cfloat;
+
+explain
+select count (distinct cint) from alltypesorc where ctimestamp1;
+
+select count (distinct cint) from alltypesorc where ctimestamp1;
http://git-wip-us.apache.org/repos/asf/hive/blob/5e5eec4b/ql/src/test/results/clientpositive/vector_empty_where.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_empty_where.q.out b/ql/src/test/results/clientpositive/vector_empty_where.q.out
new file mode 100644
index 0000000..8f694da
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_empty_where.q.out
@@ -0,0 +1,254 @@
+PREHOOK: query: -- HIVE-
+explain
+select count (distinct cint) from alltypesorc where cstring1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-
+explain
+select count (distinct cint) from alltypesorc where cstring1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cstring1 (type: string)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: cint
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT cint)
+ keys: cint (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0:0._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where cstring1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cstring1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+6041
+PREHOOK: query: explain
+select count (distinct cint) from alltypesorc where cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count (distinct cint) from alltypesorc where cint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cint (type: int)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT cint)
+ keys: cint (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0:0._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+6082
+PREHOOK: query: explain
+select count (distinct cint) from alltypesorc where cfloat
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count (distinct cint) from alltypesorc where cfloat
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: cfloat (type: float)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: cint
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT cint)
+ keys: cint (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0:0._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where cfloat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cfloat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3022
+PREHOOK: query: explain
+select count (distinct cint) from alltypesorc where ctimestamp1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count (distinct cint) from alltypesorc where ctimestamp1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ctimestamp1 (type: timestamp)
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: cint
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT cint)
+ keys: cint (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0:0._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3022