You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/05/07 08:41:32 UTC

hive git commit: HIVE-13659: An empty where condition leads to vectorization exceptions instead of throwing a compile time error (Matt McCline, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 1f1740673 -> 5e5eec4ba


HIVE-13659: An empty where condition leads to vectorization exceptions instead of throwing a compile time error (Matt McCline, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5e5eec4b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5e5eec4b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5e5eec4b

Branch: refs/heads/master
Commit: 5e5eec4baa7df0a992aad2ccdf932af615d1d38d
Parents: 1f17406
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sat May 7 00:34:18 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sat May 7 00:34:18 2016 -0700

----------------------------------------------------------------------
 .../ql/exec/vector/VectorizationContext.java    |  27 +-
 .../queries/clientpositive/vector_empty_where.q |  23 ++
 .../clientpositive/vector_empty_where.q.out     | 254 +++++++++++++++++++
 3 files changed, 302 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5e5eec4b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 9558d31..d213731 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -408,8 +408,31 @@ public class VectorizationContext {
     VectorExpression expr = null;
     switch (mode) {
       case FILTER:
-        //Important: It will come here only if the column is being used as a boolean
-        expr = new SelectColumnIsTrue(columnNum);
+        // Evaluate the column as a boolean, converting if necessary.
+        TypeInfo typeInfo = exprDesc.getTypeInfo();
+        if (typeInfo.getCategory() == Category.PRIMITIVE &&
+            ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
+          expr = new SelectColumnIsTrue(columnNum);
+        } else {
+          // Ok, we need to convert.
+          ArrayList<ExprNodeDesc> exprAsList = new ArrayList<ExprNodeDesc>(1);
+          exprAsList.add(exprDesc);
+
+          // First try our cast method that will handle a few special cases.
+          VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList);
+          if (castToBooleanExpr == null) {
+
+            // Ok, try the UDF.
+            castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList,
+                Mode.PROJECTION, null);
+            if (castToBooleanExpr == null) {
+              throw new HiveException("Cannot vectorize converting expression " +
+                  exprDesc.getExprString() + " to boolean");
+            }
+          }
+          expr = new SelectColumnIsTrue(castToBooleanExpr.getOutputColumn());
+          expr.setChildExpressions(new VectorExpression[] {castToBooleanExpr});
+        }
         break;
       case PROJECTION:
         expr = new IdentityExpression(columnNum, exprDesc.getTypeString());

http://git-wip-us.apache.org/repos/asf/hive/blob/5e5eec4b/ql/src/test/queries/clientpositive/vector_empty_where.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_empty_where.q b/ql/src/test/queries/clientpositive/vector_empty_where.q
new file mode 100644
index 0000000..0543a65
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_empty_where.q
@@ -0,0 +1,23 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+-- HIVE-
+explain
+select count (distinct cint) from alltypesorc where cstring1;
+
+select count (distinct cint) from alltypesorc where cstring1;
+
+explain
+select count (distinct cint) from alltypesorc where cint;
+
+select count (distinct cint) from alltypesorc where cint;
+
+explain
+select count (distinct cint) from alltypesorc where cfloat;
+
+select count (distinct cint) from alltypesorc where cfloat;
+
+explain
+select count (distinct cint) from alltypesorc where ctimestamp1;
+
+select count (distinct cint) from alltypesorc where ctimestamp1;

http://git-wip-us.apache.org/repos/asf/hive/blob/5e5eec4b/ql/src/test/results/clientpositive/vector_empty_where.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_empty_where.q.out b/ql/src/test/results/clientpositive/vector_empty_where.q.out
new file mode 100644
index 0000000..8f694da
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_empty_where.q.out
@@ -0,0 +1,254 @@
+PREHOOK: query: -- HIVE-
+explain
+select count (distinct cint) from alltypesorc where cstring1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-
+explain
+select count (distinct cint) from alltypesorc where cstring1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: cstring1 (type: string)
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int)
+                outputColumnNames: cint
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT cint)
+                  keys: cint (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col0:0._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where cstring1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cstring1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+6041
+PREHOOK: query: explain
+select count (distinct cint) from alltypesorc where cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count (distinct cint) from alltypesorc where cint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: cint (type: int)
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT cint)
+                keys: cint (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col0:0._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+6082
+PREHOOK: query: explain
+select count (distinct cint) from alltypesorc where cfloat
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count (distinct cint) from alltypesorc where cfloat
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: cfloat (type: float)
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int)
+                outputColumnNames: cint
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT cint)
+                  keys: cint (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col0:0._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where cfloat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where cfloat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3022
+PREHOOK: query: explain
+select count (distinct cint) from alltypesorc where ctimestamp1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count (distinct cint) from alltypesorc where ctimestamp1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ctimestamp1 (type: timestamp)
+              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int)
+                outputColumnNames: cint
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT cint)
+                  keys: cint (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col0:0._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count (distinct cint) from alltypesorc where ctimestamp1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3022