You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jx...@apache.org on 2015/11/06 18:33:00 UTC
[18/55] [abbrv] hive git commit: HIVE-12209: Vectorize simple UDFs with null arguments (Gopal V, reviewed by Sergey Shelukhin)
HIVE-12209: Vectorize simple UDFs with null arguments (Gopal V, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/db2c5009
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/db2c5009
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/db2c5009
Branch: refs/heads/master-fixed
Commit: db2c5009b243aeb5be09225b03476d1c12ebef84
Parents: 492a10f
Author: Gopal V <go...@apache.org>
Authored: Mon Nov 2 19:42:35 2015 -0800
Committer: Gopal V <go...@apache.org>
Committed: Mon Nov 2 19:42:35 2015 -0800
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 7 +-
.../ql/exec/vector/udf/VectorUDFArgDesc.java | 19 ++--
.../queries/clientpositive/vectorized_case.q | 19 ++++
.../clientpositive/spark/vectorized_case.q.out | 109 +++++++++++++++++--
.../clientpositive/tez/vectorized_case.q.out | 109 +++++++++++++++++--
.../clientpositive/vectorized_case.q.out | 69 ++++++++++++
6 files changed, 301 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 3489c9c..e7a829e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2022,12 +2022,7 @@ public class VectorizationContext {
variableArgPositions.add(i);
argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
} else if (child instanceof ExprNodeConstantDesc) {
- if (((ExprNodeConstantDesc) child).getValue() == null) {
- // cannot handle constant null at the moment
- throw new HiveException("Unable to vectorize custom UDF. Custom udf containing "
- + "constant null argument cannot be currently vectorized.");
- }
- // this is a constant
+ // this is a constant (or null)
argDescs[i].setConstant((ExprNodeConstantDesc) child);
} else {
throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : "
http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
index e113980..6abfe63 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
@@ -59,13 +59,18 @@ public class VectorUDFArgDesc implements Serializable {
* during initialization.
*/
public void prepareConstant() {
- PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo())
- .getPrimitiveCategory();
-
- // Convert from Java to Writable
- Object writableValue = PrimitiveObjectInspectorFactory
- .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject(
- constExpr.getValue());
+ final Object writableValue;
+ if (constExpr != null) {
+ PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo())
+ .getPrimitiveCategory();
+
+ // Convert from Java to Writable
+ writableValue = PrimitiveObjectInspectorFactory
+ .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject(
+ constExpr.getValue());
+ } else {
+ writableValue = null;
+ }
constObjVal = new GenericUDF.DeferredJavaObject(writableValue);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/test/queries/clientpositive/vectorized_case.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_case.q b/ql/src/test/queries/clientpositive/vectorized_case.q
index 8799fbb..e74bf82 100644
--- a/ql/src/test/queries/clientpositive/vectorized_case.q
+++ b/ql/src/test/queries/clientpositive/vectorized_case.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
set hive.vectorized.execution.enabled = true
;
explain
@@ -36,3 +37,21 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
;
+explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+;
http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index c2250e6..ade9cfe 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -35,21 +35,40 @@ or csmallint = 12205
or csmallint = 10583
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: alltypesorc
- Filter Operator
- predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
- Select Operator
- expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
- outputColumnNames: _col0, _col1, _col2
- ListSink
+ ListSink
PREHOOK: query: select
csmallint,
@@ -93,3 +112,75 @@ POSTHOOK: Input: default@alltypesorc
10583 c c
418 a a
12205 b b
+PREHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN (null) ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
index c2250e6..136714d 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
@@ -35,21 +35,40 @@ or csmallint = 12205
or csmallint = 10583
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: alltypesorc
- Filter Operator
- predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
- Select Operator
- expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
- outputColumnNames: _col0, _col1, _col2
- ListSink
+ ListSink
PREHOOK: query: select
csmallint,
@@ -93,3 +112,75 @@ POSTHOOK: Input: default@alltypesorc
10583 c c
418 a a
12205 b b
+PREHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN (null) ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 73bf12d..347a93e 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -109,3 +109,72 @@ POSTHOOK: Input: default@alltypesorc
10583 c c
418 a a
12205 b b
+PREHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select
+ csmallint,
+ case
+ when csmallint = 418 then "a"
+ when csmallint = 12205 then "b"
+ else null
+ end,
+ case csmallint
+ when 418 then "a"
+ when 12205 then null
+ else "c"
+ end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN (null) ELSE ('c') END (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+