You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jx...@apache.org on 2015/11/06 18:33:00 UTC

[18/55] [abbrv] hive git commit: HIVE-12209: Vectorize simple UDFs with null arguments (Gopal V, reviewed by Sergey Shelukhin)

HIVE-12209: Vectorize simple UDFs with null arguments (Gopal V, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/db2c5009
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/db2c5009
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/db2c5009

Branch: refs/heads/master-fixed
Commit: db2c5009b243aeb5be09225b03476d1c12ebef84
Parents: 492a10f
Author: Gopal V <go...@apache.org>
Authored: Mon Nov 2 19:42:35 2015 -0800
Committer: Gopal V <go...@apache.org>
Committed: Mon Nov 2 19:42:35 2015 -0800

----------------------------------------------------------------------
 .../ql/exec/vector/VectorizationContext.java    |   7 +-
 .../ql/exec/vector/udf/VectorUDFArgDesc.java    |  19 ++--
 .../queries/clientpositive/vectorized_case.q    |  19 ++++
 .../clientpositive/spark/vectorized_case.q.out  | 109 +++++++++++++++++--
 .../clientpositive/tez/vectorized_case.q.out    | 109 +++++++++++++++++--
 .../clientpositive/vectorized_case.q.out        |  69 ++++++++++++
 6 files changed, 301 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 3489c9c..e7a829e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2022,12 +2022,7 @@ public class VectorizationContext {
         variableArgPositions.add(i);
         argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
       } else if (child instanceof ExprNodeConstantDesc) {
-         if (((ExprNodeConstantDesc) child).getValue() == null) {
-           // cannot handle constant null at the moment
-           throw new HiveException("Unable to vectorize custom UDF. Custom udf containing "
-               + "constant null argument cannot be currently vectorized.");
-         }
-        // this is a constant
+        // this is a constant (or null)
         argDescs[i].setConstant((ExprNodeConstantDesc) child);
       } else {
         throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : "

http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
index e113980..6abfe63 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFArgDesc.java
@@ -59,13 +59,18 @@ public class VectorUDFArgDesc implements Serializable {
    * during initialization.
    */
   public void prepareConstant() {
-    PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo())
-        .getPrimitiveCategory();
-
-    // Convert from Java to Writable
-    Object writableValue = PrimitiveObjectInspectorFactory
-        .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject(
-          constExpr.getValue());
+    final Object writableValue;
+    if (constExpr != null) {
+      PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo())
+          .getPrimitiveCategory();
+
+      // Convert from Java to Writable
+      writableValue = PrimitiveObjectInspectorFactory
+          .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject(
+            constExpr.getValue());
+    } else {
+      writableValue = null;
+    }
 
     constObjVal = new GenericUDF.DeferredJavaObject(writableValue);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/test/queries/clientpositive/vectorized_case.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_case.q b/ql/src/test/queries/clientpositive/vectorized_case.q
index 8799fbb..e74bf82 100644
--- a/ql/src/test/queries/clientpositive/vectorized_case.q
+++ b/ql/src/test/queries/clientpositive/vectorized_case.q
@@ -1,4 +1,5 @@
 set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
 set hive.vectorized.execution.enabled = true
 ;
 explain
@@ -36,3 +37,21 @@ where csmallint = 418
 or csmallint = 12205
 or csmallint = 10583
 ;
+explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else null
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then null
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+;

http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index c2250e6..ade9cfe 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -35,21 +35,40 @@ or csmallint = 12205
 or csmallint = 10583
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: alltypesorc
-          Filter Operator
-            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
-            Select Operator
-              expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
-              outputColumnNames: _col0, _col1, _col2
-              ListSink
+        ListSink
 
 PREHOOK: query: select 
   csmallint,
@@ -93,3 +112,75 @@ POSTHOOK: Input: default@alltypesorc
 10583	c	c
 418	a	a
 12205	b	b
+PREHOOK: query: explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else null
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then null
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else null
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then null
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN (null) ELSE ('c') END (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
index c2250e6..136714d 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_case.q.out
@@ -35,21 +35,40 @@ or csmallint = 12205
 or csmallint = 10583
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: alltypesorc
-          Filter Operator
-            predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
-            Select Operator
-              expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN ('b') ELSE ('c') END (type: string)
-              outputColumnNames: _col0, _col1, _col2
-              ListSink
+        ListSink
 
 PREHOOK: query: select 
   csmallint,
@@ -93,3 +112,75 @@ POSTHOOK: Input: default@alltypesorc
 10583	c	c
 418	a	a
 12205	b	b
+PREHOOK: query: explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else null
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then null
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else null
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then null
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN (null) ELSE ('c') END (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/db2c5009/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 73bf12d..347a93e 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -109,3 +109,72 @@ POSTHOOK: Input: default@alltypesorc
 10583	c	c
 418	a	a
 12205	b	b
+PREHOOK: query: explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else null
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then null
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 
+  csmallint,
+  case 
+    when csmallint = 418 then "a"
+    when csmallint = 12205 then "b"
+    else null
+  end,
+  case csmallint
+    when 418 then "a"
+    when 12205 then null
+    else "c"
+  end
+from alltypesorc
+where csmallint = 418
+or csmallint = 12205
+or csmallint = 10583
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE (csmallint) WHEN (418) THEN ('a') WHEN (12205) THEN (null) ELSE ('c') END (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+