You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2015/05/23 08:57:53 UTC

hive git commit: HIVE-10789: union distinct query with NULL constant on both the sides throws "Unsuported vector output type: void" error (Matt McCline reviewed by Gunther Hagleitner)

Repository: hive
Updated Branches:
  refs/heads/master 0d93438a3 -> bbdba9f44


HIVE-10789: union distinct query with NULL constant on both the sides throws "Unsuported vector output type: void" error (Matt McCline reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bbdba9f4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bbdba9f4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bbdba9f4

Branch: refs/heads/master
Commit: bbdba9f442636a6dce2c0aba9b3f133e2e2de496
Parents: 0d93438
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri May 22 23:57:19 2015 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri May 22 23:57:19 2015 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hive/ql/optimizer/physical/Vectorizer.java  |  18 +-
 .../clientpositive/vector_null_projection.q     |  18 ++
 .../tez/vector_null_projection.q.out            | 164 +++++++++++++++++++
 .../clientpositive/vector_null_projection.q.out | 163 ++++++++++++++++++
 5 files changed, 358 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9e95d1b..b7c7e03 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -234,6 +234,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
   vector_multi_insert.q,\
   vector_non_string_partition.q,\
   vector_nullsafe_join.q,\
+  vector_null_projection.q,\
   vector_orderby_5.q,\
   vector_outer_join0.q,\
   vector_outer_join1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index b429c56..3ed3c7e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1191,7 +1191,7 @@ public class Vectorizer implements PhysicalPlanResolver {
     return true;
   }
 
-  private boolean validateExprNodeDescRecursive(ExprNodeDesc desc) {
+  private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
     if (desc instanceof ExprNodeColumnDesc) {
       ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
       // Currently, we do not support vectorized virtual columns (see HIVE-5570).
@@ -1201,7 +1201,7 @@ public class Vectorizer implements PhysicalPlanResolver {
       }
     }
     String typeName = desc.getTypeInfo().getTypeName();
-    boolean ret = validateDataType(typeName);
+    boolean ret = validateDataType(typeName, mode);
     if (!ret) {
       LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
       return false;
@@ -1215,7 +1215,8 @@ public class Vectorizer implements PhysicalPlanResolver {
     }
     if (desc.getChildren() != null) {
       for (ExprNodeDesc d: desc.getChildren()) {
-        boolean r = validateExprNodeDescRecursive(d);
+        // Don't restrict child expressions for projection.  Always use looser FILTER mode.
+        boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
         if (!r) {
           return false;
         }
@@ -1229,7 +1230,7 @@ public class Vectorizer implements PhysicalPlanResolver {
   }
 
   boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
-    if (!validateExprNodeDescRecursive(desc)) {
+    if (!validateExprNodeDescRecursive(desc, mode)) {
       return false;
     }
     try {
@@ -1312,8 +1313,13 @@ public class Vectorizer implements PhysicalPlanResolver {
     return false;
   }
 
-  private boolean validateDataType(String type) {
-    return supportedDataTypesPattern.matcher(type.toLowerCase()).matches();
+  private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) {
+    type = type.toLowerCase();
+    boolean result = supportedDataTypesPattern.matcher(type).matches();
+    if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) {
+      return false;
+    }
+    return result;
   }
 
   private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName,

http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/test/queries/clientpositive/vector_null_projection.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_null_projection.q b/ql/src/test/queries/clientpositive/vector_null_projection.q
new file mode 100644
index 0000000..765e45f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_null_projection.q
@@ -0,0 +1,18 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+create table a(s string) stored as orc;
+create table b(s string) stored as orc;
+insert into table a values('aaa');
+insert into table b values('aaa');
+
+-- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a;
+
+select NULL from a;
+
+explain
+select NULL as x from a union distinct select NULL as x from b;
+
+select NULL as x from a union distinct select NULL as x from b;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
new file mode 100644
index 0000000..9b7b698
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
@@ -0,0 +1,164 @@
+PREHOOK: query: create table a(s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: create table b(s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b
+POSTHOOK: query: create table b(s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b
+PREHOOK: query: insert into table a values('aaa')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@a
+POSTHOOK: query: insert into table a values('aaa')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@a
+POSTHOOK: Lineage: a.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into table b values('aaa')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@b
+POSTHOOK: query: insert into table b values('aaa')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@b
+POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: null (type: void)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select NULL from a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select NULL from a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: explain
+select NULL as x from a union distinct select NULL as x from b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select NULL as x from a union distinct select NULL as x from b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Union 2 (CONTAINS)
+        Map 4 <- Union 2 (CONTAINS)
+        Reducer 3 <- Union 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Select Operator
+                    Select Operator
+                      Group By Operator
+                        keys: null (type: void)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Reduce Output Operator
+                          key expressions: _col0 (type: void)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: void)
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Select Operator
+                    Select Operator
+                      Group By Operator
+                        keys: null (type: void)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Reduce Output Operator
+                          key expressions: _col0 (type: void)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: void)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: void)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Union 2 
+            Vertex: Union 2
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select NULL as x from a union distinct select NULL as x from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+#### A masked pattern was here ####
+POSTHOOK: query: select NULL as x from a union distinct select NULL as x from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+#### A masked pattern was here ####
+NULL

http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/test/results/clientpositive/vector_null_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_null_projection.q.out b/ql/src/test/results/clientpositive/vector_null_projection.q.out
new file mode 100644
index 0000000..7c3136f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_null_projection.q.out
@@ -0,0 +1,163 @@
+PREHOOK: query: create table a(s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: create table b(s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b
+POSTHOOK: query: create table b(s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b
+PREHOOK: query: insert into table a values('aaa')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@a
+POSTHOOK: query: insert into table a values('aaa')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@a
+POSTHOOK: Lineage: a.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into table b values('aaa')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@b
+POSTHOOK: query: insert into table b values('aaa')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@b
+POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: null (type: void)
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select NULL from a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select NULL from a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: explain
+select NULL as x from a union distinct select NULL as x from b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select NULL as x from a union distinct select NULL as x from b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Union
+                Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                Select Operator
+                  Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  Group By Operator
+                    keys: null (type: void)
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: void)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: void)
+                      Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+          TableScan
+            alias: b
+            Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Union
+                Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                Select Operator
+                  Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  Group By Operator
+                    keys: null (type: void)
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: void)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: void)
+                      Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: void)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select NULL as x from a union distinct select NULL as x from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+#### A masked pattern was here ####
+POSTHOOK: query: select NULL as x from a union distinct select NULL as x from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+#### A masked pattern was here ####
+NULL