You are viewing a plain-text version of this content; the link to the canonical version was not preserved in this copy.
Posted to commits@hive.apache.org by mm...@apache.org on 2015/05/23 08:57:53 UTC
hive git commit: HIVE-10789: union distinct query with NULL constant on both the sides throws "Unsuported vector output type: void" error (Matt McCline reviewed by Gunther Hagleitner)
Repository: hive
Updated Branches:
refs/heads/master 0d93438a3 -> bbdba9f44
HIVE-10789: union distinct query with NULL constant on both the sides throws "Unsuported vector output type: void" error (Matt McCline reviewed by Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bbdba9f4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bbdba9f4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bbdba9f4
Branch: refs/heads/master
Commit: bbdba9f442636a6dce2c0aba9b3f133e2e2de496
Parents: 0d93438
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri May 22 23:57:19 2015 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri May 22 23:57:19 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hive/ql/optimizer/physical/Vectorizer.java | 18 +-
.../clientpositive/vector_null_projection.q | 18 ++
.../tez/vector_null_projection.q.out | 164 +++++++++++++++++++
.../clientpositive/vector_null_projection.q.out | 163 ++++++++++++++++++
5 files changed, 358 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9e95d1b..b7c7e03 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -234,6 +234,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
vector_multi_insert.q,\
vector_non_string_partition.q,\
vector_nullsafe_join.q,\
+ vector_null_projection.q,\
vector_orderby_5.q,\
vector_outer_join0.q,\
vector_outer_join1.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index b429c56..3ed3c7e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1191,7 +1191,7 @@ public class Vectorizer implements PhysicalPlanResolver {
return true;
}
- private boolean validateExprNodeDescRecursive(ExprNodeDesc desc) {
+ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
if (desc instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
// Currently, we do not support vectorized virtual columns (see HIVE-5570).
@@ -1201,7 +1201,7 @@ public class Vectorizer implements PhysicalPlanResolver {
}
}
String typeName = desc.getTypeInfo().getTypeName();
- boolean ret = validateDataType(typeName);
+ boolean ret = validateDataType(typeName, mode);
if (!ret) {
LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
return false;
@@ -1215,7 +1215,8 @@ public class Vectorizer implements PhysicalPlanResolver {
}
if (desc.getChildren() != null) {
for (ExprNodeDesc d: desc.getChildren()) {
- boolean r = validateExprNodeDescRecursive(d);
+ // Don't restrict child expressions for projection. Always use looser FILTER mode.
+ boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
if (!r) {
return false;
}
@@ -1229,7 +1230,7 @@ public class Vectorizer implements PhysicalPlanResolver {
}
boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
- if (!validateExprNodeDescRecursive(desc)) {
+ if (!validateExprNodeDescRecursive(desc, mode)) {
return false;
}
try {
@@ -1312,8 +1313,13 @@ public class Vectorizer implements PhysicalPlanResolver {
return false;
}
- private boolean validateDataType(String type) {
- return supportedDataTypesPattern.matcher(type.toLowerCase()).matches();
+ private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) {
+ type = type.toLowerCase();
+ boolean result = supportedDataTypesPattern.matcher(type).matches();
+ if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) {
+ return false;
+ }
+ return result;
}
private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName,
http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/test/queries/clientpositive/vector_null_projection.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_null_projection.q b/ql/src/test/queries/clientpositive/vector_null_projection.q
new file mode 100644
index 0000000..765e45f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_null_projection.q
@@ -0,0 +1,18 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+create table a(s string) stored as orc;
+create table b(s string) stored as orc;
+insert into table a values('aaa');
+insert into table b values('aaa');
+
+-- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a;
+
+select NULL from a;
+
+explain
+select NULL as x from a union distinct select NULL as x from b;
+
+select NULL as x from a union distinct select NULL as x from b;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
new file mode 100644
index 0000000..9b7b698
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_null_projection.q.out
@@ -0,0 +1,164 @@
+PREHOOK: query: create table a(s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: create table b(s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b
+POSTHOOK: query: create table b(s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b
+PREHOOK: query: insert into table a values('aaa')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@a
+POSTHOOK: query: insert into table a values('aaa')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@a
+POSTHOOK: Lineage: a.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into table b values('aaa')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@b
+POSTHOOK: query: insert into table b values('aaa')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@b
+POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: null (type: void)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select NULL from a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select NULL from a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: explain
+select NULL as x from a union distinct select NULL as x from b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select NULL as x from a union distinct select NULL as x from b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 4 <- Union 2 (CONTAINS)
+ Reducer 3 <- Union 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Select Operator
+ Select Operator
+ Group By Operator
+ keys: null (type: void)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: void)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: void)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Select Operator
+ Select Operator
+ Group By Operator
+ keys: null (type: void)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: void)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: void)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: void)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select NULL as x from a union distinct select NULL as x from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+#### A masked pattern was here ####
+POSTHOOK: query: select NULL as x from a union distinct select NULL as x from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+#### A masked pattern was here ####
+NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/bbdba9f4/ql/src/test/results/clientpositive/vector_null_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_null_projection.q.out b/ql/src/test/results/clientpositive/vector_null_projection.q.out
new file mode 100644
index 0000000..7c3136f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_null_projection.q.out
@@ -0,0 +1,163 @@
+PREHOOK: query: create table a(s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@a
+POSTHOOK: query: create table a(s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@a
+PREHOOK: query: create table b(s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@b
+POSTHOOK: query: create table b(s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@b
+PREHOOK: query: insert into table a values('aaa')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@a
+POSTHOOK: query: insert into table a values('aaa')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@a
+POSTHOOK: Lineage: a.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into table b values('aaa')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@b
+POSTHOOK: query: insert into table b values('aaa')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@b
+POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a
+PREHOOK: type: QUERY
+POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
+explain
+select NULL from a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: null (type: void)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select NULL from a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+#### A masked pattern was here ####
+POSTHOOK: query: select NULL from a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: explain
+select NULL as x from a union distinct select NULL as x from b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select NULL as x from a union distinct select NULL as x from b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ keys: null (type: void)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: void)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: void)
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ TableScan
+ alias: b
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Union
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ keys: null (type: void)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: void)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: void)
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: void)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select NULL as x from a union distinct select NULL as x from b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@b
+#### A masked pattern was here ####
+POSTHOOK: query: select NULL as x from a union distinct select NULL as x from b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@b
+#### A masked pattern was here ####
+NULL