You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/26 01:00:02 UTC
hive git commit: HIVE-18800: Vectorization: VectorCoalesce doesn't
handle the all repeated NULLs case (Matt McCline,
reviewed by Gopal Vijayaraghavan)
Repository: hive
Updated Branches:
refs/heads/master 53a590b53 -> e8e5ab246
HIVE-18800: Vectorization: VectorCoalesce doesn't handle the all repeated NULLs case (Matt McCline, reviewed by Gopal Vijayaraghavan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e8e5ab24
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e8e5ab24
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e8e5ab24
Branch: refs/heads/master
Commit: e8e5ab24616aa834f4966efe3a5f437f6bee4d1d
Parents: 53a590b
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sun Feb 25 18:59:48 2018 -0600
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sun Feb 25 18:59:48 2018 -0600
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../exec/vector/expressions/VectorCoalesce.java | 11 +-
.../queries/clientpositive/vector_coalesce_4.q | 14 ++
.../clientpositive/llap/vector_coalesce_4.q.out | 146 +++++++++++++++++++
.../clientpositive/vector_coalesce_4.q.out | 120 +++++++++++++++
5 files changed, 289 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 4a52eb5..2776fe9 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -323,6 +323,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vector_coalesce.q,\
vector_coalesce_2.q,\
vector_coalesce_3.q,\
+ vector_coalesce_4.q,\
vector_complex_all.q,\
vector_count.q,\
vector_count_distinct.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
index 3a560ca..c66beb0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
@@ -225,9 +225,14 @@ public class VectorCoalesce extends VectorExpression {
// NULL out the remaining columns.
outputColVector.noNulls = false;
- for (int i = 0; i < unassignedColumnCount; i++) {
- final int batchIndex = unassignedBatchIndices[i];
- outputIsNull[batchIndex] = true;
+ if (isAllUnassigned) {
+ outputIsNull[0] = true;
+ outputColVector.isRepeating = true;
+ } else {
+ for (int i = 0; i < unassignedColumnCount; i++) {
+ final int batchIndex = unassignedBatchIndices[i];
+ outputIsNull[batchIndex] = true;
+ }
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/ql/src/test/queries/clientpositive/vector_coalesce_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_coalesce_4.q b/ql/src/test/queries/clientpositive/vector_coalesce_4.q
new file mode 100644
index 0000000..a050beb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_coalesce_4.q
@@ -0,0 +1,14 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+create table coalesce_test(a int, b int) stored as orc;
+
+insert into coalesce_test values (1, 2);
+
+-- Add a single NULL row that will come from ORC as isRepeating.
+insert into coalesce_test values (NULL, NULL);
+
+explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b;
+
+select coalesce(a, b) from coalesce_test order by a, b;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out
new file mode 100644
index 0000000..5c3093f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out
@@ -0,0 +1,146 @@
+PREHOOK: query: create table coalesce_test(a int, b int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: create table coalesce_test(a int, b int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@coalesce_test
+PREHOOK: query: insert into coalesce_test values (1, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: insert into coalesce_test values (1, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@coalesce_test
+POSTHOOK: Lineage: coalesce_test.a SCRIPT []
+POSTHOOK: Lineage: coalesce_test.b SCRIPT []
+PREHOOK: query: insert into coalesce_test values (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: insert into coalesce_test values (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@coalesce_test
+POSTHOOK: Lineage: coalesce_test.a EXPRESSION []
+POSTHOOK: Lineage: coalesce_test.b EXPRESSION []
+PREHOOK: query: explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: coalesce_test
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [3, 0, 1]
+ selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: a:int, b:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:int, VALUE._col0:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2]
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select coalesce(a, b) from coalesce_test order by a, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@coalesce_test
+#### A masked pattern was here ####
+POSTHOOK: query: select coalesce(a, b) from coalesce_test order by a, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@coalesce_test
+#### A masked pattern was here ####
+NULL
+1
http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/ql/src/test/results/clientpositive/vector_coalesce_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_coalesce_4.q.out b/ql/src/test/results/clientpositive/vector_coalesce_4.q.out
new file mode 100644
index 0000000..088d884
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_coalesce_4.q.out
@@ -0,0 +1,120 @@
+PREHOOK: query: create table coalesce_test(a int, b int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: create table coalesce_test(a int, b int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@coalesce_test
+PREHOOK: query: insert into coalesce_test values (1, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: insert into coalesce_test values (1, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@coalesce_test
+POSTHOOK: Lineage: coalesce_test.a SCRIPT []
+POSTHOOK: Lineage: coalesce_test.b SCRIPT []
+PREHOOK: query: insert into coalesce_test values (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: insert into coalesce_test values (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@coalesce_test
+POSTHOOK: Lineage: coalesce_test.a EXPRESSION []
+POSTHOOK: Lineage: coalesce_test.b EXPRESSION []
+PREHOOK: query: explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: coalesce_test
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [3, 0, 1]
+ selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int), _col2 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: a:int, b:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select coalesce(a, b) from coalesce_test order by a, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@coalesce_test
+#### A masked pattern was here ####
+POSTHOOK: query: select coalesce(a, b) from coalesce_test order by a, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@coalesce_test
+#### A masked pattern was here ####
+NULL
+1