You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/26 01:00:02 UTC

hive git commit: HIVE-18800: Vectorization: VectorCoalesce doesn't handle the all repeated NULLs case (Matt McCline, reviewed by Gopal Vijayaraghavan)

Repository: hive
Updated Branches:
  refs/heads/master 53a590b53 -> e8e5ab246


HIVE-18800: Vectorization: VectorCoalesce doesn't handle the all repeated NULLs case (Matt McCline, reviewed by Gopal Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e8e5ab24
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e8e5ab24
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e8e5ab24

Branch: refs/heads/master
Commit: e8e5ab24616aa834f4966efe3a5f437f6bee4d1d
Parents: 53a590b
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sun Feb 25 18:59:48 2018 -0600
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sun Feb 25 18:59:48 2018 -0600

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../exec/vector/expressions/VectorCoalesce.java |  11 +-
 .../queries/clientpositive/vector_coalesce_4.q  |  14 ++
 .../clientpositive/llap/vector_coalesce_4.q.out | 146 +++++++++++++++++++
 .../clientpositive/vector_coalesce_4.q.out      | 120 +++++++++++++++
 5 files changed, 289 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 4a52eb5..2776fe9 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -323,6 +323,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_coalesce.q,\
   vector_coalesce_2.q,\
   vector_coalesce_3.q,\
+  vector_coalesce_4.q,\
   vector_complex_all.q,\
   vector_count.q,\
   vector_count_distinct.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
index 3a560ca..c66beb0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java
@@ -225,9 +225,14 @@ public class VectorCoalesce extends VectorExpression {
 
     // NULL out the remaining columns.
     outputColVector.noNulls = false;
-    for (int i = 0; i < unassignedColumnCount; i++) {
-      final int batchIndex = unassignedBatchIndices[i];
-      outputIsNull[batchIndex] = true;
+    if (isAllUnassigned) {
+      outputIsNull[0] = true;
+      outputColVector.isRepeating = true;
+    } else {
+      for (int i = 0; i < unassignedColumnCount; i++) {
+        final int batchIndex = unassignedBatchIndices[i];
+        outputIsNull[batchIndex] = true;
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/ql/src/test/queries/clientpositive/vector_coalesce_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_coalesce_4.q b/ql/src/test/queries/clientpositive/vector_coalesce_4.q
new file mode 100644
index 0000000..a050beb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_coalesce_4.q
@@ -0,0 +1,14 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+create table coalesce_test(a int, b int) stored as orc;
+
+insert into coalesce_test values (1, 2);
+
+-- Add a single NULL row that will come from ORC with isRepeating set.
+insert into coalesce_test values (NULL, NULL);
+
+explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b;
+
+select coalesce(a, b) from coalesce_test order by a, b;;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out
new file mode 100644
index 0000000..5c3093f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out
@@ -0,0 +1,146 @@
+PREHOOK: query: create table coalesce_test(a int, b int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: create table coalesce_test(a int, b int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@coalesce_test
+PREHOOK: query: insert into coalesce_test values (1, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: insert into coalesce_test values (1, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@coalesce_test
+POSTHOOK: Lineage: coalesce_test.a SCRIPT []
+POSTHOOK: Lineage: coalesce_test.b SCRIPT []
+PREHOOK: query: insert into coalesce_test values (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: insert into coalesce_test values (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@coalesce_test
+POSTHOOK: Lineage: coalesce_test.a EXPRESSION []
+POSTHOOK: Lineage: coalesce_test.b EXPRESSION []
+PREHOOK: query: explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: coalesce_test
+                  Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [3, 0, 1]
+                        selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int
+                    Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int), _col2 (type: int)
+                      sort order: ++
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkObjectHashOperator
+                          keyColumnNums: [0, 1]
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumnNums: [3]
+                      Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: a:int, b:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:int, VALUE._col0:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: int)
+                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [2]
+                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select coalesce(a, b) from coalesce_test order by a, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@coalesce_test
+#### A masked pattern was here ####
+POSTHOOK: query: select coalesce(a, b) from coalesce_test order by a, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@coalesce_test
+#### A masked pattern was here ####
+NULL
+1

http://git-wip-us.apache.org/repos/asf/hive/blob/e8e5ab24/ql/src/test/results/clientpositive/vector_coalesce_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_coalesce_4.q.out b/ql/src/test/results/clientpositive/vector_coalesce_4.q.out
new file mode 100644
index 0000000..088d884
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_coalesce_4.q.out
@@ -0,0 +1,120 @@
+PREHOOK: query: create table coalesce_test(a int, b int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: create table coalesce_test(a int, b int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@coalesce_test
+PREHOOK: query: insert into coalesce_test values (1, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: insert into coalesce_test values (1, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@coalesce_test
+POSTHOOK: Lineage: coalesce_test.a SCRIPT []
+POSTHOOK: Lineage: coalesce_test.b SCRIPT []
+PREHOOK: query: insert into coalesce_test values (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@coalesce_test
+POSTHOOK: query: insert into coalesce_test values (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@coalesce_test
+POSTHOOK: Lineage: coalesce_test.a EXPRESSION []
+POSTHOOK: Lineage: coalesce_test.b EXPRESSION []
+PREHOOK: query: explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select coalesce(a, b) from coalesce_test order by a, b
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: coalesce_test
+            Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+            Select Operator
+              expressions: COALESCE(a,b) (type: int), a (type: int), b (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [3, 0, 1]
+                  selectExpressions: VectorCoalesce(columns [0, 1])(children: col 0:int, col 1:int) -> 3:int
+              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int), _col2 (type: int)
+                sort order: ++
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkOperator
+                    native: false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: int)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 2
+              includeColumns: [0, 1]
+              dataColumns: a:int, b:int
+              partitionColumnCount: 0
+              scratchColumnTypeNames: [bigint]
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int)
+          outputColumnNames: _col0
+          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select coalesce(a, b) from coalesce_test order by a, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@coalesce_test
+#### A masked pattern was here ####
+POSTHOOK: query: select coalesce(a, b) from coalesce_test order by a, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@coalesce_test
+#### A masked pattern was here ####
+NULL
+1