You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/06/05 08:20:52 UTC
hive git commit: HIVE-19109: Vectorization: Enabling vectorization causes TestCliDriver delete_orig_table.q to produce Wrong Results (Matt McCline, reviewed by Teddy Choi)

Repository: hive
Updated Branches:
  refs/heads/master b15c842b5 -> f80cff9ab


HIVE-19109: Vectorization: Enabling vectorization causes TestCliDriver delete_orig_table.q to produce Wrong Results (Matt McCline, reviewed by Teddy Choi)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f80cff9a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f80cff9a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f80cff9a

Branch: refs/heads/master
Commit: f80cff9abf3beb5e42fe493a5ef565c74fff260d
Parents: b15c842
Author: Matt McCline <mm...@hortonworks.com>
Authored: Tue Jun 5 01:20:43 2018 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Tue Jun 5 01:20:43 2018 -0700

----------------------------------------------------------------------
 .../queries/clientpositive/delete_orig_table.q  |   2 -
 .../clientpositive/vector_delete_orig_table.q   |  36 ++++
 .../tez/vector_delete_orig_table.q.out          | 180 +++++++++++++++++++
 .../vector_delete_orig_table.q.out              | 152 ++++++++++++++++
 4 files changed, 368 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f80cff9a/ql/src/test/queries/clientpositive/delete_orig_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/delete_orig_table.q b/ql/src/test/queries/clientpositive/delete_orig_table.q
index c009b76..b196a74 100644
--- a/ql/src/test/queries/clientpositive/delete_orig_table.q
+++ b/ql/src/test/queries/clientpositive/delete_orig_table.q
@@ -1,7 +1,5 @@
 --! qt:dataset:alltypesorc
--- Suppress vectorization due to known bug.  See HIVE-19109.
 set hive.vectorized.execution.enabled=false;
-set hive.test.vectorized.execution.enabled.override=disable;
 
 set hive.support.concurrency=true;
 set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;

http://git-wip-us.apache.org/repos/asf/hive/blob/f80cff9a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
new file mode 100644
index 0000000..f914408
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
@@ -0,0 +1,36 @@
+--! qt:dataset:alltypesorc
+set hive.vectorized.execution.enabled=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/delete_orig_table;
+dfs -copyFromLocal ../../data/files/alltypesorc ${system:test.tmp.dir}/delete_orig_table/00000_0;
+
+create table acid_dot(
+    ctinyint TINYINT,
+    csmallint SMALLINT,
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE,
+    cstring1 STRING,
+    cstring2 STRING,
+    ctimestamp1 TIMESTAMP,
+    ctimestamp2 TIMESTAMP,
+    cboolean1 BOOLEAN,
+    cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc location '${system:test.tmp.dir}/delete_orig_table' TBLPROPERTIES ('transactional'='true');
+
+explain vectorization detail
+select count(*) from acid_dot;
+
+select count(*) from acid_dot;
+
+delete from acid_dot where cint < -1070551679;
+
+select count(*) from acid_dot;
+
+dfs -rmr ${system:test.tmp.dir}/delete_orig_table;

http://git-wip-us.apache.org/repos/asf/hive/blob/f80cff9a/ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out b/ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out
new file mode 100644
index 0000000..d1d8cc0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out
@@ -0,0 +1,180 @@
+PREHOOK: query: create table acid_dot(
+    ctinyint TINYINT,
+    csmallint SMALLINT,
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE,
+    cstring1 STRING,
+    cstring2 STRING,
+    ctimestamp1 TIMESTAMP,
+    ctimestamp2 TIMESTAMP,
+    cboolean1 BOOLEAN,
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: hdfs://### HDFS PATH ###
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_dot
+POSTHOOK: query: create table acid_dot(
+    ctinyint TINYINT,
+    csmallint SMALLINT,
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE,
+    cstring1 STRING,
+    cstring2 STRING,
+    ctimestamp1 TIMESTAMP,
+    ctimestamp2 TIMESTAMP,
+    cboolean1 BOOLEAN,
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: hdfs://### HDFS PATH ###
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_dot
+PREHOOK: query: explain vectorization detail
+select count(*) from acid_dot
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select count(*) from acid_dot
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: acid_dot
+                  Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: []
+                    Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkEmptyKeyOperator
+                            keyColumnNums: []
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [0]
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: []
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from acid_dot
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from acid_dot
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+12288
+PREHOOK: query: delete from acid_dot where cint < -1070551679
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+PREHOOK: Output: default@acid_dot
+POSTHOOK: query: delete from acid_dot where cint < -1070551679
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+POSTHOOK: Output: default@acid_dot
+PREHOOK: query: select count(*) from acid_dot
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from acid_dot
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+12192
+#### A masked pattern was here ####

http://git-wip-us.apache.org/repos/asf/hive/blob/f80cff9a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
new file mode 100644
index 0000000..4ce897e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
@@ -0,0 +1,152 @@
+PREHOOK: query: create table acid_dot(
+    ctinyint TINYINT,
+    csmallint SMALLINT,
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE,
+    cstring1 STRING,
+    cstring2 STRING,
+    ctimestamp1 TIMESTAMP,
+    ctimestamp2 TIMESTAMP,
+    cboolean1 BOOLEAN,
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_dot
+POSTHOOK: query: create table acid_dot(
+    ctinyint TINYINT,
+    csmallint SMALLINT,
+    cint INT,
+    cbigint BIGINT,
+    cfloat FLOAT,
+    cdouble DOUBLE,
+    cstring1 STRING,
+    cstring2 STRING,
+    ctimestamp1 TIMESTAMP,
+    ctimestamp2 TIMESTAMP,
+    cboolean1 BOOLEAN,
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_dot
+PREHOOK: query: explain vectorization detail
+select count(*) from acid_dot
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select count(*) from acid_dot
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: acid_dot
+            Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE
+            TableScan Vectorization:
+                native: true
+                vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+            Select Operator
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: []
+              Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count()
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountStar(*) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: HASH
+                    native: false
+                    vectorProcessingMode: HASH
+                    projectedOutputColumnNums: [0]
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: []
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: []
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from acid_dot
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid_dot
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+#### A masked pattern was here ####
+12288
+PREHOOK: query: delete from acid_dot where cint < -1070551679
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+PREHOOK: Output: default@acid_dot
+POSTHOOK: query: delete from acid_dot where cint < -1070551679
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+POSTHOOK: Output: default@acid_dot
+PREHOOK: query: select count(*) from acid_dot
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid_dot
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+#### A masked pattern was here ####
+12192
+#### A masked pattern was here ####