You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/06/05 08:20:52 UTC
hive git commit: HIVE-19109: Vectorization: Enabling vectorization
causes TestCliDriver delete_orig_table.q to produce Wrong Results (Matt
McCline, reviewed by Teddy Choi)
Repository: hive
Updated Branches:
refs/heads/master b15c842b5 -> f80cff9ab
HIVE-19109: Vectorization: Enabling vectorization causes TestCliDriver delete_orig_table.q to produce Wrong Results (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f80cff9a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f80cff9a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f80cff9a
Branch: refs/heads/master
Commit: f80cff9abf3beb5e42fe493a5ef565c74fff260d
Parents: b15c842
Author: Matt McCline <mm...@hortonworks.com>
Authored: Tue Jun 5 01:20:43 2018 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Tue Jun 5 01:20:43 2018 -0700
----------------------------------------------------------------------
.../queries/clientpositive/delete_orig_table.q | 2 -
.../clientpositive/vector_delete_orig_table.q | 36 ++++
.../tez/vector_delete_orig_table.q.out | 180 +++++++++++++++++++
.../vector_delete_orig_table.q.out | 152 ++++++++++++++++
4 files changed, 368 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f80cff9a/ql/src/test/queries/clientpositive/delete_orig_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/delete_orig_table.q b/ql/src/test/queries/clientpositive/delete_orig_table.q
index c009b76..b196a74 100644
--- a/ql/src/test/queries/clientpositive/delete_orig_table.q
+++ b/ql/src/test/queries/clientpositive/delete_orig_table.q
@@ -1,7 +1,5 @@
--! qt:dataset:alltypesorc
--- Suppress vectorization due to known bug. See HIVE-19109.
set hive.vectorized.execution.enabled=false;
-set hive.test.vectorized.execution.enabled.override=disable;
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
http://git-wip-us.apache.org/repos/asf/hive/blob/f80cff9a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_delete_orig_table.q b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
new file mode 100644
index 0000000..f914408
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_delete_orig_table.q
@@ -0,0 +1,36 @@
+--! qt:dataset:alltypesorc
+set hive.vectorized.execution.enabled=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/delete_orig_table;
+dfs -copyFromLocal ../../data/files/alltypesorc ${system:test.tmp.dir}/delete_orig_table/00000_0;
+
+create table acid_dot(
+ ctinyint TINYINT,
+ csmallint SMALLINT,
+ cint INT,
+ cbigint BIGINT,
+ cfloat FLOAT,
+ cdouble DOUBLE,
+ cstring1 STRING,
+ cstring2 STRING,
+ ctimestamp1 TIMESTAMP,
+ ctimestamp2 TIMESTAMP,
+ cboolean1 BOOLEAN,
+ cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc location '${system:test.tmp.dir}/delete_orig_table' TBLPROPERTIES ('transactional'='true');
+
+explain vectorization detail
+select count(*) from acid_dot;
+
+select count(*) from acid_dot;
+
+delete from acid_dot where cint < -1070551679;
+
+select count(*) from acid_dot;
+
+dfs -rmr ${system:test.tmp.dir}/delete_orig_table;
http://git-wip-us.apache.org/repos/asf/hive/blob/f80cff9a/ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out b/ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out
new file mode 100644
index 0000000..d1d8cc0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_delete_orig_table.q.out
@@ -0,0 +1,180 @@
+PREHOOK: query: create table acid_dot(
+ ctinyint TINYINT,
+ csmallint SMALLINT,
+ cint INT,
+ cbigint BIGINT,
+ cfloat FLOAT,
+ cdouble DOUBLE,
+ cstring1 STRING,
+ cstring2 STRING,
+ ctimestamp1 TIMESTAMP,
+ ctimestamp2 TIMESTAMP,
+ cboolean1 BOOLEAN,
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: hdfs://### HDFS PATH ###
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_dot
+POSTHOOK: query: create table acid_dot(
+ ctinyint TINYINT,
+ csmallint SMALLINT,
+ cint INT,
+ cbigint BIGINT,
+ cfloat FLOAT,
+ cdouble DOUBLE,
+ cstring1 STRING,
+ cstring2 STRING,
+ ctimestamp1 TIMESTAMP,
+ ctimestamp2 TIMESTAMP,
+ cboolean1 BOOLEAN,
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: hdfs://### HDFS PATH ###
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_dot
+PREHOOK: query: explain vectorization detail
+select count(*) from acid_dot
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select count(*) from acid_dot
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: acid_dot
+ Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: []
+ Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ keyColumnNums: []
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [0]
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: []
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0]
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from acid_dot
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from acid_dot
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+12288
+PREHOOK: query: delete from acid_dot where cint < -1070551679
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+PREHOOK: Output: default@acid_dot
+POSTHOOK: query: delete from acid_dot where cint < -1070551679
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+POSTHOOK: Output: default@acid_dot
+PREHOOK: query: select count(*) from acid_dot
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from acid_dot
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+12192
+#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/f80cff9a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
new file mode 100644
index 0000000..4ce897e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_delete_orig_table.q.out
@@ -0,0 +1,152 @@
+PREHOOK: query: create table acid_dot(
+ ctinyint TINYINT,
+ csmallint SMALLINT,
+ cint INT,
+ cbigint BIGINT,
+ cfloat FLOAT,
+ cdouble DOUBLE,
+ cstring1 STRING,
+ cstring2 STRING,
+ ctimestamp1 TIMESTAMP,
+ ctimestamp2 TIMESTAMP,
+ cboolean1 BOOLEAN,
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_dot
+POSTHOOK: query: create table acid_dot(
+ ctinyint TINYINT,
+ csmallint SMALLINT,
+ cint INT,
+ cbigint BIGINT,
+ cfloat FLOAT,
+ cdouble DOUBLE,
+ cstring1 STRING,
+ cstring2 STRING,
+ ctimestamp1 TIMESTAMP,
+ ctimestamp2 TIMESTAMP,
+ cboolean1 BOOLEAN,
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_dot
+PREHOOK: query: explain vectorization detail
+select count(*) from acid_dot
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select count(*) from acid_dot
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: acid_dot
+ Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: []
+ Statistics: Num rows: 5865 Data size: 2956160 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: []
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from acid_dot
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid_dot
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+#### A masked pattern was here ####
+12288
+PREHOOK: query: delete from acid_dot where cint < -1070551679
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+PREHOOK: Output: default@acid_dot
+POSTHOOK: query: delete from acid_dot where cint < -1070551679
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+POSTHOOK: Output: default@acid_dot
+PREHOOK: query: select count(*) from acid_dot
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_dot
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid_dot
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_dot
+#### A masked pattern was here ####
+12192
+#### A masked pattern was here ####