You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2018/02/20 20:50:04 UTC
hive git commit: HIVE-18742: Vectorization acid/inputformat check
should allow NullRowsInputFormat/OneNullRowInputFormat (Jason Dere,
reviewed by Sergey Shelukhin)
Repository: hive
Updated Branches:
refs/heads/master 111ed0964 -> e51f7c9d2
HIVE-18742: Vectorization acid/inputformat check should allow NullRowsInputFormat/OneNullRowInputFormat (Jason Dere, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e51f7c9d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e51f7c9d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e51f7c9d
Branch: refs/heads/master
Commit: e51f7c9d277c8a1a7a289063b9bcf43ad6de8e99
Parents: 111ed09
Author: Jason Dere <jd...@hortonworks.com>
Authored: Tue Feb 20 12:49:16 2018 -0800
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Tue Feb 20 12:49:16 2018 -0800
----------------------------------------------------------------------
.../hive/ql/optimizer/physical/Vectorizer.java | 12 +-
.../test/queries/clientpositive/acid_nullscan.q | 17 ++
.../results/clientpositive/acid_nullscan.q.out | 162 +++++++++++++++++++
3 files changed, 190 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 27b53b8..52ef2d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -101,6 +101,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport.Support;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
+import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -353,6 +355,14 @@ public class Vectorizer implements PhysicalPlanResolver {
vectorDeserializeTextSupportSet.addAll(Arrays.asList(Support.values()));
}
+ private static final Set<String> supportedAcidInputFormats = new TreeSet<String>();
+ static {
+ supportedAcidInputFormats.add(OrcInputFormat.class.getName());
+ // The metadata-only and empty-rows (null scan) optimizations may select NullRowsInputFormat or OneNullRowInputFormat.
+ supportedAcidInputFormats.add(NullRowsInputFormat.class.getName());
+ supportedAcidInputFormats.add(OneNullRowInputFormat.class.getName());
+ }
+
private BaseWork currentBaseWork;
private Operator<? extends OperatorDesc> currentOperator;
private Collection<Class<?>> vectorizedInputFormatExcludes;
@@ -1201,7 +1211,7 @@ public class Vectorizer implements PhysicalPlanResolver {
// Today, ACID tables are only ORC and that format is vectorizable. Verify these
// assumptions.
Preconditions.checkState(isInputFileFormatVectorized);
- Preconditions.checkState(inputFileFormatClassName.equals(OrcInputFormat.class.getName()));
+ Preconditions.checkState(supportedAcidInputFormats.contains(inputFileFormatClassName));
if (!useVectorizedInputFileFormat) {
enabledConditionsNotMetList.add("Vectorizing ACID tables requires "
http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/test/queries/clientpositive/acid_nullscan.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/acid_nullscan.q b/ql/src/test/queries/clientpositive/acid_nullscan.q
new file mode 100644
index 0000000..d048231
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/acid_nullscan.q
@@ -0,0 +1,17 @@
+
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.vectorized.execution.enabled=true;
+
+CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true');
+insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10;
+insert into table acid_vectorized values (1, 'bar');
+
+explain extended
+select sum(a) from acid_vectorized where false;
+
+select sum(a) from acid_vectorized where false;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/test/results/clientpositive/acid_nullscan.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out b/ql/src/test/results/clientpositive/acid_nullscan.q.out
new file mode 100644
index 0000000..7fcc831
--- /dev/null
+++ b/ql/src/test/results/clientpositive/acid_nullscan.q.out
@@ -0,0 +1,162 @@
+PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_vectorized
+PREHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized
+POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: insert into table acid_vectorized values (1, 'bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: insert into table acid_vectorized values (1, 'bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@acid_vectorized
+POSTHOOK: Lineage: acid_vectorized.a SCRIPT []
+POSTHOOK: Lineage: acid_vectorized.b SCRIPT []
+PREHOOK: query: explain extended
+select sum(a) from acid_vectorized where false
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select sum(a) from acid_vectorized where false
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: acid_vectorized
+ Statistics: Num rows: 1 Data size: 24510 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: false (type: boolean)
+ Statistics: Num rows: 1 Data size: 24510 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(a)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
+ Execution mode: vectorized
+ Path -> Alias:
+ nullscan://null/default.acid_vectorized/part_ [acid_vectorized]
+ Path -> Partition:
+ nullscan://null/default.acid_vectorized/part_
+ Partition
+ input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name a
+ column.name.delimiter ,
+ columns a,b
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.acid_vectorized
+ numFiles 3
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct acid_vectorized { i32 a, string b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe
+ totalSize 2451
+ transactional true
+ transactional_properties default
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name a
+ column.name.delimiter ,
+ columns a,b
+ columns.comments
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.acid_vectorized
+ numFiles 3
+ numRows 0
+ rawDataSize 0
+ serialization.ddl struct acid_vectorized { i32 a, string b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ totalSize 2451
+ transactional true
+ transactional_properties default
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acid_vectorized
+ name: default.acid_vectorized
+ Truncated Path -> Alias:
+ nullscan://null/default.acid_vectorized/part_ [acid_vectorized]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(a) from acid_vectorized where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(a) from acid_vectorized where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+NULL