You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2018/02/20 20:50:04 UTC

hive git commit: HIVE-18742: Vectorization acid/inputformat check should allow NullRowsInputFormat/OneNullRowInputFormat (Jason Dere, reviewed by Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/master 111ed0964 -> e51f7c9d2


HIVE-18742: Vectorization acid/inputformat check should allow NullRowsInputFormat/OneNullRowInputFormat (Jason Dere, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e51f7c9d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e51f7c9d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e51f7c9d

Branch: refs/heads/master
Commit: e51f7c9d277c8a1a7a289063b9bcf43ad6de8e99
Parents: 111ed09
Author: Jason Dere <jd...@hortonworks.com>
Authored: Tue Feb 20 12:49:16 2018 -0800
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Tue Feb 20 12:49:16 2018 -0800

----------------------------------------------------------------------
 .../hive/ql/optimizer/physical/Vectorizer.java  |  12 +-
 .../test/queries/clientpositive/acid_nullscan.q |  17 ++
 .../results/clientpositive/acid_nullscan.q.out  | 162 +++++++++++++++++++
 3 files changed, 190 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 27b53b8..52ef2d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -101,6 +101,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport.Support;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
+import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -353,6 +355,14 @@ public class Vectorizer implements PhysicalPlanResolver {
     vectorDeserializeTextSupportSet.addAll(Arrays.asList(Support.values()));
   }
 
+  private static final Set<String> supportedAcidInputFormats = new TreeSet<String>();
+  static {
+    supportedAcidInputFormats.add(OrcInputFormat.class.getName());
+    // For metadataonly or empty rows optimizations, null/onerow input format can be selected.
+    supportedAcidInputFormats.add(NullRowsInputFormat.class.getName());
+    supportedAcidInputFormats.add(OneNullRowInputFormat.class.getName());
+  }
+
   private BaseWork currentBaseWork;
   private Operator<? extends OperatorDesc> currentOperator;
   private Collection<Class<?>> vectorizedInputFormatExcludes;
@@ -1201,7 +1211,7 @@ public class Vectorizer implements PhysicalPlanResolver {
         // Today, ACID tables are only ORC and that format is vectorizable.  Verify these
         // assumptions.
         Preconditions.checkState(isInputFileFormatVectorized);
-        Preconditions.checkState(inputFileFormatClassName.equals(OrcInputFormat.class.getName()));
+        Preconditions.checkState(supportedAcidInputFormats.contains(inputFileFormatClassName));
 
         if (!useVectorizedInputFileFormat) {
           enabledConditionsNotMetList.add("Vectorizing ACID tables requires "

http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/test/queries/clientpositive/acid_nullscan.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/acid_nullscan.q b/ql/src/test/queries/clientpositive/acid_nullscan.q
new file mode 100644
index 0000000..d048231
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/acid_nullscan.q
@@ -0,0 +1,17 @@
+
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.vectorized.execution.enabled=true;
+
+CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true');
+insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10;
+insert into table acid_vectorized values (1, 'bar');
+
+explain extended
+select sum(a) from acid_vectorized where false;
+
+select sum(a) from acid_vectorized where false;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/test/results/clientpositive/acid_nullscan.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out b/ql/src/test/results/clientpositive/acid_nullscan.q.out
new file mode 100644
index 0000000..7fcc831
--- /dev/null
+++ b/ql/src/test/results/clientpositive/acid_nullscan.q.out
@@ -0,0 +1,162 @@
+PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_vectorized
+PREHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized
+POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: insert into table acid_vectorized values (1, 'bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: insert into table acid_vectorized values (1, 'bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@acid_vectorized
+POSTHOOK: Lineage: acid_vectorized.a SCRIPT []
+POSTHOOK: Lineage: acid_vectorized.b SCRIPT []
+PREHOOK: query: explain extended
+select sum(a) from acid_vectorized where false
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select sum(a) from acid_vectorized where false
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: acid_vectorized
+            Statistics: Num rows: 1 Data size: 24510 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: false (type: boolean)
+              Statistics: Num rows: 1 Data size: 24510 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(a)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  null sort order: 
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col0 (type: bigint)
+                  auto parallelism: false
+      Execution mode: vectorized
+      Path -> Alias:
+        nullscan://null/default.acid_vectorized/part_ [acid_vectorized]
+      Path -> Partition:
+        nullscan://null/default.acid_vectorized/part_ 
+          Partition
+            input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count 2
+              bucket_field_name a
+              column.name.delimiter ,
+              columns a,b
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.acid_vectorized
+              numFiles 3
+              numRows 0
+              rawDataSize 0
+              serialization.ddl struct acid_vectorized { i32 a, string b}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe
+              totalSize 2451
+              transactional true
+              transactional_properties default
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+          
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              properties:
+                bucket_count 2
+                bucket_field_name a
+                column.name.delimiter ,
+                columns a,b
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.acid_vectorized
+                numFiles 3
+                numRows 0
+                rawDataSize 0
+                serialization.ddl struct acid_vectorized { i32 a, string b}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                totalSize 2451
+                transactional true
+                transactional_properties default
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.acid_vectorized
+            name: default.acid_vectorized
+      Truncated Path -> Alias:
+        nullscan://null/default.acid_vectorized/part_ [acid_vectorized]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0
+                  columns.types bigint
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(a) from acid_vectorized where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(a) from acid_vectorized where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+NULL