You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@hive.apache.org by px...@apache.org on 2016/07/19 18:00:55 UTC
hive git commit: HIVE-14277: Disable StatsOptimizer for all ACID tables (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master de5ae86ee -> e3d784908


HIVE-14277: Disable StatsOptimizer for all ACID tables (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e3d78490
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e3d78490
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e3d78490

Branch: refs/heads/master
Commit: e3d7849086f1154ee8b5975e6e34638a5682f4a6
Parents: de5ae86
Author: Pengcheng Xiong <px...@apache.org>
Authored: Tue Jul 19 11:00:29 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Tue Jul 19 11:00:29 2016 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/StatsOptimizer.java       |   5 +
 .../queries/clientpositive/acid_table_stats.q   |  14 ++
 .../clientpositive/acid_table_stats.q.out       | 183 +++++++++++++++++++
 3 files changed, 202 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e3d78490/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 7febfd5..0c17246 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -318,6 +319,10 @@ public class StatsOptimizer extends Transform {
         }
 
         Table tbl = tsOp.getConf().getTableMetadata();
+        if (AcidUtils.isAcidTable(tbl)) {
+          Logger.info("Table " + tbl.getTableName() + " is ACID table. Skip StatsOptimizer.");
+          return null;
+        }
         List<Object> oneRow = new ArrayList<Object>();
 
         Hive hive = Hive.get(pctx.getConf());

http://git-wip-us.apache.org/repos/asf/hive/blob/e3d78490/ql/src/test/queries/clientpositive/acid_table_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/acid_table_stats.q b/ql/src/test/queries/clientpositive/acid_table_stats.q
index 45da8d4..23d0df4 100644
--- a/ql/src/test/queries/clientpositive/acid_table_stats.q
+++ b/ql/src/test/queries/clientpositive/acid_table_stats.q
@@ -31,6 +31,13 @@ analyze table acid partition(ds='2008-04-08') compute statistics for columns;
 
 desc formatted acid partition(ds='2008-04-08');
 
+set hive.compute.query.using.stats=false;
+select count(*) from acid where ds='2008-04-08';
+
+set hive.compute.query.using.stats=true;
+explain select count(*) from acid where ds='2008-04-08';
+select count(*) from acid where ds='2008-04-08';
+
 insert into table acid partition(ds)  select key,value,ds from srcpart;
 
 desc formatted acid partition(ds='2008-04-08');
@@ -39,6 +46,13 @@ analyze table acid partition(ds='2008-04-08') compute statistics;
 
 desc formatted acid partition(ds='2008-04-08');
 
+set hive.compute.query.using.stats=true;
+explain select count(*) from acid where ds='2008-04-08';
+select count(*) from acid where ds='2008-04-08';
+
+analyze table acid partition(ds='2008-04-08') compute statistics for columns;
+explain select max(key) from acid where ds='2008-04-08';
+
 drop table acid;
 CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/e3d78490/ql/src/test/results/clientpositive/acid_table_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out
index f662a48..4d51511 100644
--- a/ql/src/test/results/clientpositive/acid_table_stats.q.out
+++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out
@@ -273,6 +273,74 @@ Bucket Columns:     	[key]
 Sort Columns:       	[]                  	 
 Storage Desc Params:	 	 
 	serialization.format	1                   
+PREHOOK: query: select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+1000
+PREHOOK: query: explain select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: acid
+            Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+1000
 PREHOOK: query: insert into table acid partition(ds)  select key,value,ds from srcpart
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
@@ -381,6 +449,121 @@ Bucket Columns:     	[key]
 Sort Columns:       	[]                  	 
 Storage Desc Params:	 	 
 	serialization.format	1                   
+PREHOOK: query: explain select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: acid
+            Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+2000
+PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: explain select max(key) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(key) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: acid
+            Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: key
+              Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(key)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: drop table acid
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@acid