You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/07/19 18:00:55 UTC
hive git commit: HIVE-14277: Disable StatsOptimizer for all ACID tables (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master de5ae86ee -> e3d784908
HIVE-14277: Disable StatsOptimizer for all ACID tables (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e3d78490
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e3d78490
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e3d78490
Branch: refs/heads/master
Commit: e3d7849086f1154ee8b5975e6e34638a5682f4a6
Parents: de5ae86
Author: Pengcheng Xiong <px...@apache.org>
Authored: Tue Jul 19 11:00:29 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Tue Jul 19 11:00:29 2016 -0700
----------------------------------------------------------------------
.../hive/ql/optimizer/StatsOptimizer.java | 5 +
.../queries/clientpositive/acid_table_stats.q | 14 ++
.../clientpositive/acid_table_stats.q.out | 183 +++++++++++++++++++
3 files changed, 202 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e3d78490/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 7febfd5..0c17246 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -318,6 +319,10 @@ public class StatsOptimizer extends Transform {
}
Table tbl = tsOp.getConf().getTableMetadata();
+ if (AcidUtils.isAcidTable(tbl)) {
+ Logger.info("Table " + tbl.getTableName() + " is ACID table. Skip StatsOptimizer.");
+ return null;
+ }
List<Object> oneRow = new ArrayList<Object>();
Hive hive = Hive.get(pctx.getConf());
http://git-wip-us.apache.org/repos/asf/hive/blob/e3d78490/ql/src/test/queries/clientpositive/acid_table_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/acid_table_stats.q b/ql/src/test/queries/clientpositive/acid_table_stats.q
index 45da8d4..23d0df4 100644
--- a/ql/src/test/queries/clientpositive/acid_table_stats.q
+++ b/ql/src/test/queries/clientpositive/acid_table_stats.q
@@ -31,6 +31,13 @@ analyze table acid partition(ds='2008-04-08') compute statistics for columns;
desc formatted acid partition(ds='2008-04-08');
+set hive.compute.query.using.stats=false;
+select count(*) from acid where ds='2008-04-08';
+
+set hive.compute.query.using.stats=true;
+explain select count(*) from acid where ds='2008-04-08';
+select count(*) from acid where ds='2008-04-08';
+
insert into table acid partition(ds) select key,value,ds from srcpart;
desc formatted acid partition(ds='2008-04-08');
@@ -39,6 +46,13 @@ analyze table acid partition(ds='2008-04-08') compute statistics;
desc formatted acid partition(ds='2008-04-08');
+set hive.compute.query.using.stats=true;
+explain select count(*) from acid where ds='2008-04-08';
+select count(*) from acid where ds='2008-04-08';
+
+analyze table acid partition(ds='2008-04-08') compute statistics for columns;
+explain select max(key) from acid where ds='2008-04-08';
+
drop table acid;
CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC;
http://git-wip-us.apache.org/repos/asf/hive/blob/e3d78490/ql/src/test/results/clientpositive/acid_table_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out
index f662a48..4d51511 100644
--- a/ql/src/test/results/clientpositive/acid_table_stats.q.out
+++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out
@@ -273,6 +273,74 @@ Bucket Columns: [key]
Sort Columns: []
Storage Desc Params:
serialization.format 1
+PREHOOK: query: select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+1000
+PREHOOK: query: explain select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: acid
+ Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+1000
PREHOOK: query: insert into table acid partition(ds) select key,value,ds from srcpart
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
@@ -381,6 +449,121 @@ Bucket Columns: [key]
Sort Columns: []
Storage Desc Params:
serialization.format 1
+PREHOOK: query: explain select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: acid
+ Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+2000
+PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid
+PREHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid
+POSTHOOK: Input: default@acid@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: explain select max(key) from acid where ds='2008-04-08'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(key) from acid where ds='2008-04-08'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: acid
+ Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: drop table acid
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@acid