You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2023/02/13 05:55:55 UTC
[hive] branch master updated: HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 9658838a46b HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)
9658838a46b is described below
commit 9658838a46bcb0d07cc896ca17ad8dc7b2ba4b35
Author: SimhadriGovindappa <si...@gmail.com>
AuthorDate: Mon Feb 13 11:25:46 2023 +0530
HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 45 ++-
.../positive/use_basic_stats_from_iceberg.q | 39 ++
.../positive/use_basic_stats_from_iceberg.q.out | 412 +++++++++++++++++++++
.../apache/hadoop/hive/ql/stats/BasicStats.java | 8 +
5 files changed, 490 insertions(+), 16 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b1b441dce7b..14d6837a3bd 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2207,6 +2207,8 @@ public class HiveConf extends Configuration {
"padding tolerance config (hive.exec.orc.block.padding.tolerance)."),
HIVE_ORC_CODEC_POOL("hive.use.orc.codec.pool", false,
"Whether to use codec pool in ORC. Disable if there are bugs with codec reuse."),
+ HIVE_USE_STATS_FROM("hive.use.stats.from", "iceberg", "Use stats from iceberg table snapshot for query " +
+ "planning. This has three values: metastore, puffin and iceberg"),
HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
"If this is set the header for RCFiles will simply be RCF. If this is not\n" +
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index fc54f826e63..74c123f48d3 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -130,6 +130,9 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
private static final String ICEBERG_URI_PREFIX = "iceberg://";
private static final Splitter TABLE_NAME_SPLITTER = Splitter.on("..");
private static final String TABLE_NAME_SEPARATOR = "..";
+ private static final String ICEBERG = "iceberg";
+ private static final String PUFFIN = "puffin";
+
/**
* Function template for producing a custom sort expression function:
* Takes the source column index and the bucket count to creat a function where Iceberg bucket UDF is used to build
@@ -312,24 +315,34 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
Table table = Catalogs.loadTable(conf, tableDesc.getProperties());
+ String statsSource = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
Map<String, String> stats = Maps.newHashMap();
- if (table.currentSnapshot() != null) {
- Map<String, String> summary = table.currentSnapshot().summary();
- if (summary != null) {
- if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
- stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
- }
- if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
- stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
- }
- if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
- stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
+ switch (statsSource) {
+ case ICEBERG:
+ if (table.currentSnapshot() != null) {
+ Map<String, String> summary = table.currentSnapshot().summary();
+ if (summary != null) {
+ if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
+ stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
+ }
+ if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
+ stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
+ }
+ if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
+ stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
+ }
+ }
+ } else {
+ stats.put(StatsSetupConst.NUM_FILES, "0");
+ stats.put(StatsSetupConst.ROW_COUNT, "0");
+ stats.put(StatsSetupConst.TOTAL_SIZE, "0");
}
- }
- } else {
- stats.put(StatsSetupConst.NUM_FILES, "0");
- stats.put(StatsSetupConst.ROW_COUNT, "0");
- stats.put(StatsSetupConst.TOTAL_SIZE, "0");
+ break;
+ case PUFFIN:
+ // placeholder for puffin
+ break;
+ default:
+ // fall back to metastore
}
return stats;
}
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
new file mode 100644
index 00000000000..90e2d95d1df
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
@@ -0,0 +1,39 @@
+-- Mask random uuid
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+set hive.stats.autogather=true;
+set hive.stats.column.autogather=true;
+
+drop table if exists tbl_ice;
+set hive.use.stats.from = metastore;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+
+drop table if exists tbl_ice;
+set hive.use.stats.from = iceberg;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+
+drop table if exists tbl_ice;
+drop table if exists t1 ;
+drop table if exists t2 ;
+create table t1 (a int) stored by iceberg tblproperties ('format-version'='2');
+create table t2 (b int) stored by iceberg tblproperties ('format-version'='2');
+describe formatted t1;
+describe formatted t2;
+explain select * from t1 join t2 on t1.a = t2.b;
+
+drop table if exists tbl_ice;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+
+explain select * from tbl_ice order by a, b, c;
+select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out
new file mode 100644
index 00000000000..29f7fff01e8
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out
@@ -0,0 +1,412 @@
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2 vectorized
+ File Output Operator [FS_8]
+ Select Operator [SEL_7] (rows=9 width=95)
+ Output:["_col0","_col1","_col2"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_6]
+ Select Operator [SEL_5] (rows=9 width=95)
+ Output:["_col0","_col1","_col2"]
+ TableScan [TS_0] (rows=9 width=95)
+ default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2 vectorized
+ File Output Operator [FS_8]
+ Select Operator [SEL_7] (rows=9 width=95)
+ Output:["_col0","_col1","_col2"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_6]
+ Select Operator [SEL_5] (rows=9 width=95)
+ Output:["_col0","_col1","_col2"]
+ TableScan [TS_0] (rows=9 width=95)
+ default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: drop table if exists t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: describe formatted t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: describe formatted t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name data_type comment
+a int
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
+ bucketing_version 2
+ engine.hive.enabled true
+ format-version 2
+ iceberg.orc.files.only false
+ metadata_location hdfs://### HDFS PATH ###
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.format 1
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize 0
+#### A masked pattern was here ####
+ uuid #Masked#
+ write.delete.mode merge-on-read
+ write.merge.mode merge-on-read
+ write.update.mode merge-on-read
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+PREHOOK: query: describe formatted t2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t2
+POSTHOOK: query: describe formatted t2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t2
+# col_name data_type comment
+b int
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}}
+ bucketing_version 2
+ engine.hive.enabled true
+ format-version 2
+ iceberg.orc.files.only false
+ metadata_location hdfs://### HDFS PATH ###
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ serialization.format 1
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize 0
+#### A masked pattern was here ####
+ uuid #Masked#
+ write.delete.mode merge-on-read
+ write.merge.mode merge-on-read
+ write.update.mode merge-on-read
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+PREHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_10]
+ Merge Join Operator [MERGEJOIN_25] (rows=1 width=4)
+ Conds:RS_28._col0=RS_31._col0(Inner),Output:["_col0","_col1"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_28]
+ PartitionCols:_col0
+ Select Operator [SEL_27] (rows=1 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_26] (rows=1 width=4)
+ predicate:a is not null
+ TableScan [TS_0] (rows=1 width=4)
+ default@t1,t1,Tbl:COMPLETE,Col:NONE,Output:["a"]
+ <-Map 3 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_31]
+ PartitionCols:_col0
+ Select Operator [SEL_30] (rows=1 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_29] (rows=1 width=4)
+ predicate:b is not null
+ TableScan [TS_3] (rows=1 width=4)
+ default@t2,t2,Tbl:COMPLETE,Col:NONE,Output:["b"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2 vectorized
+ File Output Operator [FS_8]
+ Select Operator [SEL_7] (rows=1 width=192)
+ Output:["_col0","_col1","_col2"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_6]
+ Select Operator [SEL_5] (rows=1 width=192)
+ Output:["_col0","_col1","_col2"]
+ TableScan [TS_0] (rows=1 width=192)
+ default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:NONE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2 vectorized
+ File Output Operator [FS_8]
+ Select Operator [SEL_7] (rows=9 width=95)
+ Output:["_col0","_col1","_col2"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_6]
+ Select Operator [SEL_5] (rows=9 width=95)
+ Output:["_col0","_col1","_col2"]
+ TableScan [TS_0] (rows=9 width=95)
+ default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1 one 50
+2 two 51
+2 two 51
+2 two 51
+3 three 52
+4 four 53
+5 five 54
+111 one 55
+333 two 56
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2 vectorized
+ File Output Operator [FS_8]
+ Select Operator [SEL_7] (rows=18 width=95)
+ Output:["_col0","_col1","_col2"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_6]
+ Select Operator [SEL_5] (rows=18 width=95)
+ Output:["_col0","_col1","_col2"]
+ TableScan [TS_0] (rows=18 width=95)
+ default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+18
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
index 83e4f8e9da0..ba675dcd9d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
@@ -242,6 +242,7 @@ public class BasicStats {
public BasicStats(Partish p) {
partish = p;
+ checkForBasicStatsFromStorageHandler();
rowCount = parseLong(StatsSetupConst.ROW_COUNT);
rawDataSize = parseLong(StatsSetupConst.RAW_DATA_SIZE);
totalSize = parseLong(StatsSetupConst.TOTAL_SIZE);
@@ -281,6 +282,13 @@ public class BasicStats {
}
+ private void checkForBasicStatsFromStorageHandler() {
+ if (partish.getTable() != null && partish.getTable().isNonNative() &&
+ partish.getTable().getStorageHandler().canProvideBasicStatistics()) {
+ partish.getPartParameters().putAll(partish.getTable().getStorageHandler().getBasicStatistics(partish));
+ }
+ }
+
public long getNumRows() {
return currentNumRows;
}