You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2023/02/13 05:55:55 UTC

[hive] branch master updated: HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)

This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 9658838a46b HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)
9658838a46b is described below

commit 9658838a46bcb0d07cc896ca17ad8dc7b2ba4b35
Author: SimhadriGovindappa <si...@gmail.com>
AuthorDate: Mon Feb 13 11:25:46 2023 +0530

    HIVE-27007: Iceberg: Use BasicStats from iceberg table's currentSnapshot.summary() for query planning (Simhadri Govindappa, reviewed by Krisztian Kasa, Soumyakanti Das, Zsolt Miskolczi)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   2 +
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  45 ++-
 .../positive/use_basic_stats_from_iceberg.q        |  39 ++
 .../positive/use_basic_stats_from_iceberg.q.out    | 412 +++++++++++++++++++++
 .../apache/hadoop/hive/ql/stats/BasicStats.java    |   8 +
 5 files changed, 490 insertions(+), 16 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b1b441dce7b..14d6837a3bd 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2207,6 +2207,8 @@ public class HiveConf extends Configuration {
         "padding tolerance config (hive.exec.orc.block.padding.tolerance)."),
     HIVE_ORC_CODEC_POOL("hive.use.orc.codec.pool", false,
         "Whether to use codec pool in ORC. Disable if there are bugs with codec reuse."),
+    HIVE_USE_STATS_FROM("hive.use.stats.from","iceberg","Use stats from iceberg table snapshot for query " +
+        "planning. This has three values metastore, puffin and iceberg"),
 
     HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
         "If this is set the header for RCFiles will simply be RCF.  If this is not\n" +
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index fc54f826e63..74c123f48d3 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -130,6 +130,9 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   private static final String ICEBERG_URI_PREFIX = "iceberg://";
   private static final Splitter TABLE_NAME_SPLITTER = Splitter.on("..");
   private static final String TABLE_NAME_SEPARATOR = "..";
+  private static final String ICEBERG = "iceberg";
+  private static final String PUFFIN = "puffin";
+
   /**
    * Function template for producing a custom sort expression function:
    * Takes the source column index and the bucket count to creat a function where Iceberg bucket UDF is used to build
@@ -312,24 +315,34 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
     TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
     Table table = Catalogs.loadTable(conf, tableDesc.getProperties());
+    String statsSource = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
     Map<String, String> stats = Maps.newHashMap();
-    if (table.currentSnapshot() != null) {
-      Map<String, String> summary = table.currentSnapshot().summary();
-      if (summary != null) {
-        if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
-          stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
-        }
-        if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
-          stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
-        }
-        if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
-          stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
+    switch (statsSource) {
+      case ICEBERG:
+        if (table.currentSnapshot() != null) {
+          Map<String, String> summary = table.currentSnapshot().summary();
+          if (summary != null) {
+            if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
+              stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
+            }
+            if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
+              stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
+            }
+            if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
+              stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
+            }
+          }
+        } else {
+          stats.put(StatsSetupConst.NUM_FILES, "0");
+          stats.put(StatsSetupConst.ROW_COUNT, "0");
+          stats.put(StatsSetupConst.TOTAL_SIZE, "0");
         }
-      }
-    } else {
-      stats.put(StatsSetupConst.NUM_FILES, "0");
-      stats.put(StatsSetupConst.ROW_COUNT, "0");
-      stats.put(StatsSetupConst.TOTAL_SIZE, "0");
+        break;
+      case PUFFIN:
+        // place holder for puffin
+        break;
+      default:
+        // fall back to metastore
     }
     return stats;
   }
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
new file mode 100644
index 00000000000..90e2d95d1df
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
@@ -0,0 +1,39 @@
+-- Mask random uuid
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+set hive.stats.autogather=true;
+set hive.stats.column.autogather=true;
+
+drop table if exists tbl_ice;
+set hive.use.stats.from = metastore;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+
+drop table if exists tbl_ice;
+set hive.use.stats.from = iceberg;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+
+drop table if exists tbl_ice;
+drop table if exists  t1 ;
+drop table if exists  t2 ;
+create table t1 (a int) stored by iceberg tblproperties ('format-version'='2');
+create table t2 (b int) stored by iceberg tblproperties ('format-version'='2');
+describe formatted t1;
+describe formatted t2;
+explain select * from t1 join t2 on t1.a = t2.b;
+
+drop table if exists tbl_ice;
+create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+
+explain select * from tbl_ice order by a, b, c;
+select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out
new file mode 100644
index 00000000000..29f7fff01e8
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/use_basic_stats_from_iceberg.q.out
@@ -0,0 +1,412 @@
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: drop table if exists  t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists  t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists  t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists  t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: describe formatted t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: describe formatted t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name            	data_type           	comment             
+a                   	int                 	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
+	bucketing_version   	2                   
+	engine.hive.enabled 	true                
+	format-version      	2                   
+	iceberg.orc.files.only	false               
+	metadata_location   	hdfs://### HDFS PATH ###
+	numFiles            	0                   
+	numRows             	0                   
+	rawDataSize         	0                   
+	serialization.format	1                   
+	storage_handler     	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type          	ICEBERG             
+	totalSize           	0                   
+#### A masked pattern was here ####
+	uuid                	#Masked#
+	write.delete.mode   	merge-on-read       
+	write.merge.mode    	merge-on-read       
+	write.update.mode   	merge-on-read       
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe	 
+InputFormat:        	org.apache.iceberg.mr.hive.HiveIcebergInputFormat	 
+OutputFormat:       	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat	 
+Compressed:         	No                  	 
+Sort Columns:       	[]                  	 
+PREHOOK: query: describe formatted t2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t2
+POSTHOOK: query: describe formatted t2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t2
+# col_name            	data_type           	comment             
+b                   	int                 	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}}
+	bucketing_version   	2                   
+	engine.hive.enabled 	true                
+	format-version      	2                   
+	iceberg.orc.files.only	false               
+	metadata_location   	hdfs://### HDFS PATH ###
+	numFiles            	0                   
+	numRows             	0                   
+	rawDataSize         	0                   
+	serialization.format	1                   
+	storage_handler     	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type          	ICEBERG             
+	totalSize           	0                   
+#### A masked pattern was here ####
+	uuid                	#Masked#
+	write.delete.mode   	merge-on-read       
+	write.merge.mode    	merge-on-read       
+	write.update.mode   	merge-on-read       
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe	 
+InputFormat:        	org.apache.iceberg.mr.hive.HiveIcebergInputFormat	 
+OutputFormat:       	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat	 
+Compressed:         	No                  	 
+Sort Columns:       	[]                  	 
+PREHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2
+      File Output Operator [FS_10]
+        Merge Join Operator [MERGEJOIN_25] (rows=1 width=4)
+          Conds:RS_28._col0=RS_31._col0(Inner),Output:["_col0","_col1"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_28]
+            PartitionCols:_col0
+            Select Operator [SEL_27] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_26] (rows=1 width=4)
+                predicate:a is not null
+                TableScan [TS_0] (rows=1 width=4)
+                  default@t1,t1,Tbl:COMPLETE,Col:NONE,Output:["a"]
+        <-Map 3 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_31]
+            PartitionCols:_col0
+            Select Operator [SEL_30] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_29] (rows=1 width=4)
+                predicate:b is not null
+                TableScan [TS_3] (rows=1 width=4)
+                  default@t2,t2,Tbl:COMPLETE,Col:NONE,Output:["b"]
+
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=1 width=192)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=1 width=192)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=1 width=192)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:NONE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1	one	50
+2	two	51
+2	two	51
+2	two	51
+3	three	52
+4	four	53
+5	five	54
+111	one	55
+333	two	56
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=18 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=18 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=18 width=95)
+                default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+18
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
index 83e4f8e9da0..ba675dcd9d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStats.java
@@ -242,6 +242,7 @@ public class BasicStats {
   public BasicStats(Partish p) {
     partish = p;
 
+    checkForBasicStatsFromStorageHandler();
     rowCount = parseLong(StatsSetupConst.ROW_COUNT);
     rawDataSize = parseLong(StatsSetupConst.RAW_DATA_SIZE);
     totalSize = parseLong(StatsSetupConst.TOTAL_SIZE);
@@ -281,6 +282,13 @@ public class BasicStats {
 
   }
 
+  private void checkForBasicStatsFromStorageHandler() {
+    if (partish.getTable() != null && partish.getTable().isNonNative() &&
+        partish.getTable().getStorageHandler().canProvideBasicStatistics()) {
+      partish.getPartParameters().putAll(partish.getTable().getStorageHandler().getBasicStatistics(partish));
+    }
+  }
+
   public long getNumRows() {
     return currentNumRows;
   }