You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ay...@apache.org on 2023/05/04 17:29:17 UTC

[hive] branch branch-3 updated: HIVE-27312: Backport of HIVE-24965- Describe table partition stats fetch should be configurable. (#4285) (Diksha, reviewed by Ayush Saxena, Aman Raj)

This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new 55d809ee1c4 HIVE-27312: Backport of HIVE-24965- Describe table partition stats fetch should be configurable. (#4285) (Diksha, reviewed by Ayush Saxena, Aman Raj)
55d809ee1c4 is described below

commit 55d809ee1c4ff5eef8a5a93f7a7f12e111edfb7b
Author: Diksha628 <43...@users.noreply.github.com>
AuthorDate: Thu May 4 22:59:08 2023 +0530

    HIVE-27312: Backport of HIVE-24965- Describe table partition stats fetch should be configurable. (#4285) (Diksha, reviewed by Ayush Saxena, Aman Raj)
    
    Backports: HIVE-24965: Describe table partition stats fetch should be configurable (Kevin Cheung, reviewed by Sankar Hariappan)
    Signed-off-by: Sankar Hariappan <sa...@apache.org>
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   3 +
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java    |   3 +-
 .../test/queries/clientpositive/describe_table.q   |   8 ++
 .../results/clientpositive/describe_table.q.out    | 101 +++++++++++++++++++++
 4 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 7573cb0b6d6..c35a0a0fba1 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4481,6 +4481,9 @@ public class HiveConf extends Configuration {
         "Comma-separated list of class names extending EventConsumer," +
          "to handle the NotificationEvents retreived by the notification event poll."),
 
+    HIVE_DESCRIBE_PARTITIONED_TABLE_IGNORE_STATS("hive.describe.partitionedtable.ignore.stats", false,
+        "Disable partitioned table stats collection for 'DESCRIBE FORMATTED' or 'DESCRIBE EXTENDED' commands."),
+
     /* BLOBSTORE section */
 
     HIVE_BLOBSTORE_SUPPORTED_SCHEMES("hive.blobstore.supported.schemes", "s3,s3a,s3n",
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 264e9a4eb8c..e8d3b4cb8fd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -3667,7 +3667,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
         }
 
         if (descTbl.isExt() || descTbl.isFormatted()) {
-          if (tbl.isPartitioned() && part == null) {
+          boolean disablePartitionStats = conf.getBoolVar(HiveConf.ConfVars.HIVE_DESCRIBE_PARTITIONED_TABLE_IGNORE_STATS);
+          if (tbl.isPartitioned() && part == null && !disablePartitionStats) {
             // No partitioned specified for partitioned table, lets fetch all.
             Map<String,String> tblProps = tbl.getParameters() == null ? new HashMap<String,String>() : tbl.getParameters();
             Map<String, Long> valueMap = new HashMap<>();
diff --git a/ql/src/test/queries/clientpositive/describe_table.q b/ql/src/test/queries/clientpositive/describe_table.q
index 07fd6fc4276..8fd3bc8d547 100644
--- a/ql/src/test/queries/clientpositive/describe_table.q
+++ b/ql/src/test/queries/clientpositive/describe_table.q
@@ -34,6 +34,14 @@ alter table srcpart_serdeprops set serdeproperties('A1234'='3');
 describe formatted srcpart_serdeprops;
 drop table srcpart_serdeprops;
 
+CREATE TABLE IF NOT EXISTS desc_parttable_stats (somenumber int) PARTITIONED BY (yr int);
+INSERT INTO desc_parttable_stats values(0,1),(0,2),(0,3);
+set hive.describe.partitionedtable.ignore.stats=true;
+describe formatted desc_parttable_stats;
+set hive.describe.partitionedtable.ignore.stats=false;
+describe formatted desc_parttable_stats;
+DROP TABLE IF EXISTS desc_parttable_stats;
+
 CREATE DATABASE IF NOT EXISTS name1;
 CREATE DATABASE IF NOT EXISTS name2;
 use name1;
diff --git a/ql/src/test/results/clientpositive/describe_table.q.out b/ql/src/test/results/clientpositive/describe_table.q.out
index 8c7a16c4b65..6f37beedc34 100644
--- a/ql/src/test/results/clientpositive/describe_table.q.out
+++ b/ql/src/test/results/clientpositive/describe_table.q.out
@@ -481,6 +481,107 @@ POSTHOOK: query: drop table srcpart_serdeprops
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@srcpart_serdeprops
 POSTHOOK: Output: default@srcpart_serdeprops
+PREHOOK: query: CREATE TABLE IF NOT EXISTS desc_parttable_stats (somenumber int) PARTITIONED BY (yr int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@desc_parttable_stats
+POSTHOOK: query: CREATE TABLE IF NOT EXISTS desc_parttable_stats (somenumber int) PARTITIONED BY (yr int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@desc_parttable_stats
+PREHOOK: query: INSERT INTO desc_parttable_stats values(0,1),(0,2),(0,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@desc_parttable_stats
+POSTHOOK: query: INSERT INTO desc_parttable_stats values(0,1),(0,2),(0,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@desc_parttable_stats@yr=1
+POSTHOOK: Output: default@desc_parttable_stats@yr=2
+POSTHOOK: Output: default@desc_parttable_stats@yr=3
+POSTHOOK: Lineage: desc_parttable_stats PARTITION(yr=1).somenumber SCRIPT []
+POSTHOOK: Lineage: desc_parttable_stats PARTITION(yr=2).somenumber SCRIPT []
+POSTHOOK: Lineage: desc_parttable_stats PARTITION(yr=3).somenumber SCRIPT []
+PREHOOK: query: describe formatted desc_parttable_stats
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@desc_parttable_stats
+POSTHOOK: query: describe formatted desc_parttable_stats
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@desc_parttable_stats
+# col_name            	data_type           	comment             
+somenumber          	int                 	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+yr                  	int                 	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	bucketing_version   	2                   
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: describe formatted desc_parttable_stats
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@desc_parttable_stats
+POSTHOOK: query: describe formatted desc_parttable_stats
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@desc_parttable_stats
+# col_name            	data_type           	comment             
+somenumber          	int                 	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+yr                  	int                 	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	bucketing_version   	2                   
+	numFiles            	3                   
+	numPartitions       	3                   
+	numRows             	3                   
+	rawDataSize         	3                   
+	totalSize           	6                   
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: DROP TABLE IF EXISTS desc_parttable_stats
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@desc_parttable_stats
+PREHOOK: Output: default@desc_parttable_stats
+POSTHOOK: query: DROP TABLE IF EXISTS desc_parttable_stats
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@desc_parttable_stats
+POSTHOOK: Output: default@desc_parttable_stats
 PREHOOK: query: CREATE DATABASE IF NOT EXISTS name1
 PREHOOK: type: CREATEDATABASE
 PREHOOK: Output: database:name1