You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/11/04 22:22:37 UTC

hive git commit: HIVE-17932 : Remove option to control partition level basic stats fetching (Zoltan Haindrich via Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 806bd14de -> 6c8713677


HIVE-17932 : Remove option to control partition level basic stats fetching (Zoltan Haindrich via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6c871367
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6c871367
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6c871367

Branch: refs/heads/master
Commit: 6c8713677b6299c9ddca1a6093fe11a1e7032bb1
Parents: 806bd14
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Sat Nov 4 15:21:42 2017 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sat Nov 4 15:21:42 2017 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  9 ------
 .../ql/optimizer/calcite/RelOptHiveTable.java   |  4 +--
 .../apache/hadoop/hive/ql/stats/StatsUtils.java | 31 ++++++++------------
 .../columnStatsUpdateForStatsOptimizer_1.q      |  1 -
 .../columnStatsUpdateForStatsOptimizer_2.q      |  1 -
 5 files changed, 15 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 48341a8..15ab625 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1780,15 +1780,6 @@ public class HiveConf extends Configuration {
         "Average row size is computed from average column size of all columns in the row. In the absence\n" +
         "of column statistics and for variable length complex columns like map, the average number of\n" +
         "entries/values can be specified using this config."),
-    // statistics annotation fetches stats for each partition, which can be expensive. turning
-    // this off will result in basic sizes being fetched from namenode instead
-    HIVE_STATS_FETCH_PARTITION_STATS("hive.stats.fetch.partition.stats", true,
-        "Annotation of operator tree with statistics information requires partition level basic\n" +
-        "statistics like number of rows, data size and file size. Partition statistics are fetched from\n" +
-        "metastore. Fetching partition statistics for each needed partition can be expensive when the\n" +
-        "number of partitions is high. This flag can be used to disable fetching of partition statistics\n" +
-        "from metastore. When this flag is disabled, Hive will make calls to filesystem to get file sizes\n" +
-        "and will estimate the number of rows from row schema."),
     // statistics annotation fetches column statistics for all required columns which can
     // be very expensive sometimes
     HIVE_STATS_FETCH_COLUMN_STATS("hive.stats.fetch.column.stats", false,

http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 60152ac..23094d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -451,7 +451,7 @@ public class RelOptHiveTable extends RelOptAbstractTable {
         try {
           Statistics stats = StatsUtils.collectStatistics(hiveConf, null,
               hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats,
-              colStatsCached, nonPartColNamesThatRqrStats, true, true);
+              colStatsCached, nonPartColNamesThatRqrStats, true);
           rowCount = stats.getNumRows();
           for (String c : nonPartColNamesThatRqrStats) {
             ColStatistics cs = stats.getColumnStatisticsFromColName(c);
@@ -517,7 +517,7 @@ public class RelOptHiveTable extends RelOptAbstractTable {
           } else {
             Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList,
                 hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, colStatsCached,
-                nonPartColNamesThatRqrStats, true, true);
+                nonPartColNamesThatRqrStats, true);
             rowCount = stats.getNumRows();
             hiveColStats = new ArrayList<ColStatistics>();
             for (String c : nonPartColNamesThatRqrStats) {

http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 9ffd12a..ce7c96c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -153,13 +153,11 @@ public class StatsUtils {
 
     boolean fetchColStats =
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS);
-    boolean fetchPartStats =
-        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_PARTITION_STATS);
     boolean testMode =
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEST);
 
     return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache, referencedColumns,
-        fetchColStats, fetchPartStats, testMode);
+        fetchColStats, testMode);
   }
 
   private static long getDataSize(HiveConf conf, Table table) {
@@ -321,15 +319,15 @@ public class StatsUtils {
 
   public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
       Table table, List<ColumnInfo> schema, List<String> neededColumns, ColumnStatsList colStatsCache,
-      List<String> referencedColumns, boolean fetchColStats, boolean fetchPartStats)
+      List<String> referencedColumns, boolean fetchColStats)
       throws HiveException {
     return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache,
-        referencedColumns, fetchColStats, fetchPartStats, false);
+        referencedColumns, fetchColStats, false);
   }
 
   private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
       Table table, List<ColumnInfo> schema, List<String> neededColumns, ColumnStatsList colStatsCache,
-      List<String> referencedColumns, boolean fetchColStats, boolean fetchPartStats, boolean failIfCacheMiss)
+      List<String> referencedColumns, boolean fetchColStats, boolean failIfCacheMiss)
       throws HiveException {
 
     Statistics stats = new Statistics();
@@ -373,19 +371,14 @@ public class StatsUtils {
       List<Long> rowCounts = Lists.newArrayList();
       List<Long> dataSizes = Lists.newArrayList();
 
-      if (fetchPartStats) {
-        rowCounts = getBasicStatForPartitions(
-            table, partList.getNotDeniedPartns(), StatsSetupConst.ROW_COUNT);
-        dataSizes =  getBasicStatForPartitions(
-            table, partList.getNotDeniedPartns(), StatsSetupConst.RAW_DATA_SIZE);
+      rowCounts = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), StatsSetupConst.ROW_COUNT);
+      dataSizes = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), StatsSetupConst.RAW_DATA_SIZE);
 
-        nr = getSumIgnoreNegatives(rowCounts);
+      nr = getSumIgnoreNegatives(rowCounts);
+      ds = getSumIgnoreNegatives(dataSizes);
+      if (ds <= 0) {
+        dataSizes = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), StatsSetupConst.TOTAL_SIZE);
         ds = getSumIgnoreNegatives(dataSizes);
-        if (ds <= 0) {
-          dataSizes = getBasicStatForPartitions(
-              table, partList.getNotDeniedPartns(), StatsSetupConst.TOTAL_SIZE);
-          ds = getSumIgnoreNegatives(dataSizes);
-        }
       }
 
       // if data size still could not be determined, then fall back to filesytem to get file
@@ -910,7 +903,9 @@ public class StatsUtils {
         boolean isNull = (cs == null) ? true: (cs.isEstimated());
         hasStats |= !isNull;
         hasNull |= isNull;
-        if (hasNull && hasStats) break;
+        if (hasNull && hasStats) {
+          break;
+        }
       }
     }
     State result = (hasStats

http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q
index 199c74c..9cd9150 100644
--- a/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q
+++ b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q
@@ -1,5 +1,4 @@
 set hive.stats.fetch.column.stats=true; 
-set hive.stats.fetch.partition.stats=true; 
 set hive.compute.query.using.stats=true; 
 set hive.mapred.mode=nonstrict;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q
index 222d85f..5fa7aec 100644
--- a/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q
+++ b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q
@@ -1,5 +1,4 @@
 set hive.stats.fetch.column.stats=true;
-set hive.stats.fetch.partition.stats=true;
 set hive.compute.query.using.stats=true;