You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/11/04 22:22:37 UTC
hive git commit: HIVE-17932 : Remove option to control partition
level basic stats fetching (Zoltan Haindrich via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 806bd14de -> 6c8713677
HIVE-17932 : Remove option to control partition level basic stats fetching (Zoltan Haindrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6c871367
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6c871367
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6c871367
Branch: refs/heads/master
Commit: 6c8713677b6299c9ddca1a6093fe11a1e7032bb1
Parents: 806bd14
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Sat Nov 4 15:21:42 2017 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sat Nov 4 15:21:42 2017 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 9 ------
.../ql/optimizer/calcite/RelOptHiveTable.java | 4 +--
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 31 ++++++++------------
.../columnStatsUpdateForStatsOptimizer_1.q | 1 -
.../columnStatsUpdateForStatsOptimizer_2.q | 1 -
5 files changed, 15 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 48341a8..15ab625 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1780,15 +1780,6 @@ public class HiveConf extends Configuration {
"Average row size is computed from average column size of all columns in the row. In the absence\n" +
"of column statistics and for variable length complex columns like map, the average number of\n" +
"entries/values can be specified using this config."),
- // statistics annotation fetches stats for each partition, which can be expensive. turning
- // this off will result in basic sizes being fetched from namenode instead
- HIVE_STATS_FETCH_PARTITION_STATS("hive.stats.fetch.partition.stats", true,
- "Annotation of operator tree with statistics information requires partition level basic\n" +
- "statistics like number of rows, data size and file size. Partition statistics are fetched from\n" +
- "metastore. Fetching partition statistics for each needed partition can be expensive when the\n" +
- "number of partitions is high. This flag can be used to disable fetching of partition statistics\n" +
- "from metastore. When this flag is disabled, Hive will make calls to filesystem to get file sizes\n" +
- "and will estimate the number of rows from row schema."),
// statistics annotation fetches column statistics for all required columns which can
// be very expensive sometimes
HIVE_STATS_FETCH_COLUMN_STATS("hive.stats.fetch.column.stats", false,
http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 60152ac..23094d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -451,7 +451,7 @@ public class RelOptHiveTable extends RelOptAbstractTable {
try {
Statistics stats = StatsUtils.collectStatistics(hiveConf, null,
hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats,
- colStatsCached, nonPartColNamesThatRqrStats, true, true);
+ colStatsCached, nonPartColNamesThatRqrStats, true);
rowCount = stats.getNumRows();
for (String c : nonPartColNamesThatRqrStats) {
ColStatistics cs = stats.getColumnStatisticsFromColName(c);
@@ -517,7 +517,7 @@ public class RelOptHiveTable extends RelOptAbstractTable {
} else {
Statistics stats = StatsUtils.collectStatistics(hiveConf, partitionList,
hiveTblMetadata, hiveNonPartitionCols, nonPartColNamesThatRqrStats, colStatsCached,
- nonPartColNamesThatRqrStats, true, true);
+ nonPartColNamesThatRqrStats, true);
rowCount = stats.getNumRows();
hiveColStats = new ArrayList<ColStatistics>();
for (String c : nonPartColNamesThatRqrStats) {
http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 9ffd12a..ce7c96c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -153,13 +153,11 @@ public class StatsUtils {
boolean fetchColStats =
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS);
- boolean fetchPartStats =
- HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_PARTITION_STATS);
boolean testMode =
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEST);
return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache, referencedColumns,
- fetchColStats, fetchPartStats, testMode);
+ fetchColStats, testMode);
}
private static long getDataSize(HiveConf conf, Table table) {
@@ -321,15 +319,15 @@ public class StatsUtils {
public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
Table table, List<ColumnInfo> schema, List<String> neededColumns, ColumnStatsList colStatsCache,
- List<String> referencedColumns, boolean fetchColStats, boolean fetchPartStats)
+ List<String> referencedColumns, boolean fetchColStats)
throws HiveException {
return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache,
- referencedColumns, fetchColStats, fetchPartStats, false);
+ referencedColumns, fetchColStats, false);
}
private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
Table table, List<ColumnInfo> schema, List<String> neededColumns, ColumnStatsList colStatsCache,
- List<String> referencedColumns, boolean fetchColStats, boolean fetchPartStats, boolean failIfCacheMiss)
+ List<String> referencedColumns, boolean fetchColStats, boolean failIfCacheMiss)
throws HiveException {
Statistics stats = new Statistics();
@@ -373,19 +371,14 @@ public class StatsUtils {
List<Long> rowCounts = Lists.newArrayList();
List<Long> dataSizes = Lists.newArrayList();
- if (fetchPartStats) {
- rowCounts = getBasicStatForPartitions(
- table, partList.getNotDeniedPartns(), StatsSetupConst.ROW_COUNT);
- dataSizes = getBasicStatForPartitions(
- table, partList.getNotDeniedPartns(), StatsSetupConst.RAW_DATA_SIZE);
+ rowCounts = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), StatsSetupConst.ROW_COUNT);
+ dataSizes = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), StatsSetupConst.RAW_DATA_SIZE);
- nr = getSumIgnoreNegatives(rowCounts);
+ nr = getSumIgnoreNegatives(rowCounts);
+ ds = getSumIgnoreNegatives(dataSizes);
+ if (ds <= 0) {
+ dataSizes = getBasicStatForPartitions(table, partList.getNotDeniedPartns(), StatsSetupConst.TOTAL_SIZE);
ds = getSumIgnoreNegatives(dataSizes);
- if (ds <= 0) {
- dataSizes = getBasicStatForPartitions(
- table, partList.getNotDeniedPartns(), StatsSetupConst.TOTAL_SIZE);
- ds = getSumIgnoreNegatives(dataSizes);
- }
}
// if data size still could not be determined, then fall back to filesystem to get file
@@ -910,7 +903,9 @@ public class StatsUtils {
boolean isNull = (cs == null) ? true: (cs.isEstimated());
hasStats |= !isNull;
hasNull |= isNull;
- if (hasNull && hasStats) break;
+ if (hasNull && hasStats) {
+ break;
+ }
}
}
State result = (hasStats
http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q
index 199c74c..9cd9150 100644
--- a/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q
+++ b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_1.q
@@ -1,5 +1,4 @@
set hive.stats.fetch.column.stats=true;
-set hive.stats.fetch.partition.stats=true;
set hive.compute.query.using.stats=true;
set hive.mapred.mode=nonstrict;
http://git-wip-us.apache.org/repos/asf/hive/blob/6c871367/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q
index 222d85f..5fa7aec 100644
--- a/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q
+++ b/ql/src/test/queries/clientpositive/columnStatsUpdateForStatsOptimizer_2.q
@@ -1,5 +1,4 @@
set hive.stats.fetch.column.stats=true;
-set hive.stats.fetch.partition.stats=true;
set hive.compute.query.using.stats=true;