You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2019/12/14 02:31:20 UTC
[hive] branch master updated: HIVE-22632 : Improve estimateRowSizeFromSchema (Vineet Garg via Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new e5f3538 HIVE-22632 : Improve estimateRowSizeFromSchema (Vineet Garg via Zoltan Haindrich)
e5f3538 is described below
commit e5f3538442838dfe6988be595ec34bc48cefe0ec
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Fri Dec 13 18:30:35 2019 -0800
HIVE-22632 : Improve estimateRowSizeFromSchema (Vineet Garg via Zoltan Haindrich)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
.../stats/annotation/StatsRulesProcFactory.java | 2 +-
.../apache/hadoop/hive/ql/stats/StatsUtils.java | 44 ++++++++++++----------
2 files changed, 25 insertions(+), 21 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 5a7c1af..f5c9cbd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -2575,7 +2575,7 @@ public class StatsRulesProcFactory {
}
}
if (neededColumns.size() != 0) {
- int restColumnsDefaultSize =
+ long restColumnsDefaultSize =
StatsUtils.estimateRowSizeFromSchema(conf, jop.getSchema().getSignature(), neededColumns);
newDataSize = StatsUtils.safeAdd(newDataSize, StatsUtils.safeMult(restColumnsDefaultSize, newNumRows));
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 3f71ee4..8084dcd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -649,37 +649,41 @@ public class StatsUtils {
return range;
}
- public static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema) {
- List<String> neededColumns = new ArrayList<>();
+ private static long getAvgColSize(final ColumnInfo columnInfo, HiveConf conf) {
+ ObjectInspector oi = columnInfo.getObjectInspector();
+ String colTypeLowerCase = columnInfo.getTypeName().toLowerCase();
+ if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
+ || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME)
+ || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) {
+ return getAvgColLenOf(conf, oi, colTypeLowerCase);
+ } else {
+ return getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
+ }
+ }
+
+ public static long estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema) {
+ long avgRowSize = 0;
for (ColumnInfo ci : schema) {
- neededColumns.add(ci.getInternalName());
+ avgRowSize += getAvgColSize(ci, conf);
}
- return estimateRowSizeFromSchema(conf, schema, neededColumns);
+ return avgRowSize;
}
- public static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema,
+ public static long estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema,
List<String> neededColumns) {
- int avgRowSize = 0;
+ long avgRowSize = 0;
for (String neededCol : neededColumns) {
ColumnInfo ci = getColumnInfoForColumn(neededCol, schema);
if (ci == null) {
// No need to collect statistics of index columns
continue;
}
- ObjectInspector oi = ci.getObjectInspector();
- String colTypeLowerCase = ci.getTypeName().toLowerCase();
- if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
- || colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)
- || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
- || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
- || colTypeLowerCase.startsWith(serdeConstants.LIST_TYPE_NAME)
- || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME)
- || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME)
- || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) {
- avgRowSize += getAvgColLenOf(conf, oi, colTypeLowerCase);
- } else {
- avgRowSize += getAvgColLenOfFixedLengthTypes(colTypeLowerCase);
- }
+ avgRowSize += getAvgColSize(ci, conf);
}
return avgRowSize;
}