You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/02/07 16:56:22 UTC
hive git commit: HIVE-18611 : Avoid memory allocation of aggregation
buffer during stats computation (Ashutosh Chauhan via Gopal V)
Repository: hive
Updated Branches:
refs/heads/master 07492e0d2 -> bf93128e7
HIVE-18611 : Avoid memory allocation of aggregation buffer during stats computation (Ashutosh Chauhan via Gopal V)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bf93128e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bf93128e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bf93128e
Branch: refs/heads/master
Commit: bf93128e7fef309bed324a493a83c060220a9743
Parents: 07492e0
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Thu Feb 1 23:37:51 2018 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Feb 7 08:55:55 2018 -0800
----------------------------------------------------------------------
.../stats/annotation/StatsRulesProcFactory.java | 7 ++++++-
.../hive/ql/udf/generic/GenericUDAFBloomFilter.java | 12 +++++++++++-
.../hive/ql/udf/generic/GenericUDAFEvaluator.java | 11 +++++++++++
.../java/org/apache/hive/common/util/BloomKFilter.java | 6 +++---
4 files changed, 31 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bf93128e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index cbadfa4..9a3f81c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -488,7 +488,7 @@ public class StatsRulesProcFactory {
factor *= columnFactor > 1d ? 1d : columnFactor;
}
float inFactor = HiveConf.getFloatVar(aspCtx.getConf(), HiveConf.ConfVars.HIVE_STATS_IN_CLAUSE_FACTOR);
- return Math.round( (double) numRows * factor * inFactor);
+ return Math.round( numRows * factor * inFactor);
}
private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx,
@@ -1313,6 +1313,11 @@ public class StatsRulesProcFactory {
// each evaluator has constant java object overhead
avgValSize += gop.javaObjectOverHead;
GenericUDAFEvaluator.AggregationBuffer agg = null;
+ int evaluatorEstimate = aggregationEvaluators[i].estimate();
+ if (evaluatorEstimate > 0) {
+ avgValSize += evaluatorEstimate;
+ continue;
+ }
try {
agg = aggregationEvaluators[i].getNewAggregationBuffer();
} catch (HiveException e) {
http://git-wip-us.apache.org/repos/asf/hive/blob/bf93128e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
index 0c92d2a..ca8bc8f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
@@ -82,7 +82,7 @@ public class GenericUDAFBloomFilter implements GenericUDAFResolver2 {
private PrimitiveObjectInspector inputOI;
// Bloom filter rest
- private ByteArrayOutputStream result = new ByteArrayOutputStream();
+ private final ByteArrayOutputStream result = new ByteArrayOutputStream();
private transient byte[] scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
@@ -102,6 +102,16 @@ public class GenericUDAFBloomFilter implements GenericUDAFResolver2 {
return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
}
+ @Override
+ public int estimate() { // computes the bloom filter's long-array size arithmetically; nothing is allocated here
+ long entries = Math.min(getExpectedEntries(), maxEntries); // expected entries, capped at maxEntries
+ long numBits = (long) (-entries * Math.log(BloomKFilter.DEFAULT_FPP) / (Math.log(2) * Math.log(2))); // bits = -n * ln(fpp) / (ln 2)^2
+ int nLongs = (int) Math.ceil((double) numBits / (double) Long.SIZE); // longs needed to hold numBits, rounded up
+ // additional longs to pad the array to a block of 8; a full extra block is added when nLongs is already a multiple of 8
+ int padLongs = 8 - nLongs % 8; // presumably mirrors BloomKFilter's own sizing (DEFAULT_BLOCK_SIZE = 8) - verify
+ return (nLongs + padLongs) * Long.SIZE / 8; // longs * 64 bits / 8 = bytes; NOTE(review): the int multiply can overflow before the divide for very large entry counts - confirm
+ }
+
/**
* Class for storing the BloomFilter
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/bf93128e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
index c3498b7..3a3e4b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
@@ -70,6 +70,17 @@ public abstract class GenericUDAFEvaluator implements Closeable {
}
/**
+ * Although similar to AbstractAggregationBuffer::estimate(), this differs from it in 2 aspects:
+ * 1) This avoids creation of an AggregationBuffer, which may result in a large memory allocation.
+ * 2) This is used only while compiling a query, as opposed to the AbstractAggregationBuffer version,
+ * which may be used at both runtime and compile time.
+ * @return the estimated size; a non-positive value (-1 by default) means no estimate is available
+ */
+ public int estimate() {
+ return -1;
+ }
+
+ /**
* Mode.
*
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/bf93128e/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
index 9ecc2ba..6ccf5ab 100644
--- a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
+++ b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
@@ -36,15 +36,15 @@ import java.util.Arrays;
* This implementation has much lesser L1 data cache misses than {@link BloomFilter}.
*/
public class BloomKFilter {
- private byte[] BYTE_ARRAY_4 = new byte[4];
- private byte[] BYTE_ARRAY_8 = new byte[8];
+ private final byte[] BYTE_ARRAY_4 = new byte[4];
+ private final byte[] BYTE_ARRAY_8 = new byte[8];
public static final float DEFAULT_FPP = 0.05f;
private static final int DEFAULT_BLOCK_SIZE = 8;
private static final int DEFAULT_BLOCK_SIZE_BITS = (int) (Math.log(DEFAULT_BLOCK_SIZE) / Math.log(2));
private static final int DEFAULT_BLOCK_OFFSET_MASK = DEFAULT_BLOCK_SIZE - 1;
private static final int DEFAULT_BIT_OFFSET_MASK = Long.SIZE - 1;
private final long[] masks = new long[DEFAULT_BLOCK_SIZE];
- private BitSet bitSet;
+ private final BitSet bitSet;
private final int m;
private final int k;
// spread k-1 bits to adjacent longs, default is 8