You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/02/07 16:56:22 UTC

hive git commit: HIVE-18611 : Avoid memory allocation of aggregation buffer during stats computation (Ashutosh Chauhan via Gopal V)

Repository: hive
Updated Branches:
  refs/heads/master 07492e0d2 -> bf93128e7


HIVE-18611 : Avoid memory allocation of aggregation buffer during stats computation (Ashutosh Chauhan via Gopal V)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bf93128e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bf93128e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bf93128e

Branch: refs/heads/master
Commit: bf93128e7fef309bed324a493a83c060220a9743
Parents: 07492e0
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Thu Feb 1 23:37:51 2018 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Feb 7 08:55:55 2018 -0800

----------------------------------------------------------------------
 .../stats/annotation/StatsRulesProcFactory.java         |  7 ++++++-
 .../hive/ql/udf/generic/GenericUDAFBloomFilter.java     | 12 +++++++++++-
 .../hive/ql/udf/generic/GenericUDAFEvaluator.java       | 11 +++++++++++
 .../java/org/apache/hive/common/util/BloomKFilter.java  |  6 +++---
 4 files changed, 31 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/bf93128e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index cbadfa4..9a3f81c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -488,7 +488,7 @@ public class StatsRulesProcFactory {
         factor *= columnFactor > 1d ? 1d : columnFactor;
       }
       float inFactor = HiveConf.getFloatVar(aspCtx.getConf(), HiveConf.ConfVars.HIVE_STATS_IN_CLAUSE_FACTOR);
-      return Math.round( (double) numRows * factor * inFactor);
+      return Math.round( numRows * factor * inFactor);
     }
 
     private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx,
@@ -1313,6 +1313,11 @@ public class StatsRulesProcFactory {
           // each evaluator has constant java object overhead
           avgValSize += gop.javaObjectOverHead;
           GenericUDAFEvaluator.AggregationBuffer agg = null;
+          int evaluatorEstimate = aggregationEvaluators[i].estimate();
+          if (evaluatorEstimate > 0) {
+            avgValSize += evaluatorEstimate;
+            continue;
+          }
           try {
             agg = aggregationEvaluators[i].getNewAggregationBuffer();
           } catch (HiveException e) {

http://git-wip-us.apache.org/repos/asf/hive/blob/bf93128e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
index 0c92d2a..ca8bc8f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBloomFilter.java
@@ -82,7 +82,7 @@ public class GenericUDAFBloomFilter implements GenericUDAFResolver2 {
     private PrimitiveObjectInspector inputOI;
 
     // Bloom filter rest
-    private ByteArrayOutputStream result = new ByteArrayOutputStream();
+    private final ByteArrayOutputStream result = new ByteArrayOutputStream();
 
     private transient byte[] scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES];
 
@@ -102,6 +102,16 @@ public class GenericUDAFBloomFilter implements GenericUDAFResolver2 {
       return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
     }
 
+    @Override
+    public int estimate() {
+      long entries = Math.min(getExpectedEntries(), maxEntries);
+      long numBits = (long) (-entries * Math.log(BloomKFilter.DEFAULT_FPP) / (Math.log(2) * Math.log(2)));
+      int nLongs = (int) Math.ceil((double) numBits / (double) Long.SIZE);
+      // additional longs to pad the long array to a multiple of the block size
+      int padLongs = 8 - nLongs % 8;
+      return (nLongs + padLongs) * Long.SIZE / 8;
+    }
+
     /**
      * Class for storing the BloomFilter
      */

http://git-wip-us.apache.org/repos/asf/hive/blob/bf93128e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
index c3498b7..3a3e4b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
@@ -70,6 +70,17 @@ public abstract class GenericUDAFEvaluator implements Closeable {
   }
 
   /**
+   * Although similar to AbstractAggregationBuffer::estimate(), it differs from it in 2 aspects:
+   * 1) This avoids creating an AggregationBuffer, which may result in a large memory allocation.
+   * 2) This is used only while compiling a query, as opposed to the AbstractAggregationBuffer
+   * version, which may be used at both runtime and compile time.
+   * @return estimated buffer size in bytes, or a non-positive value if no estimate is available
+   */
+  public int estimate() {
+    return -1;
+  }
+
+  /**
    * Mode.
    *
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/bf93128e/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
index 9ecc2ba..6ccf5ab 100644
--- a/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
+++ b/storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java
@@ -36,15 +36,15 @@ import java.util.Arrays;
  * This implementation has much lesser L1 data cache misses than {@link BloomFilter}.
  */
 public class BloomKFilter {
-  private byte[] BYTE_ARRAY_4 = new byte[4];
-  private byte[] BYTE_ARRAY_8 = new byte[8];
+  private final byte[] BYTE_ARRAY_4 = new byte[4];
+  private final byte[] BYTE_ARRAY_8 = new byte[8];
   public static final float DEFAULT_FPP = 0.05f;
   private static final int DEFAULT_BLOCK_SIZE = 8;
   private static final int DEFAULT_BLOCK_SIZE_BITS = (int) (Math.log(DEFAULT_BLOCK_SIZE) / Math.log(2));
   private static final int DEFAULT_BLOCK_OFFSET_MASK = DEFAULT_BLOCK_SIZE - 1;
   private static final int DEFAULT_BIT_OFFSET_MASK = Long.SIZE - 1;
   private final long[] masks = new long[DEFAULT_BLOCK_SIZE];
-  private BitSet bitSet;
+  private final BitSet bitSet;
   private final int m;
   private final int k;
   // spread k-1 bits to adjacent longs, default is 8