Posted to commits@impala.apache.org by to...@apache.org on 2018/08/17 23:19:39 UTC

impala git commit: IMPALA-7406. Avoid memory overhead of FlatBuffer object wrappers for FileDescriptor

Repository: impala
Updated Branches:
  refs/heads/master 199c0f2f9 -> 1dd4403fd


IMPALA-7406. Avoid memory overhead of FlatBuffer object wrappers for FileDescriptor

This patch saves approximately 100 bytes per file in the catalog and
impalad heap by changing HdfsPartition to store raw byte[] arrays
instead of FileDescriptor wrapper objects. The outward-facing APIs of
HdfsPartition remain the same: the patch uses Guava's lazy list
transformation utility to return a List that is backed by the
underlying byte[] arrays and re-wraps each element on the fly.
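
The pattern itself is small. Below is a minimal, self-contained sketch of
the idea using a hypothetical CompactStore with String/byte[] stand-ins
rather than the real HdfsPartition/FileDescriptor classes (the actual
change is in the diff further down):

import java.nio.charset.StandardCharsets;
import java.util.List;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;

// Hypothetical stand-in for HdfsPartition: retains only raw byte[] rows and
// exposes a lazily-wrapping List view. Lists.transform() returns a view over
// the backing list, so each get() wraps one element on the fly and the
// wrapper becomes garbage as soon as the caller drops it.
public class CompactStore {
  private ImmutableList<byte[]> encoded_ = ImmutableList.of();

  // Write path: eagerly strip the wrappers so only byte[] is retained.
  public void set(List<String> values) {
    encoded_ = ImmutableList.copyOf(Lists.transform(values,
        new Function<String, byte[]>() {
          @Override
          public byte[] apply(String s) {
            return s.getBytes(StandardCharsets.UTF_8);
          }
        }));
  }

  // Read path: a lazy view; no wrapper objects exist until accessed.
  public List<String> get() {
    return Lists.transform(encoded_, new Function<byte[], String>() {
      @Override
      public String apply(byte[] b) {
        return new String(b, StandardCharsets.UTF_8);
      }
    });
  }
}

The tradeoff is a fresh wrapper allocation on every element access, which
the performance note below argues is cheap.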

No new tests are added since existing tests already cover these code
paths end-to-end. I tested the memory usage by running a catalogd
with background loading enabled, waiting until loading finished, and
then running 'jmap -histo:live' a few times on the catalogd until the
result stabilized. I then used a quick python script[1] to diff the
histograms and look for any changes with high magnitude. I found the
following significant diffs:

Object counts:
  Negative (good):
    java.nio.HeapByteBuffer: 12131 -> 35 (-12096)
    org.apache.impala.fb.FbFileDesc: 12090 -> 0 (-12090)
    org.apache.impala.catalog.HdfsPartition$FileDescriptor: 12090 -> 0 (-12090)
    java.util.ArrayList: 37825 -> 28028 (-9797)
    [Ljava.lang.Object;: 24393 -> 15035 (-9358)
  Positive (bad):
    com.google.common.collect.RegularImmutableList: 1694 -> 2019 (+325)
    com.google.common.collect.SingletonImmutableList: 8560 -> 19142 (+10582)

  Net: -55431 + 10907 = -44524

Heap size:
  Negative (good):
    java.nio.HeapByteBuffer: 582288 -> 1680 (-580608)
    [Ljava.lang.Object;: 3560960 -> 3026448 (-534512)
    org.apache.impala.fb.FbFileDesc: 290160 -> 0 (-290160)
    java.util.ArrayList: 907800 -> 672672 (-235128)
    org.apache.impala.catalog.HdfsPartition$FileDescriptor: 193440 -> 0 (-193440)
  Positive (bad):
    com.google.common.collect.RegularImmutableList: 54208 -> 64608 (+10400)
    com.google.common.collect.SingletonImmutableList: 205440 -> 459408 (+253968)

  Net: -1833848 + 264368 = -1569480
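
The diffing step itself is mechanical. The script I actually used is
python [1], but a rough Java equivalent, assuming jmap -histo's usual
"num: #instances #bytes class-name" columns (HistoDiff, its threshold,
and its output format are illustrative, not the real script), might
look like:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;

public class HistoDiff {
  // Parse a jmap -histo dump into a map of class name -> #instances.
  static Map<String, Long> parse(String path) throws IOException {
    Map<String, Long> counts = new HashMap<>();
    for (String line : Files.readAllLines(Paths.get(path))) {
      String[] f = line.trim().split("\\s+");
      // Data rows look like: "  1:    12345    678900  java.lang.String"
      if (f.length == 4 && f[0].matches("\\d+:?")) {
        counts.put(f[3], Long.parseLong(f[1]));
      }
    }
    return counts;
  }

  public static void main(String[] args) throws IOException {
    Map<String, Long> before = parse(args[0]);
    Map<String, Long> after = parse(args[1]);
    // Only reports classes seen in the 'before' dump; fine for a sketch.
    for (String cls : before.keySet()) {
      long b = before.get(cls);
      long a = after.getOrDefault(cls, 0L);
      long delta = a - b;
      if (Math.abs(delta) > 1000) {  // surface high-magnitude changes only
        System.out.println(cls + ": " + b + " -> " + a
            + " (" + (delta > 0 ? "+" : "") + delta + ")");
      }
    }
  }
}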

To summarize, the measurements confirm that this patch saved about 130
bytes and 3.7 objects per file descriptor. The savings are a bit larger
than estimated because switching from ArrayList to ImmutableList inside
HdfsPartition also helped: ImmutableList has optimized representations
for the empty and single-element cases.
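
As a rough illustration of that last point, assuming Guava's usual
implementation classes (which match the histogram above; the class and
variable names here are illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import com.google.common.collect.ImmutableList;

public class TinyListFootprint {
  public static void main(String[] args) {
    // ImmutableList.of() returns one shared empty instance: zero allocation.
    List<byte[]> none = ImmutableList.of();
    // A one-element ImmutableList is a SingletonImmutableList: a single
    // reference field and no backing array (the class that grew above).
    List<byte[]> one = ImmutableList.of(new byte[16]);
    // The ArrayList equivalent carries an object header, size/modCount
    // fields, plus a separate Object[] backing array.
    List<byte[]> list = new ArrayList<>(Arrays.asList(new byte[16]));
    System.out.println(none.size() + " " + one.size() + " " + list.size());
  }
}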

In terms of performance, it is very unlikely that this patch causes a
regression. The short-lived wrapper allocations will all come out of
the TLAB, and even loading a table with a million partitions would
allocate only ~100MB of short-lived objects (at roughly 100 bytes per
wrapper), which should take only a few milliseconds to collect
(young-generation collections take time proportional to the number of
live objects, not dead objects). The other overheads associated with
any operation on a million-partition table would surely dominate.

[1] https://gist.github.com/cb9c52907bbc416b055ffdaa34aaaa77

Change-Id: I986dd5c05ac8904898d01a6ef3487bd7225e0500
Reviewed-on: http://gerrit.cloudera.org:8080/11232
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
Reviewed-by: Vuk Ercegovac <ve...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1dd4403f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1dd4403f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1dd4403f

Branch: refs/heads/master
Commit: 1dd4403fdcae665b0f343bf3fb7cb644c1875851
Parents: 199c0f2
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Aug 15 00:21:12 2018 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Fri Aug 17 18:41:36 2018 +0000

----------------------------------------------------------------------
 .../apache/impala/catalog/HdfsPartition.java    | 70 +++++++++++++++++---
 1 file changed, 60 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/1dd4403f/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
index f5e0654..05b66f7 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsPartition.java
@@ -26,6 +26,8 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
 
+import javax.annotation.Nonnull;
+
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -54,6 +56,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Function;
 import com.google.common.base.Joiner;
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
@@ -214,6 +217,35 @@ public class HdfsPartition implements FeFsPartition, PrunablePartition {
     public int compareTo(FileDescriptor otherFd) {
       return getFileName().compareTo(otherFd.getFileName());
     }
+
+    /**
+     * Function to convert from a byte[] flatbuffer to the wrapper class. Note that
+     * this returns a shallow copy which continues to reflect any changes to the
+     * passed byte[].
+     */
+    public static final Function<byte[], FileDescriptor> FROM_BYTES =
+        new Function<byte[], FileDescriptor>() {
+          @Override
+          public FileDescriptor apply(byte[] input) {
+            ByteBuffer bb = ByteBuffer.wrap(input);
+            return new FileDescriptor(FbFileDesc.getRootAsFbFileDesc(bb));
+          }
+        };
+
+    /**
+     * Function to convert from the wrapper class to a raw byte[]. Note that
+     * this returns a shallow copy and callers should not modify the returned array.
+     */
+    public static final Function<FileDescriptor, byte[]> TO_BYTES =
+        new Function<FileDescriptor, byte[]>() {
+          @Override
+          public byte[] apply(FileDescriptor fd) {
+            ByteBuffer bb = fd.fbFileDescriptor_.getByteBuffer();
+            byte[] arr = bb.array();
+            assert bb.arrayOffset() == 0 && bb.remaining() == arr.length;
+            return arr;
+          }
+        };
   }
 
   /**
@@ -428,7 +460,21 @@ public class HdfsPartition implements FeFsPartition, PrunablePartition {
    * It's easy to add per-file metadata to FileDescriptor if this changes.
    */
   private final HdfsStorageDescriptor fileFormatDescriptor_;
-  private List<FileDescriptor> fileDescriptors_;
+
+  /**
+   * The file descriptors of this partition, encoded as flatbuffers. Storing the raw
+   * byte arrays here instead of the FileDescriptor object saves 100 bytes per file
+   * given the following overhead:
+   * - FileDescriptor object:
+   *    - 16 byte object header
+   *    - 8 byte reference to FbFileDesc
+   * - FbFileDesc superclass:
+   *    - 16 byte object header
+   *    - 56-byte ByteBuffer
+   *    - 4-byte padding (objects are word-aligned)
+   */
+  @Nonnull
+  private ImmutableList<byte[]> encodedFileDescriptors_;
   private HdfsPartitionLocationCompressor.Location location_;
   private final static Logger LOG = LoggerFactory.getLogger(HdfsPartition.class);
   private boolean isDirty_ = false;
@@ -576,17 +622,21 @@ public class HdfsPartition implements FeFsPartition, PrunablePartition {
   public LiteralExpr getPartitionValue(int i) { return partitionKeyValues_.get(i); }
   @Override // FeFsPartition
   public List<HdfsPartition.FileDescriptor> getFileDescriptors() {
-    return fileDescriptors_;
+    // Return a lazily transformed list from our internal bytes storage.
+    return Lists.transform(encodedFileDescriptors_, FileDescriptor.FROM_BYTES);
   }
   public void setFileDescriptors(List<FileDescriptor> descriptors) {
-    fileDescriptors_ = descriptors;
+    // Store an eagerly transformed-and-copied list so that we drop the memory usage
+    // of the flatbuffer wrapper.
+    encodedFileDescriptors_ = ImmutableList.copyOf(Lists.transform(
+        descriptors, FileDescriptor.TO_BYTES));
   }
   @Override // FeFsPartition
   public int getNumFileDescriptors() {
-    return fileDescriptors_ == null ? 0 : fileDescriptors_.size();
+    return encodedFileDescriptors_.size();
   }
 
-  public boolean hasFileDescriptors() { return !fileDescriptors_.isEmpty(); }
+  public boolean hasFileDescriptors() { return !encodedFileDescriptors_.isEmpty(); }
 
   // Struct-style class for caching all the information we need to reconstruct an
   // HMS-compatible Partition object, for use in RPCs to the metastore. We do this rather
@@ -682,7 +732,7 @@ public class HdfsPartition implements FeFsPartition, PrunablePartition {
       org.apache.hadoop.hive.metastore.api.Partition msPartition,
       List<LiteralExpr> partitionKeyValues,
       HdfsStorageDescriptor fileFormatDescriptor,
-      Collection<HdfsPartition.FileDescriptor> fileDescriptors, long id,
+      List<HdfsPartition.FileDescriptor> fileDescriptors, long id,
       HdfsPartitionLocationCompressor.Location location, TAccessLevel accessLevel) {
     table_ = table;
     if (msPartition == null) {
@@ -692,7 +742,7 @@ public class HdfsPartition implements FeFsPartition, PrunablePartition {
     }
     location_ = location;
     partitionKeyValues_ = ImmutableList.copyOf(partitionKeyValues);
-    fileDescriptors_ = ImmutableList.copyOf(fileDescriptors);
+    setFileDescriptors(fileDescriptors);
     fileFormatDescriptor_ = fileFormatDescriptor;
     id_ = id;
     accessLevel_ = accessLevel;
@@ -709,7 +759,7 @@ public class HdfsPartition implements FeFsPartition, PrunablePartition {
       org.apache.hadoop.hive.metastore.api.Partition msPartition,
       List<LiteralExpr> partitionKeyValues,
       HdfsStorageDescriptor fileFormatDescriptor,
-      Collection<HdfsPartition.FileDescriptor> fileDescriptors,
+      List<HdfsPartition.FileDescriptor> fileDescriptors,
       TAccessLevel accessLevel) {
     this(table, msPartition, partitionKeyValues, fileFormatDescriptor, fileDescriptors,
         partitionIdCounter_.getAndIncrement(),
@@ -732,7 +782,7 @@ public class HdfsPartition implements FeFsPartition, PrunablePartition {
   @Override
   public long getSize() {
     long result = 0;
-    for (HdfsPartition.FileDescriptor fileDescriptor: fileDescriptors_) {
+    for (HdfsPartition.FileDescriptor fileDescriptor: getFileDescriptors()) {
       result += fileDescriptor.getFileLength();
     }
     return result;
@@ -741,7 +791,7 @@ public class HdfsPartition implements FeFsPartition, PrunablePartition {
   @Override
   public String toString() {
     return Objects.toStringHelper(this)
-      .add("fileDescriptors", fileDescriptors_)
+      .add("fileDescriptors", getFileDescriptors())
       .toString();
   }