You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ju...@apache.org on 2017/05/12 22:10:00 UTC

parquet-mr git commit: PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder

Repository: parquet-mr
Updated Branches:
  refs/heads/master fd7cfed07 -> 1de41ef4b


PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder

https://issues.apache.org/jira/browse/PARQUET-852

Author: John Jenkins <jj...@kcg.com>

Closes #401 from JohnPJenkins/PARQUET-852 and squashes the following commits:

334acec [John Jenkins] PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/1de41ef4
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/1de41ef4
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/1de41ef4

Branch: refs/heads/master
Commit: 1de41ef4baeee1c95e245837299f8be265294445
Parents: fd7cfed
Author: John Jenkins <jj...@kcg.com>
Authored: Fri May 12 15:09:56 2017 -0700
Committer: Julien Le Dem <ju...@apache.org>
Committed: Fri May 12 15:09:56 2017 -0700

----------------------------------------------------------------------
 .../bitpacking/ByteBasedBitPackingEncoder.java  | 30 ++++++++++++++------
 .../TestByteBasedBitPackingEncoder.java         | 18 ++++++++----
 2 files changed, 34 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/1de41ef4/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
----------------------------------------------------------------------
diff --git a/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java b/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
index cc23e8f..0bc8b30 100644
--- a/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
+++ b/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -39,11 +39,14 @@ public class ByteBasedBitPackingEncoder {
   private static final Logger LOG = LoggerFactory.getLogger(ByteBasedBitPackingEncoder.class);
 
   private static final int VALUES_WRITTEN_AT_A_TIME = 8;
+  private static final int MAX_SLAB_SIZE_MULT = 64 * 1024;
+  private static final int INITIAL_SLAB_SIZE_MULT = 1024;
 
   private final int bitWidth;
   private final BytePacker packer;
   private final int[] input = new int[VALUES_WRITTEN_AT_A_TIME];
-  private final int slabSize;
+  private int slabSize;
+  private long totalFullSlabSize;
   private int inputSize;
   private byte[] packed;
   private int packedPosition;
@@ -56,8 +59,9 @@ public class ByteBasedBitPackingEncoder {
   public ByteBasedBitPackingEncoder(int bitWidth, Packer packer) {
     this.bitWidth = bitWidth;
     this.inputSize = 0;
+    this.totalFullSlabSize = 0;
     // must be a multiple of bitWidth
-    this.slabSize = bitWidth * 64 * 1024;
+    this.slabSize = (bitWidth == 0) ? 1 : (bitWidth * INITIAL_SLAB_SIZE_MULT);
     initPackedSlab();
     this.packer = packer.newBytePacker(bitWidth);
   }
@@ -75,6 +79,10 @@ public class ByteBasedBitPackingEncoder {
       pack();
       if (packedPosition == slabSize) {
         slabs.add(BytesInput.from(packed));
+        totalFullSlabSize += slabSize;
+        if (slabSize < bitWidth * MAX_SLAB_SIZE_MULT) {
+          slabSize *= 2;
+        }
         initPackedSlab();
       }
     }
@@ -99,7 +107,7 @@ public class ByteBasedBitPackingEncoder {
   public BytesInput toBytes() throws IOException {
     int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
 
-    LOG.debug("writing {} bytes", (slabs.size() * slabSize + packedByteLength));
+    LOG.debug("writing {} bytes", (totalFullSlabSize + packedByteLength));
     if (inputSize > 0) {
       for (int i = inputSize; i < input.length; i++) {
         input[i] = 0;
@@ -113,18 +121,24 @@ public class ByteBasedBitPackingEncoder {
    * @return size of the data as it would be written
    */
   public long getBufferSize() {
-    return BytesUtils.paddedByteCountFromBits(totalValues * bitWidth);
+    return BytesUtils.paddedByteCountFromBits((totalValues + inputSize) * bitWidth);
   }
 
   /**
    * @return total memory allocated
    */
   public long getAllocatedSize() {
-    return (slabs.size() * slabSize) + packed.length + input.length * 4;
+    return totalFullSlabSize + packed.length + input.length * 4;
   }
 
   public String memUsageString(String prefix) {
     return String.format("%s ByteBitPacking %d slabs, %d bytes", prefix, slabs.size(), getAllocatedSize());
   }
 
+  /**
+   * @return number of full slabs along with the current slab (debug aid)
+   */
+  int getNumSlabs() {
+    return slabs.size() + 1;
+  }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/1de41ef4/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
----------------------------------------------------------------------
diff --git a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
index 293b961..b49595b 100644
--- a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
+++ b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -18,22 +18,28 @@
  */
 package org.apache.parquet.column.values.bitpacking;
 
+import org.apache.parquet.bytes.BytesUtils;
 import org.junit.Test;
 
+import static org.junit.Assert.assertEquals;
+
 public class TestByteBasedBitPackingEncoder {
 
   @Test
   public void testSlabBoundary() {
-    for (int i = 0; i < 32; i++) {
+    for (int i = 0; i <= 32; i++) {
       final ByteBasedBitPackingEncoder encoder = new ByteBasedBitPackingEncoder(i, Packer.BIG_ENDIAN);
-      // make sure to write more than a slab
-      for (int j = 0; j < 64 * 1024 * 32 + 10; j++) {
+      // make sure to write through the progression of slabs
+      final int totalValues = 191 * 1024 * 8 + 10;
+      for (int j = 0; j < totalValues; j++) {
         try {
           encoder.writeInt(j);
         } catch (Exception e) {
           throw new RuntimeException(i + ": error writing " + j, e);
         }
       }
+      assertEquals(BytesUtils.paddedByteCountFromBits(totalValues * i), encoder.getBufferSize());
+      assertEquals(i == 0 ? 1 : 9, encoder.getNumSlabs());
     }
   }