You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ju...@apache.org on 2017/05/12 22:10:00 UTC
parquet-mr git commit: PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder
Repository: parquet-mr
Updated Branches:
refs/heads/master fd7cfed07 -> 1de41ef4b
PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder
https://issues.apache.org/jira/browse/PARQUET-852
Author: John Jenkins <jj...@kcg.com>
Closes #401 from JohnPJenkins/PARQUET-852 and squashes the following commits:
334acec [John Jenkins] PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/1de41ef4
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/1de41ef4
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/1de41ef4
Branch: refs/heads/master
Commit: 1de41ef4baeee1c95e245837299f8be265294445
Parents: fd7cfed
Author: John Jenkins <jj...@kcg.com>
Authored: Fri May 12 15:09:56 2017 -0700
Committer: Julien Le Dem <ju...@apache.org>
Committed: Fri May 12 15:09:56 2017 -0700
----------------------------------------------------------------------
.../bitpacking/ByteBasedBitPackingEncoder.java | 30 ++++++++++++++------
.../TestByteBasedBitPackingEncoder.java | 18 ++++++++----
2 files changed, 34 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/1de41ef4/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
----------------------------------------------------------------------
diff --git a/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java b/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
index cc23e8f..0bc8b30 100644
--- a/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
+++ b/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -39,11 +39,14 @@ public class ByteBasedBitPackingEncoder {
private static final Logger LOG = LoggerFactory.getLogger(ByteBasedBitPackingEncoder.class);
private static final int VALUES_WRITTEN_AT_A_TIME = 8;
+ private static final int MAX_SLAB_SIZE_MULT = 64 * 1024;
+ private static final int INITIAL_SLAB_SIZE_MULT = 1024;
private final int bitWidth;
private final BytePacker packer;
private final int[] input = new int[VALUES_WRITTEN_AT_A_TIME];
- private final int slabSize;
+ private int slabSize;
+ private long totalFullSlabSize;
private int inputSize;
private byte[] packed;
private int packedPosition;
@@ -56,8 +59,9 @@ public class ByteBasedBitPackingEncoder {
public ByteBasedBitPackingEncoder(int bitWidth, Packer packer) {
this.bitWidth = bitWidth;
this.inputSize = 0;
+ this.totalFullSlabSize = 0;
// must be a multiple of bitWidth
- this.slabSize = bitWidth * 64 * 1024;
+ this.slabSize = (bitWidth == 0) ? 1 : (bitWidth * INITIAL_SLAB_SIZE_MULT);
initPackedSlab();
this.packer = packer.newBytePacker(bitWidth);
}
@@ -75,6 +79,10 @@ public class ByteBasedBitPackingEncoder {
pack();
if (packedPosition == slabSize) {
slabs.add(BytesInput.from(packed));
+ totalFullSlabSize += slabSize;
+ if (slabSize < bitWidth * MAX_SLAB_SIZE_MULT) {
+ slabSize *= 2;
+ }
initPackedSlab();
}
}
@@ -99,7 +107,7 @@ public class ByteBasedBitPackingEncoder {
public BytesInput toBytes() throws IOException {
int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
- LOG.debug("writing {} bytes", (slabs.size() * slabSize + packedByteLength));
+ LOG.debug("writing {} bytes", (totalFullSlabSize + packedByteLength));
if (inputSize > 0) {
for (int i = inputSize; i < input.length; i++) {
input[i] = 0;
@@ -113,18 +121,24 @@ public class ByteBasedBitPackingEncoder {
* @return size of the data as it would be written
*/
public long getBufferSize() {
- return BytesUtils.paddedByteCountFromBits(totalValues * bitWidth);
+ return BytesUtils.paddedByteCountFromBits((totalValues + inputSize) * bitWidth);
}
/**
* @return total memory allocated
*/
public long getAllocatedSize() {
- return (slabs.size() * slabSize) + packed.length + input.length * 4;
+ return totalFullSlabSize + packed.length + input.length * 4;
}
public String memUsageString(String prefix) {
return String.format("%s ByteBitPacking %d slabs, %d bytes", prefix, slabs.size(), getAllocatedSize());
}
+ /**
+ * @return number of full slabs along with the current slab (debug aid)
+ */
+ int getNumSlabs() {
+ return slabs.size() + 1;
+ }
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/1de41ef4/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
----------------------------------------------------------------------
diff --git a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
index 293b961..b49595b 100644
--- a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
+++ b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -18,22 +18,28 @@
*/
package org.apache.parquet.column.values.bitpacking;
+import org.apache.parquet.bytes.BytesUtils;
import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
public class TestByteBasedBitPackingEncoder {
@Test
public void testSlabBoundary() {
- for (int i = 0; i < 32; i++) {
+ for (int i = 0; i <= 32; i++) {
final ByteBasedBitPackingEncoder encoder = new ByteBasedBitPackingEncoder(i, Packer.BIG_ENDIAN);
- // make sure to write more than a slab
- for (int j = 0; j < 64 * 1024 * 32 + 10; j++) {
+ // make sure to write through the progression of slabs
+ final int totalValues = 191 * 1024 * 8 + 10;
+ for (int j = 0; j < totalValues; j++) {
try {
encoder.writeInt(j);
} catch (Exception e) {
throw new RuntimeException(i + ": error writing " + j, e);
}
}
+ assertEquals(BytesUtils.paddedByteCountFromBits(totalValues * i), encoder.getBufferSize());
+ assertEquals(i == 0 ? 1 : 9, encoder.getNumSlabs());
}
}