You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by sh...@apache.org on 2022/10/09 18:59:05 UTC

[parquet-mr] branch master updated: Performance optimization to ByteBitPackingValuesReader (#962)

This is an automated email from the ASF dual-hosted git repository.

shangxinli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 44dc3a4ae Performance optimization to ByteBitPackingValuesReader (#962)
44dc3a4ae is described below

commit 44dc3a4aef8e7746408381a7b11ff7ab8e888c3f
Author: Timothy N. Miller <94...@users.noreply.github.com>
AuthorDate: Sun Oct 9 14:59:00 2022 -0400

    Performance optimization to ByteBitPackingValuesReader (#962)
    
    Move object creation out of the critical path.
    Move less frequently used code into a separate function to encourage
    the JIT to inline the more frequently used code.
---
 .../bitpacking/ByteBitPackingValuesReader.java     | 38 +++++++++++++---------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPackingValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPackingValuesReader.java
index 1fa62d4b5..0294b6c13 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPackingValuesReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBitPackingValuesReader.java
@@ -38,31 +38,39 @@ public class ByteBitPackingValuesReader extends ValuesReader {
   private final int[] decoded = new int[VALUES_AT_A_TIME];
   private int decodedPosition = VALUES_AT_A_TIME - 1;
   private ByteBufferInputStream in;
+  private final byte[] tempEncode;
 
   public ByteBitPackingValuesReader(int bound, Packer packer) {
     this.bitWidth = BytesUtils.getWidthFromMaxInt(bound);
     this.packer = packer.newBytePacker(bitWidth);
+    // Create and retain byte array to avoid object creation in the critical path
+    this.tempEncode = new byte[this.bitWidth];
+  }
+
+  private void readMore() {
+    try {
+      int avail = in.available();
+      if (avail < bitWidth) {
+        in.read(tempEncode, 0, avail);
+        // Clear the portion of the array we didn't read into
+        for (int i=avail; i<bitWidth; i++) tempEncode[i] = 0;
+      } else {
+        in.read(tempEncode, 0, bitWidth);
+      }
+
+      // The "deprecated" unpacker is faster than using the one that takes ByteBuffer
+      packer.unpack8Values(tempEncode, 0, decoded, 0);
+    } catch (IOException e) {
+      throw new ParquetDecodingException("Failed to read packed values", e);
+    }
+    decodedPosition = 0;
   }
 
   @Override
   public int readInteger() {
     ++ decodedPosition;
     if (decodedPosition == decoded.length) {
-      try {
-        if (in.available() < bitWidth) {
-          // unpack8Values needs at least bitWidth bytes to read from,
-          // We have to fill in 0 byte at the end of encoded bytes.
-          byte[] tempEncode = new byte[bitWidth];
-          in.read(tempEncode, 0, in.available());
-          packer.unpack8Values(tempEncode, 0, decoded, 0);
-        } else {
-          ByteBuffer encoded = in.slice(bitWidth);
-          packer.unpack8Values(encoded, encoded.position(), decoded, 0);
-        }
-      } catch (IOException e) {
-        throw new ParquetDecodingException("Failed to read packed values", e);
-      }
-      decodedPosition = 0;
+      readMore();
     }
     return decoded[decodedPosition];
   }