You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2019/07/11 07:00:48 UTC

[arrow] branch master updated: ARROW-5903: [Java] Optimise set methods in decimal vector

This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new ae03c54  ARROW-5903: [Java] Optimise set methods in decimal vector
ae03c54 is described below

commit ae03c544beae36637e4418159ad951ac6440fac1
Author: Pindikura Ravindra <ra...@dremio.com>
AuthorDate: Thu Jul 11 12:30:27 2019 +0530

    ARROW-5903: [Java] Optimise set methods in decimal vector
    
    - reduce the number of bound checks
    - simplify loop
    
    Author: Pindikura Ravindra <ra...@dremio.com>
    
    Closes #4847 from pravindra/arrow-5903-new and squashes the following commits:
    
    8376b8b69 <Pindikura Ravindra> ARROW-5903: fix style check error
    5a4daec97 <Pindikura Ravindra> ARROW-5903: Optimise set methods in decimal vector
---
 .../arrow/vector/DecimalVectorBenchmarks.java      | 124 +++++++++++++++++++++
 .../org/apache/arrow/vector/DecimalVector.java     |  85 +++++++-------
 2 files changed, 165 insertions(+), 44 deletions(-)

diff --git a/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java
new file mode 100644
index 0000000..aaa8deb
--- /dev/null
+++ b/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.math.BigDecimal;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Test;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import io.netty.buffer.ArrowBuf;
+
+/**
+ * Benchmarks for {@link DecimalVector}.
+ */
+@State(Scope.Benchmark)
+public class DecimalVectorBenchmarks {
+
+  private static final int VECTOR_LENGTH = 1024;
+
+  private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+  private BufferAllocator allocator;
+
+  private DecimalVector vector;
+
+  private ArrowBuf fromBuf;
+
+  byte[] fromByteArray;
+
+  /**
+   * Creates the target vector and a source buffer that holds one value per TYPE_WIDTH-byte slot.
+   */
+  @Setup
+  public void prepare() {
+    allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+    vector = new DecimalVector("vector", allocator, 38, 16);
+    vector.allocateNew(VECTOR_LENGTH);
+
+    fromBuf = allocator.buffer(VECTOR_LENGTH * DecimalVector.TYPE_WIDTH);
+    for (int i = 0; i < VECTOR_LENGTH; i++) {
+      byte[] bytes = BigDecimal.valueOf(i).unscaledValue().toByteArray(); // big-endian two's complement
+      fromBuf.setBytes(i * DecimalVector.TYPE_WIDTH, bytes); // NOTE(review): fills only bytes.length bytes per slot
+    }
+
+    fromByteArray = new byte[DecimalVector.TYPE_WIDTH];
+    fromBuf.getBytes(0, fromByteArray); // slot 0 doubles as the byte[] benchmark input
+  }
+
+  /**
+   * Closes the source buffer and the vector, then the allocator both were created from.
+   */
+  @TearDown
+  public void tearDown() {
+    fromBuf.close();
+    vector.close();
+    allocator.close();
+  }
+
+  /**
+   * Benchmarks writing to a {@link DecimalVector} from an ArrowBuf, consuming one TYPE_WIDTH-byte slot per element.
+   */
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void setBigEndianArrowBufBenchmark() {
+    int offset = 0;
+
+    for (int i = 0; i < VECTOR_LENGTH; i++) {
+      vector.setBigEndianSafe(i, offset, fromBuf, DecimalVector.TYPE_WIDTH);
+      offset += DecimalVector.TYPE_WIDTH; // advance one full slot, matching the stride used in prepare()
+    }
+  }
+
+  /**
+   * Benchmarks {@code setBigEndian(int, byte[])} by writing the same TYPE_WIDTH-byte array into every slot.
+   */
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void setBigEndianByteArrayBenchmark() {
+    for (int i = 0; i < VECTOR_LENGTH; i++) {
+      vector.setBigEndian(i, fromByteArray);
+    }
+  }
+
+  @Test
+  public void evaluate() throws RunnerException { // launches the JMH Runner for this class in one fork
+    Options opt = new OptionsBuilder()
+        .include(DecimalVectorBenchmarks.class.getSimpleName())
+        .forks(1)
+        .build();
+
+    new Runner(opt).run();
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
index 1db83a1..cf77186 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
@@ -34,6 +34,7 @@ import org.apache.arrow.vector.util.DecimalUtility;
 import org.apache.arrow.vector.util.TransferPair;
 
 import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
 
 /**
  * DecimalVector implements a fixed width vector (16 bytes) of
@@ -206,41 +207,30 @@ public class DecimalVector extends BaseFixedWidthVector {
   public void setBigEndian(int index, byte[] value) {
     BitVectorHelper.setValidityBitToOne(validityBuffer, index);
     final int length = value.length;
-    int startIndex = index * TYPE_WIDTH;
-    if (length == TYPE_WIDTH) {
-      for (int i = TYPE_WIDTH - 1; i >= 3; i -= 4) {
-        valueBuffer.setByte(startIndex, value[i]);
-        valueBuffer.setByte(startIndex + 1, value[i - 1]);
-        valueBuffer.setByte(startIndex + 2, value[i - 2]);
-        valueBuffer.setByte(startIndex + 3, value[i - 3]);
-        startIndex += 4;
-      }
 
-      return;
+    // Single bounds check for the whole TYPE_WIDTH-byte slot; the raw writes below are unchecked.
+    valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH);
+
+    long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH;
+    // swap bytes to convert BE to LE. NOTE(review): length > TYPE_WIDTH is rejected only after this loop.
+    for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+      PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
     }
 
-    if (length == 0) {
-      valueBuffer.setZero(startIndex, TYPE_WIDTH);
+    if (length == TYPE_WIDTH) {
       return;
     }
 
-    if (length < 16) {
-      for (int i = length - 1; i >= 0; i--) {
-        valueBuffer.setByte(startIndex, value[i]);
-        startIndex++;
-      }
-
+    if (length == 0) {
+      PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH, (byte)0);
+    } else if (length < TYPE_WIDTH) {
+      // sign extend: pad the high-order bytes with 0xFF for negative values, 0x00 otherwise
       final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
-      final int maxStartIndex = (index + 1) * TYPE_WIDTH;
-      while (startIndex < maxStartIndex) {
-        valueBuffer.setByte(startIndex, pad);
-        startIndex++;
-      }
-
-      return;
+      PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
+    } else {
+      throw new IllegalArgumentException(
+          "Invalid decimal value length. Valid length in [1 - 16], got " + length);
     }
-
-    throw new IllegalArgumentException("Invalid decimal value length. Valid length in [1 - 16], got " + length);
   }
 
   /**
@@ -265,17 +255,19 @@ public class DecimalVector extends BaseFixedWidthVector {
   public void setSafe(int index, int start, ArrowBuf buffer, int length) {
     handleSafe(index);
     BitVectorHelper.setValidityBitToOne(validityBuffer, index);
-    int startIndexInVector = index * TYPE_WIDTH;
-    valueBuffer.setBytes(startIndexInVector, buffer, start, length);
+
+    // Check the source range and the destination slot once; the raw copy below is unchecked.
+    buffer.checkBytes(start, start + length);
+    valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH);
+
+    long inAddress = buffer.memoryAddress() + start;
+    long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH;
+    PlatformDependent.copyMemory(inAddress, outAddress, length); // NOTE(review): length > TYPE_WIDTH not rejected
     // sign extend
     if (length < 16) {
-      byte msb = buffer.getByte(start + length - 1);
+      byte msb = PlatformDependent.getByte(inAddress + length - 1);
       final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
-      int startIndex = startIndexInVector + length;
-      int endIndex = startIndexInVector + TYPE_WIDTH;
-      for (int i = startIndex; i < endIndex; i++) {
-        valueBuffer.setByte(i, pad);
-      }
+      PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
     }
   }
 
@@ -290,19 +282,24 @@ public class DecimalVector extends BaseFixedWidthVector {
   public void setBigEndianSafe(int index, int start, ArrowBuf buffer, int length) {
     handleSafe(index);
     BitVectorHelper.setValidityBitToOne(validityBuffer, index);
-    int startIndexInVector = index * TYPE_WIDTH;
-    for (int i = start + length - 1; i >= start; i--) {
-      valueBuffer.setByte(startIndexInVector, buffer.getByte(i));
-      startIndexInVector++;
+
+    // Check the source range and the destination slot once, up front.
+    buffer.checkBytes(start, start + length);
+    valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH);
+
+    // Raw addresses are used instead of buffer.getByte() to avoid a boundary check on every byte.
+    long inAddress = buffer.memoryAddress() + start;
+    long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH;
+    // swap bytes to convert BE to LE. NOTE(review): length > TYPE_WIDTH is not rejected before these writes.
+    for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+      byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
+      PlatformDependent.putByte(outAddress + byteIdx, val);
     }
     // sign extend
     if (length < 16) {
-      byte msb = buffer.getByte(start);
+      byte msb = PlatformDependent.getByte(inAddress);
       final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
-      int endIndex = startIndexInVector + TYPE_WIDTH - length;
-      for (int i = startIndexInVector; i < endIndex; i++) {
-        valueBuffer.setByte(i, pad);
-      }
+      PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
     }
   }