You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2019/07/11 07:00:48 UTC
[arrow] branch master updated: ARROW-5903: [Java] Optimise set methods in decimal vector
This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ae03c54 ARROW-5903: [Java] Optimise set methods in decimal vector
ae03c54 is described below
commit ae03c544beae36637e4418159ad951ac6440fac1
Author: Pindikura Ravindra <ra...@dremio.com>
AuthorDate: Thu Jul 11 12:30:27 2019 +0530
ARROW-5903: [Java] Optimise set methods in decimal vector
- reduce the number of bound checks
- simplify loop
Author: Pindikura Ravindra <ra...@dremio.com>
Closes #4847 from pravindra/arrow-5903-new and squashes the following commits:
8376b8b69 <Pindikura Ravindra> ARROW-5903: fix style check error
5a4daec97 <Pindikura Ravindra> ARROW-5903: Optimise set methods in decimal vector
---
.../arrow/vector/DecimalVectorBenchmarks.java | 124 +++++++++++++++++++++
.../org/apache/arrow/vector/DecimalVector.java | 85 +++++++-------
2 files changed, 165 insertions(+), 44 deletions(-)
diff --git a/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java
new file mode 100644
index 0000000..aaa8deb
--- /dev/null
+++ b/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.math.BigDecimal;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Test;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import io.netty.buffer.ArrowBuf;
+
+/**
+ * Benchmarks for the big-endian set methods of {@link DecimalVector}.
+ */
+@State(Scope.Benchmark)
+public class DecimalVectorBenchmarks {
+  // Number of decimal values written per benchmark invocation.
+  private static final int VECTOR_LENGTH = 1024;
+  // Argument passed to the RootAllocator constructor (presumably its limit in bytes).
+  private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+  private BufferAllocator allocator;
+
+  private DecimalVector vector;
+  // Source buffer laid out as VECTOR_LENGTH slots of DecimalVector.TYPE_WIDTH bytes.
+  private ArrowBuf fromBuf;
+  // Copy of the first TYPE_WIDTH bytes of fromBuf, input of the byte-array benchmark.
+  byte[] fromByteArray;
+
+  /**
+   * Allocates the target vector and fills the source buffer and byte array.
+   */
+  @Setup
+  public void prepare() {
+    allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+    vector = new DecimalVector("vector", allocator, 38, 16);
+    vector.allocateNew(VECTOR_LENGTH);
+    // Slot i starts at i * TYPE_WIDTH; only the leading bytes.length bytes of it are written.
+    fromBuf = allocator.buffer(VECTOR_LENGTH * DecimalVector.TYPE_WIDTH);
+    for (int i = 0; i < VECTOR_LENGTH; i++) {
+      byte[] bytes = BigDecimal.valueOf(i).unscaledValue().toByteArray();
+      fromBuf.setBytes(i * DecimalVector.TYPE_WIDTH, bytes);
+    }
+    // Snapshot the first slot for the byte-array benchmark.
+    fromByteArray = new byte[DecimalVector.TYPE_WIDTH];
+    fromBuf.getBytes(0, fromByteArray);
+  }
+
+  /**
+   * Releases the source buffer, the vector and the allocator.
+   */
+  @TearDown
+  public void tearDown() {
+    fromBuf.close();
+    vector.close();
+    allocator.close();
+  }
+
+  /**
+   * Test writing on {@link DecimalVector} from arrow buf, one source slot per element.
+   */
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void setBigEndianArrowBufBenchmark() {
+    int offset = 0;
+    // Step by the slot width used in prepare() so every read starts on a slot boundary.
+    for (int i = 0; i < VECTOR_LENGTH; i++) {
+      vector.setBigEndianSafe(i, offset, fromBuf, DecimalVector.TYPE_WIDTH);
+      offset += DecimalVector.TYPE_WIDTH;
+    }
+  }
+
+  /**
+   * Test writing the same byte array into every slot of a {@link DecimalVector}.
+   */
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void setBigEndianByteArrayBenchmark() {
+    for (int i = 0; i < VECTOR_LENGTH; i++) {
+      vector.setBigEndian(i, fromByteArray);
+    }
+  }
+
+  @Test
+  public void evaluate() throws RunnerException {
+    Options opt = new OptionsBuilder()
+        .include(DecimalVectorBenchmarks.class.getSimpleName())
+        .forks(1)
+        .build();
+    // Run the benchmarks whose name matches this class, using a single fork.
+    new Runner(opt).run();
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
index 1db83a1..cf77186 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
@@ -34,6 +34,7 @@ import org.apache.arrow.vector.util.DecimalUtility;
import org.apache.arrow.vector.util.TransferPair;
import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
/**
* DecimalVector implements a fixed width vector (16 bytes) of
@@ -206,41 +207,30 @@ public class DecimalVector extends BaseFixedWidthVector {
public void setBigEndian(int index, byte[] value) {
BitVectorHelper.setValidityBitToOne(validityBuffer, index);
final int length = value.length;
- int startIndex = index * TYPE_WIDTH;
- if (length == TYPE_WIDTH) {
- for (int i = TYPE_WIDTH - 1; i >= 3; i -= 4) {
- valueBuffer.setByte(startIndex, value[i]);
- valueBuffer.setByte(startIndex + 1, value[i - 1]);
- valueBuffer.setByte(startIndex + 2, value[i - 2]);
- valueBuffer.setByte(startIndex + 3, value[i - 3]);
- startIndex += 4;
- }
- return;
+ // reject oversized input before writing: the PlatformDependent writes below are
+ // not bound-checked, so a longer array would overrun the TYPE_WIDTH-byte slot.
+ if (length > TYPE_WIDTH) {
+ throw new IllegalArgumentException(
+ "Invalid decimal value length. Valid length in [1 - 16], got " + length);
+ }
+
+ // do the bound check.
+ valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH);
+
+ long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH;
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
}
- if (length == 0) {
- valueBuffer.setZero(startIndex, TYPE_WIDTH);
+ if (length == TYPE_WIDTH) {
return;
}
- if (length < 16) {
- for (int i = length - 1; i >= 0; i--) {
- valueBuffer.setByte(startIndex, value[i]);
- startIndex++;
- }
-
+ if (length == 0) {
+ // an empty array is stored as zero
+ PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH, (byte)0);
+ } else {
+ // sign extend
final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
- final int maxStartIndex = (index + 1) * TYPE_WIDTH;
- while (startIndex < maxStartIndex) {
- valueBuffer.setByte(startIndex, pad);
- startIndex++;
- }
-
- return;
+ PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
}
-
- throw new IllegalArgumentException("Invalid decimal value length. Valid length in [1 - 16], got " + length);
}
/**
@@ -265,17 +255,19 @@ public class DecimalVector extends BaseFixedWidthVector {
public void setSafe(int index, int start, ArrowBuf buffer, int length) {
handleSafe(index);
BitVectorHelper.setValidityBitToOne(validityBuffer, index);
- int startIndexInVector = index * TYPE_WIDTH;
- valueBuffer.setBytes(startIndexInVector, buffer, start, length);
+
+ // reject lengths outside [0, TYPE_WIDTH] before copying: the PlatformDependent
+ // calls below are not bound-checked, so a longer value would overrun the slot.
+ if (length < 0 || length > TYPE_WIDTH) {
+ throw new IllegalArgumentException(
+ "Invalid decimal value length. Valid length in [0 - 16], got " + length);
+ }
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH);
+
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH;
+ PlatformDependent.copyMemory(inAddress, outAddress, length);
// sign extend
if (length < 16) {
- byte msb = buffer.getByte(start + length - 1);
+ // an empty value has no sign byte to read (inAddress - 1 lies before the input);
+ // pad it with zeros instead.
+ byte msb = length == 0 ? (byte) 0 : PlatformDependent.getByte(inAddress + length - 1);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
- int startIndex = startIndexInVector + length;
- int endIndex = startIndexInVector + TYPE_WIDTH;
- for (int i = startIndex; i < endIndex; i++) {
- valueBuffer.setByte(i, pad);
- }
+ PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
}
}
@@ -290,19 +282,24 @@ public class DecimalVector extends BaseFixedWidthVector {
public void setBigEndianSafe(int index, int start, ArrowBuf buffer, int length) {
handleSafe(index);
BitVectorHelper.setValidityBitToOne(validityBuffer, index);
- int startIndexInVector = index * TYPE_WIDTH;
- for (int i = start + length - 1; i >= start; i--) {
- valueBuffer.setByte(startIndexInVector, buffer.getByte(i));
- startIndexInVector++;
+
+ // reject lengths outside [0, TYPE_WIDTH] before writing: the PlatformDependent
+ // calls below are not bound-checked, so a longer value would overrun the slot.
+ if (length < 0 || length > TYPE_WIDTH) {
+ throw new IllegalArgumentException(
+ "Invalid decimal value length. Valid length in [0 - 16], got " + length);
+ }
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH);
+
+ // not using buffer.getByte() to avoid boundary checks for every byte.
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH;
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
+ PlatformDependent.putByte(outAddress + byteIdx, val);
}
// sign extend
if (length < 16) {
- byte msb = buffer.getByte(start);
+ // an empty value has no sign byte inside the checked range; store it as zero,
+ // matching how setBigEndian(int, byte[]) treats an empty array.
+ byte msb = length == 0 ? (byte) 0 : PlatformDependent.getByte(inAddress);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
- int endIndex = startIndexInVector + TYPE_WIDTH - length;
- for (int i = startIndexInVector; i < endIndex; i++) {
- valueBuffer.setByte(i, pad);
- }
+ PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
}
}