You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2020/09/28 02:10:16 UTC
[arrow] branch master updated: ARROW-9965: [Java] Improve
performance of BaseFixedWidthVector.setSafe by optimizing capacity
calculations
This is an automated email from the ASF dual-hosted git repository.
liyafan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 848c225 ARROW-9965: [Java] Improve performance of BaseFixedWidthVector.setSafe by optimizing capacity calculations
848c225 is described below
commit 848c22541e530ab25b7146a7a9a3653a403dd9af
Author: Josiah <jo...@gmail.com>
AuthorDate: Mon Sep 28 10:08:51 2020 +0800
ARROW-9965: [Java] Improve performance of BaseFixedWidthVector.setSafe by optimizing capacity calculations
It turns out that setSafe performs a very expensive integer division when trying to compute buffer capacity; specifically, it divides by the field size, which isn't hardcoded. Although it is typically a power of 2, this doesn't compile down to a bitshift.
Special-casing and forcing a bitshift operation results in a ~300% increase in benchmarks that use a hot loop to set Arrow vectors. We have a similar use-case in an internal data-intensive service.
Benchmark results with arrow.enable_unsafe_memory_access=true
Before:
```
Benchmark Mode Cnt Score Error Units
IntBenchmarks.setIntDirectly avgt 15 9.563 ± 0.335 us/op
IntBenchmarks.setWithValueHolder avgt 15 9.266 ± 0.064 us/op
IntBenchmarks.setWithWriter avgt 15 18.806 ± 0.154 us/op
```
After:
```
Benchmark Mode Cnt Score Error Units
IntBenchmarks.setIntDirectly avgt 15 3.490 ± 0.175 us/op
IntBenchmarks.setWithValueHolder avgt 15 3.806 ± 0.015 us/op
IntBenchmarks.setWithWriter avgt 15 5.490 ± 0.304 us/op
```
See https://issues.apache.org/jira/browse/ARROW-9965 for further benchmarks, and an analysis of the root cause of the slowdown.
Closes #8214 from josiahyan/ARROW-9965
Authored-by: Josiah <jo...@gmail.com>
Signed-off-by: liyafan82 <fa...@foxmail.com>
---
.../apache/arrow/vector/BaseFixedWidthVector.java | 23 +++++++++++++++++++---
.../java/org/apache/arrow/vector/BitVector.java | 10 +++-------
2 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
index ee47f6d..ea31a4f 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
@@ -49,6 +49,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
private final int typeWidth;
protected int lastValueCapacity;
+ protected int actualValueCapacity;
protected final Field field;
private int allocationMonitor;
@@ -72,6 +73,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
validityBuffer = allocator.getEmpty();
valueBuffer = allocator.getEmpty();
lastValueCapacity = INITIAL_VALUE_ALLOCATION;
+ refreshValueCapacity();
}
@@ -182,14 +184,21 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
*/
@Override
public int getValueCapacity() {
- return Math.min(getValueBufferValueCapacity(), getValidityBufferValueCapacity());
+ return actualValueCapacity;
}
- private int getValueBufferValueCapacity() {
+ /**
+ * Call this if you change the capacity of valueBuffer or validityBuffer.
+ */
+ protected void refreshValueCapacity() {
+ actualValueCapacity = Math.min(getValueBufferValueCapacity(), getValidityBufferValueCapacity());
+ }
+
+ protected int getValueBufferValueCapacity() {
return capAtMaxInt(valueBuffer.capacity() / typeWidth);
}
- private int getValidityBufferValueCapacity() {
+ protected int getValidityBufferValueCapacity() {
return capAtMaxInt(validityBuffer.capacity() * 8);
}
@@ -238,6 +247,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
valueCount = 0;
validityBuffer = releaseBuffer(validityBuffer);
valueBuffer = releaseBuffer(valueBuffer);
+ refreshValueCapacity();
}
/* used to step down the memory allocation */
@@ -331,6 +341,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
validityBuffer = buffers.getValidityBuf();
zeroVector();
+ refreshValueCapacity();
lastValueCapacity = getValueCapacity();
}
@@ -343,6 +354,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
private void allocateValidityBuffer(final int validityBufferSize) {
validityBuffer = allocator.buffer(validityBufferSize);
validityBuffer.readerIndex(0);
+ refreshValueCapacity();
}
/**
@@ -441,6 +453,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
validityBuffer.getReferenceManager().release();
validityBuffer = newValidityBuffer;
+ refreshValueCapacity();
lastValueCapacity = getValueCapacity();
}
@@ -492,6 +505,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
valueBuffer.getReferenceManager().release();
valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator);
+ refreshValueCapacity();
valueCount = fieldNode.getLength();
}
@@ -572,6 +586,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
target.validityBuffer = transferBuffer(validityBuffer, target.allocator);
target.valueBuffer = transferBuffer(valueBuffer, target.allocator);
target.valueCount = valueCount;
+ target.refreshValueCapacity();
clear();
}
@@ -602,6 +617,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
final int sliceLength = length * typeWidth;
final ArrowBuf slicedBuffer = valueBuffer.slice(startPoint, sliceLength);
target.valueBuffer = transferBuffer(slicedBuffer, target.allocator);
+ target.refreshValueCapacity();
}
/**
@@ -623,6 +639,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
}
target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
target.validityBuffer.getReferenceManager().retain(1);
+ target.refreshValueCapacity();
} else {
/* Copy data
* When the first bit starts from the middle of a byte (offset != 0),
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
index 14d357f..3bcfd98 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
@@ -119,14 +119,9 @@ public final class BitVector extends BaseFixedWidthVector {
lastValueCapacity = valueCount;
}
- /**
- * Get the current value capacity for the vector.
- *
- * @return number of elements that vector can hold.
- */
@Override
- public int getValueCapacity() {
- return capAtMaxInt(validityBuffer.capacity() * 8);
+ protected int getValueBufferValueCapacity() {
+ return capAtMaxInt(valueBuffer.capacity() * 8);
}
/**
@@ -171,6 +166,7 @@ public final class BitVector extends BaseFixedWidthVector {
validityBuffer, target.validityBuffer);
target.valueBuffer = splitAndTransferBuffer(startIndex, length, target,
valueBuffer, target.valueBuffer);
+ target.refreshValueCapacity();
target.setValueCount(length);
}