You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/06/27 07:20:51 UTC
[arrow] branch master updated: ARROW-5705: [Java] Optimize
BaseValueVector#computeCombinedBufferSize logic
This is an automated email from the ASF dual-hosted git repository.
emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new aaea93f ARROW-5705: [Java] Optimize BaseValueVector#computeCombinedBufferSize logic
aaea93f is described below
commit aaea93f332d03463db33ddccfcaa772bf33d2253
Author: tianchen <ni...@alibaba-inc.com>
AuthorDate: Thu Jun 27 00:18:03 2019 -0700
ARROW-5705: [Java] Optimize BaseValueVector#computeCombinedBufferSize logic
Related to [ARROW-5705](https://issues.apache.org/jira/browse/ARROW-5705).
Now in BaseValueVector#computeCombinedBufferSize, it computes validity buffer size as follow:
roundUp8(getValidityBufferSizeFromCount(valueCount))
which can be be expanded to
(((valueCount + 7) >> 3 + 7) / 8) * 8
Seems there's no need to compute bufferSize first and expression above could be replaced with:
(valueCount + 63) / 64 * 8
In this way, performance of computeCombinedBufferSize would be improved. Performance test:
Before:
BaseValueVectorBenchmarks.testC_omputeCombinedBufferSize_ avgt 5 4083.180 ± 180.363 ns/op
After:
BaseValueVectorBenchmarks.testC_omputeCombinedBufferSize_ avgt 5 3808.635 ± 162.347 ns/op
Author: tianchen <ni...@alibaba-inc.com>
Closes #4671 from tianchen92/ARROW-5705 and squashes the following commits:
1999fde2e <tianchen> fix
6df5f2a66 <tianchen> opt allocFixedDataAndValidityBufs
fc5a5a696 <tianchen> Optimize BaseValueVector#computeCombinedBufferSize logic
---
.../arrow/vector/BaseValueVectorBenchmarks.java | 97 ++++++++++++++++++++++
.../org/apache/arrow/vector/BaseValueVector.java | 9 +-
2 files changed, 104 insertions(+), 2 deletions(-)
diff --git a/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java
new file mode 100644
index 0000000..ddcb3b5
--- /dev/null
+++ b/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Test;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link BaseValueVector}.
+ */
+@State(Scope.Benchmark)
+public class BaseValueVectorBenchmarks {
+
+ private static final int VECTOR_LENGTH = 1024;
+
+ private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+ private BufferAllocator allocator;
+
+ private IntVector vector;
+
+ /**
+ * Setup benchmarks.
+ */
+ @Setup
+ public void prepare() {
+ allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+ vector = new IntVector("vector", allocator);
+ vector.allocateNew(VECTOR_LENGTH);
+ }
+
+ /**
+ * Tear down benchmarks.
+ */
+ @TearDown
+ public void tearDown() {
+ vector.close();
+ allocator.close();
+ }
+
+ /**
+ * Test {@link BaseValueVector#computeCombinedBufferSize(int, int)}.
+ * @return useless. To avoid DCE by JIT.
+ */
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.NANOSECONDS)
+ public int testComputeCombinedBufferSize() {
+ int totalSize = 0;
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ totalSize += vector.computeCombinedBufferSize(i, 4);
+ }
+ return totalSize;
+ }
+
+ @Test
+ public void evaluate() throws RunnerException {
+ Options opt = new OptionsBuilder()
+ .include(BaseValueVectorBenchmarks.class.getSimpleName())
+ .forks(1)
+ .build();
+
+ new Runner(opt).run();
+ }
+
+
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
index e60aeea..f2f6a1f 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
@@ -122,12 +122,17 @@ public abstract class BaseValueVector implements ValueVector {
return ((size + 7) / 8) * 8;
}
+ /* round up bytes for the validity buffer for the given valueCount */
+ private static long roundUp8ForValidityBuffer(int valueCount) {
+ return ((valueCount + 63) >> 6) << 3;
+ }
+
long computeCombinedBufferSize(int valueCount, int typeWidth) {
Preconditions.checkArgument(valueCount >= 0, "valueCount must be >= 0");
Preconditions.checkArgument(typeWidth >= 0, "typeWidth must be >= 0");
// compute size of validity buffer.
- long bufferSize = roundUp8(getValidityBufferSizeFromCount(valueCount));
+ long bufferSize = roundUp8ForValidityBuffer(valueCount);
// add the size of the value buffer.
if (typeWidth == 0) {
@@ -173,7 +178,7 @@ public abstract class BaseValueVector implements ValueVector {
// requested size. Utilize the allocated buffer fully.;
int actualCount = (int) ((bufferSize * 8.0) / (8 * typeWidth + 1));
do {
- validityBufferSize = (int) roundUp8(getValidityBufferSizeFromCount(actualCount));
+ validityBufferSize = (int) roundUp8ForValidityBuffer(actualCount);
dataBufferSize = (int) roundUp8(actualCount * typeWidth);
if (validityBufferSize + dataBufferSize <= bufferSize) {
break;