You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/06/27 07:20:51 UTC

[arrow] branch master updated: ARROW-5705: [Java] Optimize BaseValueVector#computeCombinedBufferSize logic

This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new aaea93f  ARROW-5705: [Java] Optimize BaseValueVector#computeCombinedBufferSize logic
aaea93f is described below

commit aaea93f332d03463db33ddccfcaa772bf33d2253
Author: tianchen <ni...@alibaba-inc.com>
AuthorDate: Thu Jun 27 00:18:03 2019 -0700

    ARROW-5705: [Java] Optimize BaseValueVector#computeCombinedBufferSize logic
    
    Related to [ARROW-5705](https://issues.apache.org/jira/browse/ARROW-5705).
    Now in BaseValueVector#computeCombinedBufferSize, it computes validity buffer size as follow:
    
    roundUp8(getValidityBufferSizeFromCount(valueCount))
    
    which can be be expanded to
    
    (((valueCount + 7) >> 3 + 7) / 8) * 8
    
    Seems there's no need to compute bufferSize first and expression above could be replaced with:
    
    (valueCount + 63) / 64 * 8
    
    In this way, performance of computeCombinedBufferSize would be improved. Performance test:
    
    Before:
    BaseValueVectorBenchmarks.testC_omputeCombinedBufferSize_ avgt 5 4083.180 ± 180.363 ns/op
    
    After:
    
    BaseValueVectorBenchmarks.testC_omputeCombinedBufferSize_ avgt 5 3808.635 ± 162.347 ns/op
    
    Author: tianchen <ni...@alibaba-inc.com>
    
    Closes #4671 from tianchen92/ARROW-5705 and squashes the following commits:
    
    1999fde2e <tianchen> fix
    6df5f2a66 <tianchen> opt allocFixedDataAndValidityBufs
    fc5a5a696 <tianchen> Optimize BaseValueVector#computeCombinedBufferSize logic
---
 .../arrow/vector/BaseValueVectorBenchmarks.java    | 97 ++++++++++++++++++++++
 .../org/apache/arrow/vector/BaseValueVector.java   |  9 +-
 2 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java
new file mode 100644
index 0000000..ddcb3b5
--- /dev/null
+++ b/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Test;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * Benchmarks for {@link BaseValueVector}.
+ */
+@State(Scope.Benchmark)
+public class BaseValueVectorBenchmarks {
+
+  private static final int VECTOR_LENGTH = 1024;
+
+  private static final int ALLOCATOR_CAPACITY = 1024 * 1024;
+
+  private BufferAllocator allocator;
+
+  private IntVector vector;
+
+  /**
+   * Setup benchmarks.
+   */
+  @Setup
+  public void prepare() {
+    allocator = new RootAllocator(ALLOCATOR_CAPACITY);
+    vector = new IntVector("vector", allocator);
+    vector.allocateNew(VECTOR_LENGTH);
+  }
+
+  /**
+   * Tear down benchmarks.
+   */
+  @TearDown
+  public void tearDown() {
+    vector.close();
+    allocator.close();
+  }
+
+  /**
+   * Test {@link BaseValueVector#computeCombinedBufferSize(int, int)}.
+   * @return useless. To avoid DCE by JIT.
+   */
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.NANOSECONDS)
+  public int testComputeCombinedBufferSize() {
+    int totalSize = 0;
+    for (int i = 0; i < VECTOR_LENGTH; i++) {
+      totalSize += vector.computeCombinedBufferSize(i, 4);
+    }
+    return totalSize;
+  }
+
+  @Test
+  public void evaluate() throws RunnerException {
+    Options opt = new OptionsBuilder()
+        .include(BaseValueVectorBenchmarks.class.getSimpleName())
+        .forks(1)
+        .build();
+
+    new Runner(opt).run();
+  }
+
+
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
index e60aeea..f2f6a1f 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
@@ -122,12 +122,17 @@ public abstract class BaseValueVector implements ValueVector {
     return ((size + 7) / 8) * 8;
   }
 
+  /* round up bytes for the validity buffer for the given valueCount */
+  private static long roundUp8ForValidityBuffer(int valueCount) {
+    return ((valueCount + 63) >> 6) << 3;
+  }
+
   long computeCombinedBufferSize(int valueCount, int typeWidth) {
     Preconditions.checkArgument(valueCount >= 0, "valueCount must be >= 0");
     Preconditions.checkArgument(typeWidth >= 0, "typeWidth must be >= 0");
 
     // compute size of validity buffer.
-    long bufferSize = roundUp8(getValidityBufferSizeFromCount(valueCount));
+    long bufferSize = roundUp8ForValidityBuffer(valueCount);
 
     // add the size of the value buffer.
     if (typeWidth == 0) {
@@ -173,7 +178,7 @@ public abstract class BaseValueVector implements ValueVector {
       // requested size. Utilize the allocated buffer fully.;
       int actualCount = (int) ((bufferSize * 8.0) / (8 * typeWidth + 1));
       do {
-        validityBufferSize = (int) roundUp8(getValidityBufferSizeFromCount(actualCount));
+        validityBufferSize = (int) roundUp8ForValidityBuffer(actualCount);
         dataBufferSize = (int) roundUp8(actualCount * typeWidth);
         if (validityBufferSize + dataBufferSize <= bufferSize) {
           break;