You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by cu...@apache.org on 2018/01/05 21:09:18 UTC

[arrow] branch master updated: ARROW-1962: [Java] Adding reset to ValueVector interface

This is an automated email from the ASF dual-hosted git repository.

cutlerb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4dbce60  ARROW-1962: [Java] Adding reset to ValueVector interface
4dbce60 is described below

commit 4dbce607d50031a405af39d36e08cd03c5ffc764
Author: Bryan Cutler <cu...@gmail.com>
AuthorDate: Fri Jan 5 13:08:59 2018 -0800

    ARROW-1962: [Java] Adding reset to ValueVector interface
    
    Adding `reset()` to the ValueVector interface and implementing where it is not done already.  Removing unused abstract class BaseDataValueVector that is not used anymore by the UnionVector.
    
    Expanded reset tests to check that valueCount is 0, and buffers have same capacity and zeroed out.
    
    Author: Bryan Cutler <cu...@gmail.com>
    
    Closes #1455 from BryanCutler/java-reset-ValueVector-ARROW-1962 and squashes the following commits:
    
    da994e1 [Bryan Cutler] typo
    a52e7db [Bryan Cutler] expanded reset documentations
    1526a83 [Bryan Cutler] improved vector reset testing
    a251d10 [Bryan Cutler] reset should zero data buffer and set value count to 0
    bf2a16a [Bryan Cutler] add reset to NullableMapVector to zero validity buffer
    7fbde5b [Bryan Cutler] need to zero out vector buffers when reset
    b59addf [Bryan Cutler] adding reset to ValueVector interface, removing BaseDataValueVector
---
 .../src/main/codegen/templates/UnionVector.java    |   8 +-
 .../apache/arrow/vector/BaseDataValueVector.java   | 129 ---------------------
 .../apache/arrow/vector/BaseFixedWidthVector.java  |   2 +
 .../arrow/vector/BaseVariableWidthVector.java      |   2 +
 .../java/org/apache/arrow/vector/ValueVector.java  |  11 +-
 .../java/org/apache/arrow/vector/ZeroVector.java   |   4 +
 .../vector/complex/BaseRepeatedValueVector.java    |   7 ++
 .../arrow/vector/complex/FixedSizeListVector.java  |   7 ++
 .../apache/arrow/vector/complex/ListVector.java    |   7 ++
 .../org/apache/arrow/vector/complex/MapVector.java |   8 ++
 .../arrow/vector/complex/NullableMapVector.java    |  13 ++-
 .../org/apache/arrow/vector/TestVectorReset.java   | 109 +++++++++++++++--
 12 files changed, 165 insertions(+), 142 deletions(-)

diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java
index 9377bd0..aa8178a 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -29,7 +29,6 @@ import io.netty.buffer.ArrowBuf;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
-import org.apache.arrow.vector.BaseDataValueVector;
 import org.apache.arrow.vector.complex.impl.ComplexCopier;
 import org.apache.arrow.vector.util.CallBack;
 import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
@@ -319,6 +318,13 @@ public class UnionVector implements FieldVector {
   }
 
   @Override
+  public void reset() {
+    valueCount = 0;
+    typeBuffer.setZero(0, typeBuffer.capacity());
+    internalMap.reset();
+  }
+
+  @Override
   public Field getField() {
     List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>();
     List<FieldVector> children = internalMap.getChildren();
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java
deleted file mode 100644
index 8067513..0000000
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseDataValueVector.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.arrow.vector;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.arrow.memory.BufferAllocator;
-import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
-
-import io.netty.buffer.ArrowBuf;
-import org.apache.arrow.vector.util.CallBack;
-import org.apache.arrow.vector.util.TransferPair;
-
-
-public abstract class BaseDataValueVector extends BaseValueVector implements BufferBacked {
-
-  public static void load(ArrowFieldNode fieldNode, List<BufferBacked> vectors, List<ArrowBuf> buffers) {
-    int expectedSize = vectors.size();
-    if (buffers.size() != expectedSize) {
-      throw new IllegalArgumentException("Illegal buffer count, expected " + expectedSize + ", got: " + buffers.size());
-    }
-    for (int i = 0; i < expectedSize; i++) {
-      vectors.get(i).load(fieldNode, buffers.get(i));
-    }
-  }
-
-  public static void truncateBufferBasedOnSize(List<ArrowBuf> buffers, int bufferIndex, int byteSize) {
-    if (bufferIndex >= buffers.size()) {
-      throw new IllegalArgumentException("no buffer at index " + bufferIndex + ": " + buffers);
-    }
-    ArrowBuf buffer = buffers.get(bufferIndex);
-    if (buffer.writerIndex() < byteSize) {
-      throw new IllegalArgumentException("can not truncate buffer to a larger size " + byteSize + ": " + buffer.writerIndex());
-    }
-    buffer.writerIndex(byteSize);
-  }
-
-  public static List<ArrowBuf> unload(List<BufferBacked> vectors) {
-    List<ArrowBuf> result = new ArrayList<>(vectors.size());
-    for (BufferBacked vector : vectors) {
-      result.add(vector.unLoad());
-    }
-    return result;
-  }
-
-  protected ArrowBuf data;
-
-  public BaseDataValueVector(String name, BufferAllocator allocator) {
-    super(name, allocator);
-    data = allocator.getEmpty();
-  }
-
-  @Override
-  public void clear() {
-    data.release();
-    data = allocator.getEmpty();
-    super.clear();
-  }
-
-  @Override
-  public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
-    return getTransferPair(ref, allocator);
-  }
-
-  @Override
-  public ArrowBuf[] getBuffers(boolean clear) {
-    ArrowBuf[] out;
-    if (getBufferSize() == 0) {
-      out = new ArrowBuf[0];
-    } else {
-      out = new ArrowBuf[]{data};
-      data.readerIndex(0);
-      if (clear) {
-        data.retain(1);
-      }
-    }
-    if (clear) {
-      clear();
-    }
-    return out;
-  }
-
-  @Override
-  public int getBufferSize() {
-    if (getValueCount() == 0) {
-      return 0;
-    }
-    return data.writerIndex();
-  }
-
-  public ArrowBuf getBuffer() {
-    return data;
-  }
-
-  @Override
-  public void load(ArrowFieldNode fieldNode, ArrowBuf data) {
-    this.data.release();
-    this.data = data.retain(allocator);
-  }
-
-  @Override
-  public ArrowBuf unLoad() {
-    return this.data.readerIndex(0);
-  }
-
-  /**
-   * This method has a similar effect of allocateNew() without actually clearing and reallocating
-   * the value vector. The purpose is to move the value vector to a "mutate" state
-   */
-  public void reset() {
-  }
-}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
index 77026d4..702db9f 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
@@ -208,7 +208,9 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
    * Reset the vector to initial state. Same as {@link #zeroVector()}.
    * Note that this method doesn't release any memory.
    */
+  @Override
   public void reset() {
+    valueCount = 0;
     zeroVector();
   }
 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
index fbadb35..fff329a 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -193,6 +193,7 @@ public abstract class BaseVariableWidthVector extends BaseValueVector
   public void zeroVector() {
     initValidityBuffer();
     initOffsetBuffer();
+    valueBuffer.setZero(0, valueBuffer.capacity());
   }
 
   /* zero out the validity buffer */
@@ -212,6 +213,7 @@ public abstract class BaseVariableWidthVector extends BaseValueVector
   public void reset() {
     zeroVector();
     lastSet = -1;
+    valueCount = 0;
   }
 
   /**
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
index e77c1b1..24cf59a 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
@@ -100,11 +100,20 @@ public interface ValueVector extends Closeable, Iterable<ValueVector> {
   void close();
 
   /**
-   * Release the underlying ArrowBuf and reset the ValueVector to empty.
+   * Release any owned ArrowBuf and reset the ValueVector to the initial state. If the
+   * vector has any child vectors, they will also be cleared.
    */
   void clear();
 
   /**
+   * Reset the ValueVector to the initial state without releasing any owned ArrowBuf.
+   * Buffer capacities will remain unchanged and any previous data will be zeroed out.
+   * This includes buffers for data, validity, offset, etc. If the vector has any
+   * child vectors, they will also be reset.
+   */
+  void reset();
+
+  /**
    * Get information about how this field is materialized.
    *
    * @return the field corresponding to this vector
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
index 962a1c9..2d3c543 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
@@ -73,6 +73,10 @@ public class ZeroVector implements FieldVector {
   }
 
   @Override
+  public void reset() {
+  }
+
+  @Override
   public Field getField() {
     return new Field(DATA_VECTOR_NAME, FieldType.nullable(new Null()), null);
   }
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
index 9a23fd8..d0a664a 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
@@ -187,6 +187,13 @@ public abstract class BaseRepeatedValueVector extends BaseValueVector implements
   }
 
   @Override
+  public void reset() {
+    offsetBuffer.setZero(0, offsetBuffer.capacity());
+    vector.reset();
+    valueCount = 0;
+  }
+
+  @Override
   public ArrowBuf[] getBuffers(boolean clear) {
     final ArrowBuf[] buffers;
     if (getBufferSize() == 0) {
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
index 93a8127..9314a25 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
@@ -272,6 +272,13 @@ public class FixedSizeListVector extends BaseValueVector implements FieldVector,
   }
 
   @Override
+  public void reset() {
+    validityBuffer.setZero(0, validityBuffer.capacity());
+    vector.reset();
+    valueCount = 0;
+  }
+
+  @Override
   public ArrowBuf[] getBuffers(boolean clear) {
     setReaderAndWriterIndex();
     final ArrowBuf[] buffers;
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
index 0c1daf4..8aeeb7e 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -513,6 +513,13 @@ public class ListVector extends BaseRepeatedValueVector implements FieldVector,
     lastSet = 0;
   }
 
+  @Override
+  public void reset() {
+    super.reset();
+    validityBuffer.setZero(0, validityBuffer.capacity());
+    lastSet = 0;
+  }
+
   /**
    * Return the underlying buffers associated with this vector. Note that this doesn't
    * impact the reference counts for this buffer so it only should be used for in-context
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
index e130845..6eab6ef 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
@@ -303,6 +303,14 @@ public class MapVector extends AbstractMapVector {
   }
 
   @Override
+  public void reset() {
+    for (final ValueVector v : getChildren()) {
+      v.reset();
+    }
+    valueCount = 0;
+  }
+
+  @Override
   public Field getField() {
     List<Field> children = new ArrayList<>();
     for (ValueVector child : getChildren()) {
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
index d887b73..fb84d23 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java
@@ -306,6 +306,15 @@ public class NullableMapVector extends MapVector implements FieldVector {
   }
 
   /**
+   * Reset this vector to empty, does not release buffers
+   */
+  @Override
+  public void reset() {
+    super.reset();
+    validityBuffer.setZero(0, validityBuffer.capacity());
+  }
+
+  /**
    * Release the validity buffer
    */
   private void clearValidityBuffer() {
@@ -493,8 +502,4 @@ public class NullableMapVector extends MapVector implements FieldVector {
     super.setValueCount(valueCount);
     this.valueCount = valueCount;
   }
-
-  public void reset() {
-    valueCount = 0;
-  }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
index 28903b1..84ea965 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
@@ -19,13 +19,23 @@
 package org.apache.arrow.vector;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
+import io.netty.buffer.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.*;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.nio.charset.StandardCharsets;
+
 public class TestVectorReset {
 
   private BufferAllocator allocator;
@@ -40,15 +50,100 @@ public class TestVectorReset {
     allocator.close();
   }
 
+  private void resetVectorAndVerify(ValueVector vector, ArrowBuf[] bufs) {
+    int[] sizeBefore = new int[bufs.length];
+    for (int i = 0; i < bufs.length; i++) {
+      sizeBefore[i] = bufs[i].capacity();
+    }
+    vector.reset();
+    for (int i = 0; i < bufs.length; i++) {
+      assertEquals(sizeBefore[i], bufs[i].capacity());
+      verifyBufferZeroed(bufs[i]);
+    }
+    assertEquals(0, vector.getValueCount());
+  }
+
+  private void verifyBufferZeroed(ArrowBuf buf) {
+    for (int i = 0; i < buf.capacity(); i++) {
+      assertTrue((byte) 0 == buf.getByte(i));
+    }
+  }
+
   @Test
   public void testFixedTypeReset() {
-    try (final UInt4Vector vector = new UInt4Vector("", allocator)) {
-      vector.allocateNew();
-      final int sizeBefore = vector.getBufferSize();
-      vector.reAlloc();
-      vector.reset();
-      final int sizeAfter = vector.getBufferSize();
-      assertEquals(sizeBefore, sizeAfter);
+    try (final UInt4Vector vector = new UInt4Vector("UInt4", allocator)) {
+      vector.allocateNewSafe();
+      vector.setNull(0);
+      vector.setValueCount(1);
+      resetVectorAndVerify(vector, vector.getBuffers(false));
+    }
+  }
+
+  @Test
+  public void testVariableTypeReset() {
+    try (final VarCharVector vector = new VarCharVector("VarChar", allocator)) {
+      vector.allocateNewSafe();
+      vector.set(0, "a".getBytes(StandardCharsets.UTF_8));
+      vector.setLastSet(0);
+      vector.setValueCount(1);
+      resetVectorAndVerify(vector, vector.getBuffers(false));
+      assertEquals(-1, vector.getLastSet());
+    }
+  }
+
+  @Test
+  public void testListTypeReset() {
+    try (final ListVector variableList = new ListVector("VarList", allocator, FieldType.nullable(MinorType.INT.getType()), null);
+         final FixedSizeListVector fixedList = new FixedSizeListVector("FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), null)
+    ) {
+      // ListVector
+      variableList.allocateNewSafe();
+      variableList.startNewValue(0);
+      variableList.endValue(0, 0);
+      variableList.setValueCount(1);
+      resetVectorAndVerify(variableList, variableList.getBuffers(false));
+      assertEquals(0, variableList.getLastSet());
+
+      // FixedSizeListVector
+      fixedList.allocateNewSafe();
+      fixedList.setNull(0);
+      fixedList.setValueCount(1);
+      resetVectorAndVerify(fixedList, fixedList.getBuffers(false));
+    }
+  }
+
+  @Test
+  public void testMapTypeReset() {
+    try (final MapVector mapVector = new MapVector("Map", allocator, FieldType.nullable(MinorType.INT.getType()), null);
+         final NullableMapVector nullableMapVector = new NullableMapVector("NullableMap", allocator, FieldType.nullable(MinorType.INT.getType()), null)
+    ) {
+      // MapVector
+      mapVector.allocateNewSafe();
+      IntVector mapChild = mapVector.addOrGet("child", FieldType.nullable(new Int(32, true)), IntVector.class);
+      mapChild.setNull(0);
+      mapVector.setValueCount(1);
+      resetVectorAndVerify(mapVector, mapVector.getBuffers(false));
+
+      // NullableMapVector
+      nullableMapVector.allocateNewSafe();
+      nullableMapVector.setNull(0);
+      nullableMapVector.setValueCount(1);
+      resetVectorAndVerify(nullableMapVector, nullableMapVector.getBuffers(false));
+    }
+  }
+
+  @Test
+  public void testUnionTypeReset() {
+    try (final UnionVector vector = new UnionVector("Union", allocator, null);
+         final IntVector dataVector = new IntVector("Int", allocator)
+    ) {
+      vector.getBufferSize();
+      vector.allocateNewSafe();
+      dataVector.allocateNewSafe();
+      vector.addVector(dataVector);
+      dataVector.setNull(0);
+      vector.setValueCount(1);
+      resetVectorAndVerify(vector, vector.getBuffers(false));
     }
   }
 }

-- 
To stop receiving notification emails like this one, please contact
['"commits@arrow.apache.org" <co...@arrow.apache.org>'].