You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2018/03/31 19:47:41 UTC

[3/3] drill git commit: DRILL-6234: Improved documentation for VariableWidthVector mutators, and added simple unit tests demonstrating mutator behavior.

DRILL-6234: Improved documentation for VariableWidthVector mutators, and added simple unit tests demonstrating mutator behavior.

close apache/drill#1164


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/9a6cb59b
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/9a6cb59b
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/9a6cb59b

Branch: refs/heads/master
Commit: 9a6cb59b9b7a5b127e5f60309ce2f506ede9652a
Parents: f5b8223
Author: Timothy Farkas <ti...@apache.org>
Authored: Tue Mar 13 17:24:28 2018 -0700
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Mar 30 22:47:31 2018 -0700

----------------------------------------------------------------------
 exec/vector/pom.xml                             |   7 +-
 .../templates/VariableLengthVectors.java        |  61 ++++++++
 .../apache/drill/exec/vector/ValueVector.java   |   3 +-
 .../exec/vector/VariableLengthVectorTest.java   | 141 +++++++++++++++++++
 4 files changed, 210 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/pom.xml
----------------------------------------------------------------------
diff --git a/exec/vector/pom.xml b/exec/vector/pom.xml
index 0184305..21e138d 100644
--- a/exec/vector/pom.xml
+++ b/exec/vector/pom.xml
@@ -65,7 +65,12 @@
       <version>0.7.1</version>
     </dependency>
 
-
+    <dependency>
+      <groupId>org.apache.drill</groupId>
+      <artifactId>drill-common</artifactId>
+      <version>${project.version}</version>
+      <classifier>tests</classifier>
+    </dependency>
   </dependencies>
 
 

http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
index 516eb52..ab995cd 100644
--- a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
+++ b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
@@ -512,6 +512,8 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
   }
 
   /**
+   * <h4>Overview</h4>
+   * <p>
    * Mutable${minor.class} implements a vector of variable width values.  Elements in the vector
    * are accessed by position from the logical start of the vector.  A fixed width offsetVector
    * is used to convert an element's position to it's offset from the start of the (0-based)
@@ -520,6 +522,46 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
    *   The equivalent Java primitive is '${minor.javaType!type.javaType}'
    *
    * NB: this class is automatically generated from ValueVectorTypes.tdd using FreeMarker.
+   * </p>
+   * <h4>Contract</h4>
+   * <p>
+   *   <ol>
+   *     <li>
+   *       <b>Supported Writes:</b> {@link VariableWidthVector}s do not support random writes. In contrast {@link org.apache.drill.exec.vector.FixedWidthVector}s do
+   *       allow random writes but special care is needed.
+   *       </li>
+   *     <li>
+   *       <b>Writing Values:</b> All set methods must be called with a consecutive sequence of indices. With a few exceptions:
+   *       <ol>
+   *         <li>You can update the last index you just set.</li>
+   *         <li>You can reset a previous index (call it Idx), but you must assume all the data after Idx is corrupt. Also
+   *         note that the memory consumed by data that came after Idx is not released.</li>
+   *       </ol>
+   *     </li>
+   *     <li>
+   *       <b>Setting Value Count:</b> Vectors aren't explicitly aware of how many values they contain. So you must keep track of the
+   *       number of values you've written to the vector and once you are done writing to the vector you must call {@link Mutator#setValueCount(int)}.
+   *       It is possible to trim the vector by setting the value count to be less than the number of values currently contained in the vector. Note the extra memory consumed in
+   *       the data buffer is not freed when this is done.
+   *     </li>
+   *     <li>
+   *       <b>Memory Allocation:</b> When setting a value at an index you must do one of the following to ensure you do not get an {@link IndexOutOfBoundsException}.
+   *       <ol>
+   *         <li>
+   *           Allocate the exact amount of memory you need when using the {@link Mutator#set(int, byte[])} methods. If you do not
+   *           manually allocate sufficient memory an {@link IndexOutOfBoundsException} can be thrown when the data buffer runs out of space.
+   *         </li>
+   *         <li>
+   *           Or you can use the {@link Mutator#setSafe(int, byte[])} methods, which will automatically grow your data buffer to
+   *           fit your data.
+   *         </li>
+   *       </ol>
+   *     </li>
+   *     <li>
+   *       <b>Immutability:</b> Once a vector has been populated with data and {@link #setValueCount(int)} has been called, it should be considered immutable.
+   *     </li>
+   *   </ol>
+   * </p>
    */
   public final class Mutator extends BaseValueVector.BaseMutator implements VariableWidthVector.VariableWidthMutator {
 
@@ -703,6 +745,25 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
       data.setBytes(currentOffset, holder.buffer, holder.start, length);
     }
 
+    /**
+     * <h4>Notes on Usage</h4>
+     * <p>
+     * For {@link VariableWidthVector}s this method can be used in the following cases:
+     * <ul>
+     *   <li>Setting the actual number of elements currently contained in the vector.</li>
+     *   <li>Trimming the vector to have fewer elements than it current does.</li>
+     * </ul>
+     * </p>
+     * <h4>Caveats</h4>
+     * <p>
+     *   It is important to note that for {@link org.apache.drill.exec.vector.FixedWidthVector}s this method can also be used to expand the vector.
+     *   However, {@link VariableWidthVector} do not support this usage and this method will throw an {@link IndexOutOfBoundsException} if you attempt
+     *   to use it in this way. Expansion of valueCounts is not supported mainly because there is no benefit, since you would still have to rely on the setSafe
+     *   methods to appropriatly expand the data buffer and populate the vector anyway (since by definition we do not know the width of elements). See DRILL-6234 for details.
+     * </p>
+     * <h4>Method Documentation</h4>
+     * {@inheritDoc}
+     */
     @Override
     public void setValueCount(int valueCount) {
       final int currentByteCapacity = getByteCapacity();

http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
index f873cc6..2659810 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
@@ -293,7 +293,8 @@ public interface ValueVector extends Closeable, Iterable<ValueVector> {
    */
   interface Mutator {
     /**
-     * Sets the number of values that is stored in this vector to the given value count.
+     * Sets the number of values that is stored in this vector to the given value count. <b>WARNING!</b> Once the
+     * valueCount is set, the vector should be considered immutable.
      *
      * @param valueCount  value count to set.
      */

http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java
new file mode 100644
index 0000000..eaee597
--- /dev/null
+++ b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector;
+
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.memory.RootAllocator;
+import org.apache.drill.exec.record.MaterializedField;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * This test uses {@link VarCharVector} to test the template code in VariableLengthVector.
+ */
+public class VariableLengthVectorTest
+{
+  /**
+   * If the vector contains 1000 records, setting a value count of 1000 should work.
+   */
+  @Test
+  public void testSettingSameValueCount()
+  {
+    try (RootAllocator allocator = new RootAllocator(10_000_000)) {
+      final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR));
+      final VarCharVector vector = new VarCharVector(field, allocator);
+
+      vector.allocateNew();
+
+      try {
+        final int size = 1000;
+        final VarCharVector.Mutator mutator = vector.getMutator();
+        final VarCharVector.Accessor accessor = vector.getAccessor();
+
+        setSafeIndexStrings("", 0, size, mutator);
+
+        mutator.setValueCount(size);
+        Assert.assertEquals(size, accessor.getValueCount());
+        checkIndexStrings("", 0, size, accessor);
+      } finally {
+        vector.clear();
+      }
+    }
+  }
+
+  /**
+   * Test truncating data. If you have 10000 records, reduce the vector to 1000 records.
+   */
+  @Test
+  public void testTrunicateVectorSetValueCount()
+  {
+    try (RootAllocator allocator = new RootAllocator(10_000_000)) {
+      final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR));
+      final VarCharVector vector = new VarCharVector(field, allocator);
+
+      vector.allocateNew();
+
+      try {
+        final int size = 1000;
+        final int fluffSize = 10000;
+        final VarCharVector.Mutator mutator = vector.getMutator();
+        final VarCharVector.Accessor accessor = vector.getAccessor();
+
+        setSafeIndexStrings("", 0, size, mutator);
+        setSafeIndexStrings("first cut ", size, fluffSize, mutator);
+
+        mutator.setValueCount(fluffSize);
+        Assert.assertEquals(fluffSize, accessor.getValueCount());
+
+        checkIndexStrings("", 0, size, accessor);
+
+      } finally {
+        vector.clear();
+      }
+    }
+  }
+
+  /**
+   * Set 10000 values. Then go back and set new values starting at the 1001 the record.
+   */
+  @Test
+  public void testSetBackTracking()
+  {
+    try (RootAllocator allocator = new RootAllocator(10_000_000)) {
+      final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR));
+      final VarCharVector vector = new VarCharVector(field, allocator);
+
+      vector.allocateNew();
+
+      try {
+        final int size = 1000;
+        final int fluffSize = 10000;
+        final VarCharVector.Mutator mutator = vector.getMutator();
+        final VarCharVector.Accessor accessor = vector.getAccessor();
+
+        setSafeIndexStrings("", 0, size, mutator);
+        setSafeIndexStrings("first cut ", size, fluffSize, mutator);
+        setSafeIndexStrings("redone cut ", size, fluffSize, mutator);
+
+        mutator.setValueCount(fluffSize);
+        Assert.assertEquals(fluffSize, accessor.getValueCount());
+
+        checkIndexStrings("", 0, size, accessor);
+        checkIndexStrings("redone cut ", size, fluffSize, accessor);
+
+      } finally {
+        vector.clear();
+      }
+    }
+  }
+
+  public static void setSafeIndexStrings(String prefix, int offset, int size, VarCharVector.Mutator mutator)
+  {
+    for (int index = offset; index < size; index++) {
+      final String indexString = prefix + "String num " + index;
+      mutator.setSafe(index, indexString.getBytes());
+    }
+  }
+
+  public static void checkIndexStrings(String prefix, int offset, int size, VarCharVector.Accessor accessor)
+  {
+    for (int index = offset; index < size; index++) {
+      final String indexString = prefix + "String num " + index;
+      Assert.assertArrayEquals(indexString.getBytes(), accessor.get(index));
+    }
+  }
+}