You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2018/03/31 19:47:41 UTC
[3/3] drill git commit: DRILL-6234: Improved documentation for
VariableWidthVector mutators,
and added simple unit tests demonstrating mutator behavior.
DRILL-6234: Improved documentation for VariableWidthVector mutators, and added simple unit tests demonstrating mutator behavior.
close apache/drill#1164
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/9a6cb59b
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/9a6cb59b
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/9a6cb59b
Branch: refs/heads/master
Commit: 9a6cb59b9b7a5b127e5f60309ce2f506ede9652a
Parents: f5b8223
Author: Timothy Farkas <ti...@apache.org>
Authored: Tue Mar 13 17:24:28 2018 -0700
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Mar 30 22:47:31 2018 -0700
----------------------------------------------------------------------
exec/vector/pom.xml | 7 +-
.../templates/VariableLengthVectors.java | 61 ++++++++
.../apache/drill/exec/vector/ValueVector.java | 3 +-
.../exec/vector/VariableLengthVectorTest.java | 141 +++++++++++++++++++
4 files changed, 210 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/pom.xml
----------------------------------------------------------------------
diff --git a/exec/vector/pom.xml b/exec/vector/pom.xml
index 0184305..21e138d 100644
--- a/exec/vector/pom.xml
+++ b/exec/vector/pom.xml
@@ -65,7 +65,12 @@
<version>0.7.1</version>
</dependency>
-
+ <dependency>
+ <groupId>org.apache.drill</groupId>
+ <artifactId>drill-common</artifactId>
+ <version>${project.version}</version>
+ <classifier>tests</classifier>
+ </dependency>
</dependencies>
http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
index 516eb52..ab995cd 100644
--- a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
+++ b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
@@ -512,6 +512,8 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
/**
+ * <h4>Overview</h4>
+ * <p>
* Mutable${minor.class} implements a vector of variable width values. Elements in the vector
* are accessed by position from the logical start of the vector. A fixed width offsetVector
* is used to convert an element's position to it's offset from the start of the (0-based)
@@ -520,6 +522,46 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
* The equivalent Java primitive is '${minor.javaType!type.javaType}'
*
* NB: this class is automatically generated from ValueVectorTypes.tdd using FreeMarker.
+ * </p>
+ * <h4>Contract</h4>
+ * <p>
+ * <ol>
+ * <li>
+ * <b>Supported Writes:</b> {@link VariableWidthVector}s do not support random writes. In contrast {@link org.apache.drill.exec.vector.FixedWidthVector}s do
+ * allow random writes but special care is needed.
+ * </li>
+ * <li>
+ * <b>Writing Values:</b> All set methods must be called with a consecutive sequence of indices. With a few exceptions:
+ * <ol>
+ * <li>You can update the last index you just set.</li>
+ * <li>You can reset a previous index (call it Idx), but you must assume all the data after Idx is corrupt. Also
+ * note that the memory consumed by data that came after Idx is not released.</li>
+ * </ol>
+ * </li>
+ * <li>
+ * <b>Setting Value Count:</b> Vectors aren't explicitly aware of how many values they contain. So you must keep track of the
+ * number of values you've written to the vector and once you are done writing to the vector you must call {@link Mutator#setValueCount(int)}.
+ * It is possible to trim the vector by setting the value count to be less than the number of values currently contained in the vector. Note the extra memory consumed in
+ * the data buffer is not freed when this is done.
+ * </li>
+ * <li>
+ * <b>Memory Allocation:</b> When setting a value at an index you must do one of the following to ensure you do not get an {@link IndexOutOfBoundsException}.
+ * <ol>
+ * <li>
+ * Allocate the exact amount of memory you need when using the {@link Mutator#set(int, byte[])} methods. If you do not
+ * manually allocate sufficient memory an {@link IndexOutOfBoundsException} can be thrown when the data buffer runs out of space.
+ * </li>
+ * <li>
+ * Or you can use the {@link Mutator#setSafe(int, byte[])} methods, which will automatically grow your data buffer to
+ * fit your data.
+ * </li>
+ * </ol>
+ * </li>
+ * <li>
+ * <b>Immutability:</b> Once a vector has been populated with data and {@link #setValueCount(int)} has been called, it should be considered immutable.
+ * </li>
+ * </ol>
+ * </p>
*/
public final class Mutator extends BaseValueVector.BaseMutator implements VariableWidthVector.VariableWidthMutator {
@@ -703,6 +745,25 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
data.setBytes(currentOffset, holder.buffer, holder.start, length);
}
+ /**
+ * <h4>Notes on Usage</h4>
+ * <p>
+ * For {@link VariableWidthVector}s this method can be used in the following cases:
+ * <ul>
+ * <li>Setting the actual number of elements currently contained in the vector.</li>
+ * <li>Trimming the vector to have fewer elements than it current does.</li>
+ * </ul>
+ * </p>
+ * <h4>Caveats</h4>
+ * <p>
+ * It is important to note that for {@link org.apache.drill.exec.vector.FixedWidthVector}s this method can also be used to expand the vector.
+ * However, {@link VariableWidthVector} do not support this usage and this method will throw an {@link IndexOutOfBoundsException} if you attempt
+ * to use it in this way. Expansion of valueCounts is not supported mainly because there is no benefit, since you would still have to rely on the setSafe
+ * methods to appropriatly expand the data buffer and populate the vector anyway (since by definition we do not know the width of elements). See DRILL-6234 for details.
+ * </p>
+ * <h4>Method Documentation</h4>
+ * {@inheritDoc}
+ */
@Override
public void setValueCount(int valueCount) {
final int currentByteCapacity = getByteCapacity();
http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
index f873cc6..2659810 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
@@ -293,7 +293,8 @@ public interface ValueVector extends Closeable, Iterable<ValueVector> {
*/
interface Mutator {
/**
- * Sets the number of values that is stored in this vector to the given value count.
+ * Sets the number of values that is stored in this vector to the given value count. <b>WARNING!</b> Once the
+ * valueCount is set, the vector should be considered immutable.
*
* @param valueCount value count to set.
*/
http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java
new file mode 100644
index 0000000..eaee597
--- /dev/null
+++ b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector;
+
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.memory.RootAllocator;
+import org.apache.drill.exec.record.MaterializedField;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * This test uses {@link VarCharVector} to test the template code in VariableLengthVector.
+ */
+public class VariableLengthVectorTest
+{
+ /**
+ * If the vector contains 1000 records, setting a value count of 1000 should work.
+ */
+ @Test
+ public void testSettingSameValueCount()
+ {
+ try (RootAllocator allocator = new RootAllocator(10_000_000)) {
+ final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR));
+ final VarCharVector vector = new VarCharVector(field, allocator);
+
+ vector.allocateNew();
+
+ try {
+ final int size = 1000;
+ final VarCharVector.Mutator mutator = vector.getMutator();
+ final VarCharVector.Accessor accessor = vector.getAccessor();
+
+ setSafeIndexStrings("", 0, size, mutator);
+
+ mutator.setValueCount(size);
+ Assert.assertEquals(size, accessor.getValueCount());
+ checkIndexStrings("", 0, size, accessor);
+ } finally {
+ vector.clear();
+ }
+ }
+ }
+
+ /**
+ * Test truncating data. If you have 10000 records, reduce the vector to 1000 records.
+ */
+ @Test
+ public void testTrunicateVectorSetValueCount()
+ {
+ try (RootAllocator allocator = new RootAllocator(10_000_000)) {
+ final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR));
+ final VarCharVector vector = new VarCharVector(field, allocator);
+
+ vector.allocateNew();
+
+ try {
+ final int size = 1000;
+ final int fluffSize = 10000;
+ final VarCharVector.Mutator mutator = vector.getMutator();
+ final VarCharVector.Accessor accessor = vector.getAccessor();
+
+ setSafeIndexStrings("", 0, size, mutator);
+ setSafeIndexStrings("first cut ", size, fluffSize, mutator);
+
+ mutator.setValueCount(fluffSize);
+ Assert.assertEquals(fluffSize, accessor.getValueCount());
+
+ checkIndexStrings("", 0, size, accessor);
+
+ } finally {
+ vector.clear();
+ }
+ }
+ }
+
+ /**
+ * Set 10000 values. Then go back and set new values starting at the 1001 the record.
+ */
+ @Test
+ public void testSetBackTracking()
+ {
+ try (RootAllocator allocator = new RootAllocator(10_000_000)) {
+ final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR));
+ final VarCharVector vector = new VarCharVector(field, allocator);
+
+ vector.allocateNew();
+
+ try {
+ final int size = 1000;
+ final int fluffSize = 10000;
+ final VarCharVector.Mutator mutator = vector.getMutator();
+ final VarCharVector.Accessor accessor = vector.getAccessor();
+
+ setSafeIndexStrings("", 0, size, mutator);
+ setSafeIndexStrings("first cut ", size, fluffSize, mutator);
+ setSafeIndexStrings("redone cut ", size, fluffSize, mutator);
+
+ mutator.setValueCount(fluffSize);
+ Assert.assertEquals(fluffSize, accessor.getValueCount());
+
+ checkIndexStrings("", 0, size, accessor);
+ checkIndexStrings("redone cut ", size, fluffSize, accessor);
+
+ } finally {
+ vector.clear();
+ }
+ }
+ }
+
+ public static void setSafeIndexStrings(String prefix, int offset, int size, VarCharVector.Mutator mutator)
+ {
+ for (int index = offset; index < size; index++) {
+ final String indexString = prefix + "String num " + index;
+ mutator.setSafe(index, indexString.getBytes());
+ }
+ }
+
+ public static void checkIndexStrings(String prefix, int offset, int size, VarCharVector.Accessor accessor)
+ {
+ for (int index = offset; index < size; index++) {
+ final String indexString = prefix + "String num " + index;
+ Assert.assertArrayEquals(indexString.getBytes(), accessor.get(index));
+ }
+ }
+}