Posted to commits@drill.apache.org by pr...@apache.org on 2017/12/21 05:19:28 UTC
[01/15] drill git commit: DRILL-5657: Size-aware vector writer structure
Repository: drill
Updated Branches:
refs/heads/master eb0c40306 -> 40de8ca4f
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/package-info.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/package-info.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/package-info.java
new file mode 100644
index 0000000..9bc654b
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/package-info.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package provides a "dummy" set of writers. The dummy writers provide
+ * the same API as the "real" writers, but the dummy writers simply discard
+ * their data. The dummy writers are used when implementing projection:
+ * non-projected columns may still have to be processed (as in a CSV file,
+ * say), but their values are not needed. One way to do this is to do an
+ * if-statement for each value:<pre><code>
+ * if (column-a-is-projected) {
+ *   aWriter.setSomething(value);
+ * }</code></pre>
+ * The dummy writers convert the if-statement into a virtual function call,
+ * the same as is done to handle the type-specific nature of vectors:
+ * <pre><code>
+ * aWriter.setSomething(value);
+ * </code></pre>
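+ * <p>
+ * As a minimal sketch (the interface and method names here are
+ * illustrative, not necessarily the exact Drill API), a dummy writer
+ * simply implements the writer interface with no-op setters:
+ * <pre><code>
+ * class DummyScalarWriter implements ScalarWriter {
+ *   public void setInt(int value) { }       // value silently discarded
+ *   public void setString(String value) { } // value silently discarded
+ * }</code></pre>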
+ * <p>
+ * The theory is that the virtual function dispatch is simpler, and faster,
+ * than doing continual if-checks everywhere in the code.
+ * <p>
+ * The dummy writers reside in this package so that the various factory
+ * methods can automatically build the dummy versions when given a null
+ * value vector (which we then interpret to mean that the column has no
+ * physical backing).
+ * <p>
+ * At present, most methods that return a value simply return zero or
+ * null.
+ * Experience will show whether it is worthwhile implementing some
+ * basics, such as a value type or index. For now, these return null,
+ * assuming that the caller won't do anything with the column other
+ * than set a value.
+ * <p>
+ * Some simpler dummy writers appear as nested classes inside the
+ * "real" writers.
+ */
+
+package org.apache.drill.exec.vector.accessor.writer.dummy;
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/package-info.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/package-info.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/package-info.java
new file mode 100644
index 0000000..f536c09
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/package-info.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Implementation of the vector writers. The code will make much more sense if
+ * we start with a review of Drill’s complex vector data model. Drill has 38+
+ * data (“minor”) types. Drill also has three cardinalities (“modes”). The
+ * result is over 120 different vector types. Then, when you add maps, repeated
+ * maps, lists and repeated lists, you rapidly get an explosion of types that
+ * the writer code must handle.
+ *
+ * <h4>Understanding the Vector Model</h4>
+ *
+ * Vectors can be categorized along multiple dimensions:
+ * <ul>
+ * <li>By data (minor) type</li>
+ * <li>By cardinality (mode)</li>
+ * <li>By fixed or variable width</li>
+ * <li>By repeat levels</li>
+ * </ul>
+ * <p>
+ * A repeated map, a list, a repeated list and any array (repeated) scalar
+ * are all array-like. Nullable and required modes are identical (single
+ * values), but a nullable vector has an additional is-set (“bit”) vector.
+ * <p>
+ * The writers (and readers) borrow concepts from JSON and relational theory
+ * to simplify the problem:
+ * <p>
+ * <ul>
+ * <li>Both the top-level row and a Drill map are “tuples” and are treated
+ * similarly in the model.</li>
+ * <li>All non-map, non-list (that is, scalar) data types are treated
+ * uniformly.</li>
+ * <li>All arrays (whether a list, a repeated list, a repeated map, or a
+ * repeated scalar) are treated uniformly.</li>
+ * </ul>
+ *
+ * <h4>Repeat Levels</h4>
+ *
+ * JSON and Parquet can be understood as a series of one or more "repeat
+ * levels." First, let's identify the repeat levels above the batch
+ * level:
+ * <ul>
+ * <li>The top-most level is the "result set": the entire collection of
+ * rows that come from a file (or other data source.)</li>
+ * <li>Result sets are divided into batches: collections of up to 64K
+ * rows.</li>
+ * </ul>
+ *
+ * Then, within a batch:
+ * <ul>
+ * <li>Each batch is a collection of rows. A batch-level index points
+ * to the current row.</li>
+ * <li>Scalar arrays introduce a repeat level: each row has 0, 1 or
+ * many elements in the array-valued column. An offset vector indexes
+ * to the first value for each row, as sketched below. Each scalar
+ * array has its own per-array index to point to the next write
+ * position.</li>
+ * <li>Map arrays introduce a repeat level for a group of columns
+ * (those that make up the map.) A single offset vector points to
+ * the common start position for the columns. A common index points
+ * to the common next write position.</li>
+ * <li>Lists also introduce a repeat level. (Details to be worked
+ * out.)</li>
+ * </ul>
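+ * <p>
+ * As a concrete sketch of the offset-vector encoding (vector names are
+ * illustrative): three rows holding the arrays [a b], [ ] and [c d e]
+ * produce the offsets [0, 2, 2, 5]; row i's values occupy data
+ * positions offsets[i] through offsets[i+1] - 1:
+ * <pre><code>
+ * int start = offsets.get(rowIndex);      // first element of the array
+ * int end = offsets.get(rowIndex + 1);    // exclusive end position
+ * int length = end - start;               // 0 for an empty array
+ * </code></pre>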
+ *
+ * For repeated vectors, one can think of the structure either top-down
+ * or bottom-up:
+ * <ul>
+ * <li>Top down: the row position points into an offset vector. The
+ * offset vector value points to either the data value, or into another
+ * offset vector.</li>
+ * <li>Bottom-up: values are appended to the end of the vector. Values
+ * are "pinched off" to form an array (for repeated maps) or for a row.
+ * In this view, indexes bubble upward. The inner-most last write position
+ * is written as the array end position in the enclosing offset vector.
+ * This may propagate up several levels, as sketched below.</li>
+ * </ul>
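+ * <p>
+ * A sketch of the bottom-up "pinch off" step (the method names are
+ * illustrative): closing an array saves the inner writer's last write
+ * position as the end offset in the enclosing offset vector:
+ * <pre><code>
+ * void endArray() {
+ *   // The inner-most write position becomes the array end position,
+ *   // possibly bubbling up through several offset-vector levels.
+ *   offsetWriter.setOffset(rowIndex + 1, innerWriter.writePosition());
+ * }</code></pre>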
+ *
+ * <h4>Writer Data Model</h4>
+ *
+ * The above leads to a very simple, JSON-like data model:
+ * <ul>
+ * <li>A tuple reader or writer models a row. (Usually via a subclass.) Columns
+ * are accessible by name or position.</li>
+ * <li>Every column is modeled as an object.</li>
+ * <li>The object can have an object type: scalar, tuple or array.</li>
+ * <li>An array has a single element type (but many run-time elements).</li>
+ * <li>A scalar can be nullable or not, and provides a uniform get/set
+ * interface.</li>
+ * </ul>
+ * <p>
+ * This data model is similar to, but has important differences from, the
+ * prior generated readers and writers.
+ * <p>
+ * The object layer is new: it is the simplest way to model the three “object
+ * types.” An app using this code would use just the leaf scalar readers and
+ * writers.
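+ * <p>
+ * A sketch of how an application might navigate this object model (the
+ * names follow the model above; the exact API may differ):
+ * <pre><code>
+ * TupleWriter row = ...;               // the top-level tuple (row)
+ * row.scalar("a").setInt(10);          // scalar column, by name
+ * ArrayWriter arr = row.array("b");    // array column
+ * arr.scalar().setString("first");     // append successive elements
+ * arr.scalar().setString("second");
+ * </code></pre>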
+ *
+ * <h4>Writer Performance</h4>
+ *
+ * To maximize performance, the writers use a single version for all "data
+ * modes" (nullable, required, repeated). Some items of note:
+ * <ul>
+ * <li>The writers bypass DrillBuf and the UDLE, performing needed writes
+ * directly to direct memory.</li>
+ * <li>The writers cache the buffer address and implement a number of methods
+ * to synchronize that address when the buffer changes (on a new batch or during
+ * vector resize).</li>
+ * <li>Writing requires a single bounds check. In most cases, the write is
+ * within bounds so the single check is all that is needed; see the sketch
+ * after this list.</li>
+ * <li>If the write is out of bounds, then the writer determines the new vector
+ * size and performs the needed reallocation. To avoid multiple doublings, the
+ * writer computes the needed new size and allocates that size directly.</li>
+ * <li>Vector reallocation is improved to eliminate zeroing the new half of the
+ * buffer; data is left “garbage-filled.”</li>
+ * <li>If the vector would grow beyond 16 MB, then overflow is triggered via a
+ * listener, which causes the buffer to be replaced. The write then
+ * continues.</li>
+ * <li>Offset vector updates are integrated into the writers using an
+ * <tt>OffsetVectorWriter</tt>. This writer caches the last write position so
+ * that each array write needs a single offset update, rather than the read
+ * plus write needed in previous code.</li>
+ * <li>The writers keep track of the “last write position” and perform
+ * “fill-empties” work if the new write position is more than one position
+ * behind the last write. All types now correctly support “fill-empties”
+ * (before, only nullable types did so reliably.)</li>
+ * <li>Null handling is done by an additional writer layer that wraps the
+ * underlying data writer. This avoids the need for a special nullable writer:
+ * the same nullable layer works for all data types.</li>
+ * <li>Array handling is done similarly: an array writer manages the offset
+ * vector and works the same for repeated scalars, repeated maps and
+ * (eventually) lists and repeated lists.</li>
+ * </ul>
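+ * <p>
+ * A simplified sketch of the fixed-width write path described above
+ * (field and method names are illustrative; the real writers also route
+ * overflow through the listener):
+ * <pre><code>
+ * public void setInt(int value) {
+ *   int writeOffset = vectorIndex() * VALUE_WIDTH;
+ *   if (writeOffset + VALUE_WIDTH > capacity) {
+ *     resize(writeOffset + VALUE_WIDTH); // realloc once, or overflow
+ *   }
+ *   // Single write against the cached direct-memory address.
+ *   PlatformDependent.putInt(bufAddr + writeOffset, value);
+ * }</code></pre>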
+ */
+
+package org.apache.drill.exec.vector.accessor.writer;
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
index 6b60471..5ac28c5 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/AbstractMapVector.java
@@ -64,7 +64,6 @@ public abstract class AbstractMapVector extends AbstractContainerVector {
valueVector.close();
}
vectors.clear();
-
super.close();
}
@@ -178,7 +177,7 @@ public abstract class AbstractMapVector extends AbstractContainerVector {
*
* Note that this method does not enforce any vector type check nor throws a schema change exception.
*/
- protected void putChild(String name, ValueVector vector) {
+ public void putChild(String name, ValueVector vector) {
putVector(name, vector);
field.addChild(vector.getField());
}
@@ -280,6 +279,16 @@ public abstract class AbstractMapVector extends AbstractContainerVector {
}
@Override
+ public int getAllocatedSize() {
+ int size = 0;
+
+ for (final ValueVector v : vectors.values()) {
+ size += v.getAllocatedSize();
+ }
+ return size;
+ }
+
+ @Override
public void collectLedgers(Set<BufferLedger> ledgers) {
for (final ValueVector v : vectors.values()) {
v.collectLedgers(ledgers);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java
index 2b41b8b..8472f80 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/BaseRepeatedValueVector.java
@@ -87,14 +87,10 @@ public abstract class BaseRepeatedValueVector extends BaseValueVector implements
@Override
- public UInt4Vector getOffsetVector() {
- return offsets;
- }
+ public UInt4Vector getOffsetVector() { return offsets; }
@Override
- public ValueVector getDataVector() {
- return vector;
- }
+ public ValueVector getDataVector() { return vector; }
@Override
public void setInitialCapacity(int numRecords) {
@@ -127,6 +123,11 @@ public abstract class BaseRepeatedValueVector extends BaseValueVector implements
}
@Override
+ public int getAllocatedSize() {
+ return offsets.getAllocatedSize() + vector.getAllocatedSize();
+ }
+
+ @Override
public int getBufferSizeFor(int valueCount) {
if (valueCount == 0) {
return 0;
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java
index 9569946..7de5ce6 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/ListVector.java
@@ -52,7 +52,6 @@ public class ListVector extends BaseRepeatedValueVector {
private Accessor accessor = new Accessor();
private UnionListWriter writer;
private UnionListReader reader;
- private CallBack callBack;
public ListVector(MaterializedField field, BufferAllocator allocator, CallBack callBack) {
super(field, allocator);
@@ -61,7 +60,6 @@ public class ListVector extends BaseRepeatedValueVector {
this.field.addChild(getDataVector().getField());
this.writer = new UnionListWriter(this);
this.reader = new UnionListReader(this);
- this.callBack = callBack;
}
public UnionListWriter getWriter() {
@@ -203,6 +201,8 @@ public class ListVector extends BaseRepeatedValueVector {
.addChild(bits.getMetadata())
.addChild(vector.getMetadata());
}
+
+ @Override
public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(VectorDescriptor descriptor) {
AddOrGetResult<T> result = super.addOrGetVector(descriptor);
reader = new UnionListReader(this);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
index 19c910b..4a501b8 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
@@ -67,10 +67,7 @@ public class MapVector extends AbstractMapVector {
}
@Override
- public FieldReader getReader() {
- //return new SingleMapReaderImpl(MapVector.this);
- return reader;
- }
+ public FieldReader getReader() { return reader; }
transient private MapTransferPair ephPair;
transient private MapSingleCopier ephPair2;
@@ -95,9 +92,7 @@ public class MapVector extends AbstractMapVector {
}
@Override
- protected boolean supportsDirectRead() {
- return true;
- }
+ protected boolean supportsDirectRead() { return true; }
public Iterator<String> fieldNameIterator() {
return getChildFieldNames().iterator();
@@ -124,6 +119,15 @@ public class MapVector extends AbstractMapVector {
}
@Override
+ public int getAllocatedSize() {
+ int size = 0;
+ for (final ValueVector v : this) {
+ size += v.getAllocatedSize();
+ }
+ return size;
+ }
+
+ @Override
public int getBufferSizeFor(final int valueCount) {
if (valueCount == 0) {
return 0;
@@ -353,6 +357,10 @@ public class MapVector extends AbstractMapVector {
return getChildByOrdinal(id);
}
+ public void setMapValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ }
+
public class Mutator extends BaseValueVector.BaseMutator {
@Override
@@ -360,7 +368,7 @@ public class MapVector extends AbstractMapVector {
for (final ValueVector v : getChildren()) {
v.getMutator().setValueCount(valueCount);
}
- MapVector.this.valueCount = valueCount;
+ setMapValueCount(valueCount);
}
@Override
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
index be9ebee..6442417 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
@@ -198,14 +198,10 @@ public class RepeatedListVector extends AbstractContainerVector
}
@Override
- public RepeatedListAccessor getAccessor() {
- return accessor;
- }
+ public RepeatedListAccessor getAccessor() { return accessor; }
@Override
- public RepeatedListMutator getMutator() {
- return mutator;
- }
+ public RepeatedListMutator getMutator() { return mutator; }
@Override
public FieldReader getReader() {
@@ -277,11 +273,8 @@ public class RepeatedListVector extends AbstractContainerVector
}
}
-
- @Override
- public RepeatedListReaderImpl getReader() {
- return reader;
- }
+ @Override
+ public RepeatedListReaderImpl getReader() { return reader; }
@Override
public DelegateRepeatedVector.RepeatedListAccessor getAccessor() {
@@ -334,6 +327,11 @@ public class RepeatedListVector extends AbstractContainerVector
}
@Override
+ public int getAllocatedSize() {
+ return delegate.getAllocatedSize();
+ }
+
+ @Override
public int getBufferSizeFor(final int valueCount) {
return delegate.getBufferSizeFor(valueCount);
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
index 6b29258..57f1a67 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/RepeatedMapVector.java
@@ -63,18 +63,22 @@ public class RepeatedMapVector extends AbstractMapVector
private final Mutator mutator = new Mutator();
private final EmptyValuePopulator emptyPopulator;
- public RepeatedMapVector(MaterializedField field, BufferAllocator allocator, CallBack callBack){
+ public RepeatedMapVector(MaterializedField field, BufferAllocator allocator, CallBack callBack) {
super(field, allocator, callBack);
this.offsets = new UInt4Vector(BaseRepeatedValueVector.OFFSETS_FIELD, allocator);
this.emptyPopulator = new EmptyValuePopulator(offsets);
}
- @Override
- public UInt4Vector getOffsetVector() {
- return offsets;
+ public RepeatedMapVector(MaterializedField field, UInt4Vector offsets, CallBack callBack) {
+ super(field, offsets.getAllocator(), callBack);
+ this.offsets = offsets;
+ this.emptyPopulator = new EmptyValuePopulator(offsets);
}
@Override
+ public UInt4Vector getOffsetVector() { return offsets; }
+
+ @Override
public ValueVector getDataVector() {
throw new UnsupportedOperationException();
}
@@ -93,9 +97,7 @@ public class RepeatedMapVector extends AbstractMapVector
}
@Override
- public RepeatedMapReaderImpl getReader() {
- return reader;
- }
+ public RepeatedMapReaderImpl getReader() { return reader; }
@Override
public void allocateNew(int groupCount, int innerValueCount) {
@@ -137,6 +139,11 @@ public class RepeatedMapVector extends AbstractMapVector
}
@Override
+ public int getAllocatedSize() {
+ return offsets.getAllocatedSize() + super.getAllocatedSize();
+ }
+
+ @Override
public int getBufferSizeFor(final int valueCount) {
if (valueCount == 0) {
return 0;
[05/15] drill git commit: DRILL-5657: Size-aware vector writer structure
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java
index 4da3d9e..c20ee89 100644
--- a/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java
+++ b/exec/vector/src/main/codegen/templates/RepeatedValueVectors.java
@@ -18,6 +18,9 @@
import java.lang.Override;
+import org.apache.drill.common.types.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector;
import org.mortbay.jetty.servlet.Holder;
@@ -55,7 +58,10 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector
public Repeated${minor.class}Vector(MaterializedField field, BufferAllocator allocator) {
super(field, allocator);
- addOrGetVector(VectorDescriptor.create(Types.required(field.getType().getMinorType())));
+ MajorType majorType = field.getType();
+ addOrGetVector(VectorDescriptor.create(Types.withScaleAndPrecision(
+ majorType.getMinorType(), DataMode.REQUIRED,
+ majorType.getScale(), majorType.getPrecision())));
}
@Override
@@ -341,25 +347,12 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector
addSafe(index, bytes, 0, bytes.length);
}
- public void addEntry(int index, byte[] bytes) throws VectorOverflowException {
- addEntry(index, bytes, 0, bytes.length);
- }
-
public void addSafe(int index, byte[] bytes, int start, int length) {
final int nextOffset = offsets.getAccessor().get(index+1);
values.getMutator().setSafe(nextOffset, bytes, start, length);
offsets.getMutator().setSafe(index+1, nextOffset+1);
}
- public void addEntry(int index, byte[] bytes, int start, int length) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- final int nextOffset = offsets.getAccessor().get(index+1);
- values.getMutator().setArrayItem(nextOffset, bytes, start, length);
- offsets.getMutator().setSafe(index+1, nextOffset+1);
- }
-
<#else>
public void addSafe(int index, ${minor.javaType!type.javaType} srcValue) {
final int nextOffset = offsets.getAccessor().get(index+1);
@@ -367,15 +360,6 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector
offsets.getMutator().setSafe(index+1, nextOffset+1);
}
- public void addEntry(int index, ${minor.javaType!type.javaType} srcValue) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- final int nextOffset = offsets.getAccessor().get(index+1);
- values.getMutator().setArrayItem(nextOffset, srcValue);
- offsets.getMutator().setSafe(index+1, nextOffset+1);
- }
-
</#if>
public void setSafe(int index, Repeated${minor.class}Holder h) {
final ${minor.class}Holder ih = new ${minor.class}Holder();
@@ -393,14 +377,6 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector
offsets.getMutator().setSafe(index+1, nextOffset+1);
}
- public void addEntry(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- final int nextOffset = offsets.getAccessor().get(index+1);
- values.getMutator().setArrayItem(nextOffset, holder);
- offsets.getMutator().setSafe(index+1, nextOffset+1);
- }
public void addSafe(int index, Nullable${minor.class}Holder holder) {
final int nextOffset = offsets.getAccessor().get(index+1);
@@ -408,15 +384,6 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector
offsets.getMutator().setSafe(index+1, nextOffset+1);
}
- public void addEntry(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- final int nextOffset = offsets.getAccessor().get(index+1);
- values.getMutator().setArrayItem(nextOffset, holder);
- offsets.getMutator().setSafe(index+1, nextOffset+1);
- }
-
/**
* Backfill missing offsets from the given last written position to the
* given current write position. Used by the "new" size-safe column
@@ -427,11 +394,7 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector
* @param index the current write position to be initialized
*/
- public void fillEmptiesBounded(int lastWrite, int index)
- throws VectorOverflowException {
- if (index >= UInt4Vector.MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
+ public void fillEmpties(int lastWrite, int index) {
// If last write was 2, offsets are [0, 3, 6]
// If next write is 4, offsets must be: [0, 3, 6, 6, 6]
// Remember the offsets are one more than row count.
@@ -449,15 +412,6 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector
offsets.getMutator().setSafe(rowIndex+1, nextOffset+1);
}
- public void addEntry(int rowIndex, <#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) throws VectorOverflowException {
- if (rowIndex >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- final int nextOffset = offsets.getAccessor().get(rowIndex+1);
- values.getMutator().setArrayItem(nextOffset, <#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
- offsets.getMutator().setSafe(rowIndex+1, nextOffset+1);
- }
-
</#if>
<#if minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse">
public void addSafe(int index, BigDecimal value) {
@@ -466,15 +420,6 @@ public final class Repeated${minor.class}Vector extends BaseRepeatedValueVector
offsets.getMutator().setSafe(index+1, nextOffset+1);
}
- public void addEntry(int index, BigDecimal value) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- final int nextOffset = offsets.getAccessor().get(index+1);
- values.getMutator().setArrayItem(nextOffset, value);
- offsets.getMutator().setSafe(index+1, nextOffset+1);
- }
-
</#if>
protected void add(int index, ${minor.class}Holder holder) {
final int nextOffset = offsets.getAccessor().get(index+1);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/codegen/templates/UnionVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/codegen/templates/UnionVector.java b/exec/vector/src/main/codegen/templates/UnionVector.java
index c198544..a46779d 100644
--- a/exec/vector/src/main/codegen/templates/UnionVector.java
+++ b/exec/vector/src/main/codegen/templates/UnionVector.java
@@ -116,7 +116,6 @@ public class UnionVector implements ValueVector {
}
return mapVector;
}
-
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>
@@ -136,9 +135,7 @@ public class UnionVector implements ValueVector {
}
return ${uncappedName}Vector;
}
-
</#if>
-
</#list></#list>
private static final MajorType LIST_TYPE = Types.optional(MinorType.LIST);
@@ -183,8 +180,7 @@ public class UnionVector implements ValueVector {
}
@Override
- public void setInitialCapacity(int numRecords) {
- }
+ public void setInitialCapacity(int numRecords) { }
@Override
public int getValueCapacity() {
@@ -192,8 +188,7 @@ public class UnionVector implements ValueVector {
}
@Override
- public void close() {
- }
+ public void close() { }
@Override
public void clear() {
@@ -201,9 +196,7 @@ public class UnionVector implements ValueVector {
}
@Override
- public MaterializedField getField() {
- return field;
- }
+ public MaterializedField getField() { return field; }
@Override
public void collectLedgers(Set<BufferLedger> ledgers) {
@@ -290,9 +283,7 @@ public class UnionVector implements ValueVector {
}
@Override
- public void splitAndTransfer(int startIndex, int length) {
-
- }
+ public void splitAndTransfer(int startIndex, int length) { }
@Override
public ValueVector getTo() {
@@ -306,14 +297,10 @@ public class UnionVector implements ValueVector {
}
@Override
- public Accessor getAccessor() {
- return accessor;
- }
+ public Accessor getAccessor() { return accessor; }
@Override
- public Mutator getMutator() {
- return mutator;
- }
+ public Mutator getMutator() { return mutator; }
@Override
public FieldReader getReader() {
@@ -347,6 +334,11 @@ public class UnionVector implements ValueVector {
}
@Override
+ public int getAllocatedSize() {
+ return internalMap.getAllocatedSize();
+ }
+
+ @Override
public int getBufferSizeFor(final int valueCount) {
if (valueCount == 0) {
return 0;
@@ -381,7 +373,6 @@ public class UnionVector implements ValueVector {
public class Accessor extends BaseValueVector.BaseAccessor {
-
@Override
public Object getObject(int index) {
int type = typeVector.getAccessor().get(index);
@@ -406,12 +397,9 @@ public class UnionVector implements ValueVector {
}
}
- public byte[] get(int index) {
- return null;
- }
+ public byte[] get(int index) { return null; }
- public void get(int index, ComplexHolder holder) {
- }
+ public void get(int index, ComplexHolder holder) { }
public void get(int index, UnionHolder holder) {
FieldReader reader = new UnionReader(UnionVector.this);
@@ -420,9 +408,7 @@ public class UnionVector implements ValueVector {
}
@Override
- public int getValueCount() {
- return valueCount;
- }
+ public int getValueCount() { return valueCount; }
@Override
public boolean isNull(int index) {
@@ -436,7 +422,7 @@ public class UnionVector implements ValueVector {
public class Mutator extends BaseValueVector.BaseMutator {
- UnionWriter writer;
+ protected UnionWriter writer;
@Override
public void setValueCount(int valueCount) {
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
index e5432da..a29194a 100644
--- a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
+++ b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
@@ -54,7 +54,6 @@ package org.apache.drill.exec.vector;
*/
public final class ${minor.class}Vector extends BaseDataValueVector implements VariableWidthVector {
- private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(${minor.class}Vector.class);
private static final int DEFAULT_RECORD_BYTE_COUNT = 8;
private static final int INITIAL_BYTE_COUNT = Math.min(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT, MAX_BUFFER_SIZE);
@@ -68,20 +67,17 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
private final Accessor accessor;
private final Mutator mutator;
- private final UInt${type.width}Vector.Accessor oAccessor;
-
private int allocationSizeInBytes = INITIAL_BYTE_COUNT;
private int allocationMonitor = 0;
public ${minor.class}Vector(MaterializedField field, BufferAllocator allocator) {
super(field, allocator);
- this.oAccessor = offsetVector.getAccessor();
this.accessor = new Accessor();
this.mutator = new Mutator();
}
@Override
- public FieldReader getReader(){
+ public FieldReader getReader() {
return reader;
}
@@ -94,6 +90,11 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
@Override
+ public int getAllocatedSize() {
+ return offsetVector.getAllocatedSize() + data.capacity();
+ }
+
+ @Override
public int getBufferSizeFor(final int valueCount) {
if (valueCount == 0) {
return 0;
@@ -104,12 +105,12 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
@Override
- public int getValueCapacity(){
+ public int getValueCapacity() {
return Math.max(offsetVector.getValueCapacity() - 1, 0);
}
@Override
- public int getByteCapacity(){
+ public int getByteCapacity() {
return data.capacity();
}
@@ -124,7 +125,7 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
*/
public int getVarByteLength(){
final int valueCount = getAccessor().getValueCount();
- if(valueCount == 0) {
+ if (valueCount == 0) {
return 0;
}
return offsetVector.getAccessor().get(valueCount);
@@ -132,10 +133,10 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
@Override
public SerializedField getMetadata() {
- return getMetadataBuilder() //
+ return getMetadataBuilder()
.addChild(offsetVector.getMetadata())
- .setValueCount(getAccessor().getValueCount()) //
- .setBufferLength(getBufferSize()) //
+ .setValueCount(getAccessor().getValueCount())
+ .setBufferLength(getBufferSize())
.build();
}
@@ -170,21 +171,21 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
return buffers;
}
- public long getOffsetAddr(){
+ public long getOffsetAddr() {
return offsetVector.getBuffer().memoryAddress();
}
- public UInt${type.width}Vector getOffsetVector(){
+ public UInt${type.width}Vector getOffsetVector() {
return offsetVector;
}
@Override
- public TransferPair getTransferPair(BufferAllocator allocator){
+ public TransferPair getTransferPair(BufferAllocator allocator) {
return new TransferImpl(getField(), allocator);
}
@Override
- public TransferPair getTransferPair(String ref, BufferAllocator allocator){
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
return new TransferImpl(getField().withPath(ref), allocator);
}
@@ -193,7 +194,7 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
return new TransferImpl((${minor.class}Vector) to);
}
- public void transferTo(${minor.class}Vector target){
+ public void transferTo(${minor.class}Vector target) {
target.clear();
this.offsetVector.transferTo(target.offsetVector);
target.data = data.transferOwnership(target.allocator).buffer;
@@ -272,23 +273,23 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
private class TransferImpl implements TransferPair{
- ${minor.class}Vector to;
+ private final ${minor.class}Vector to;
public TransferImpl(MaterializedField field, BufferAllocator allocator){
to = new ${minor.class}Vector(field, allocator);
}
- public TransferImpl(${minor.class}Vector to){
+ public TransferImpl(${minor.class}Vector to) {
this.to = to;
}
@Override
- public ${minor.class}Vector getTo(){
+ public ${minor.class}Vector getTo() {
return to;
}
@Override
- public void transfer(){
+ public void transfer() {
transferTo(to);
}
@@ -309,7 +310,7 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
if (size > MAX_ALLOCATION_SIZE) {
throw new OversizedAllocationException("Requested amount of memory is more than max allowed allocation size");
}
- allocationSizeInBytes = (int) size;
+ allocationSizeInBytes = (int)size;
offsetVector.setInitialCapacity(valueCount + 1);
}
@@ -386,12 +387,17 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
throw new OversizedAllocationException("Unable to expand the buffer. Max allowed buffer size is reached.");
}
- logger.trace("Reallocating VarChar, new size {}", newAllocationSize);
- final DrillBuf newBuf = allocator.buffer((int)newAllocationSize);
+ reallocRaw((int) newAllocationSize);
+ }
+
+ @Override
+ public DrillBuf reallocRaw(int newAllocationSize) {
+ final DrillBuf newBuf = allocator.buffer(newAllocationSize);
newBuf.setBytes(0, data, 0, data.capacity());
data.release();
data = newBuf;
- allocationSizeInBytes = (int)newAllocationSize;
+ allocationSizeInBytes = newAllocationSize;
+ return data;
}
public void decrementAllocationMonitor() {
@@ -430,6 +436,7 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
public final class Accessor extends BaseValueVector.BaseAccessor implements VariableWidthAccessor {
final UInt${type.width}Vector.Accessor oAccessor = offsetVector.getAccessor();
+
public long getStartEnd(int index){
return oAccessor.getTwoAsLong(index);
}
@@ -463,7 +470,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
holder.buffer = data;
}
-
<#switch minor.class>
<#case "VarChar">
@Override
@@ -541,10 +547,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
}
- public void setScalar(int index, byte[] bytes) throws VectorOverflowException {
- setScalar(index, bytes, 0, bytes.length);
- }
-
/**
* Set the variable length element at the specified index to the supplied byte array.
*
@@ -575,23 +577,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
}
- public void setScalar(int index, DrillBuf bytes, int start, int length) throws VectorOverflowException {
- assert index >= 0;
-
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- int currentOffset = offsetVector.getAccessor().get(index);
- final int newSize = currentOffset + length;
- if (newSize > MAX_BUFFER_SIZE) {
- throw new VectorOverflowException();
- }
- while (! data.setBytesBounded(currentOffset, bytes, start, length)) {
- reAlloc();
- }
- offsetVector.getMutator().setSafe(index + 1, newSize);
- }
-
public void setSafe(int index, byte[] bytes, int start, int length) {
assert index >= 0;
@@ -608,28 +593,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
}
- public void setScalar(int index, byte[] bytes, int start, int length) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- setArrayItem(index, bytes, start, length);
- }
-
- public void setArrayItem(int index, byte[] bytes, int start, int length) throws VectorOverflowException {
- assert index >= 0;
-
- final int currentOffset = offsetVector.getAccessor().get(index);
- final int newSize = currentOffset + length;
- if (newSize > MAX_BUFFER_SIZE) {
- throw new VectorOverflowException();
- }
-
- while (! data.setBytesBounded(currentOffset, bytes, start, length)) {
- reAlloc();
- }
- offsetVector.getMutator().setSafe(index + 1, newSize);
- }
-
@Override
public void setValueLengthSafe(int index, int length) {
final int offset = offsetVector.getAccessor().get(index);
@@ -654,32 +617,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
}
- public void setScalar(int index, int start, int end, DrillBuf buffer) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- setArrayItem(index, start, end, buffer);
- }
-
- public void setArrayItem(int index, int start, int end, DrillBuf buffer) throws VectorOverflowException {
- final int len = end - start;
- final int outputStart = offsetVector.data.get${(minor.javaType!type.javaType)?cap_first}(index * ${type.width});
- final int newSize = outputStart + len;
- if (newSize > MAX_BUFFER_SIZE) {
- throw new VectorOverflowException();
- }
-
- offsetVector.getMutator().setSafe(index+1, newSize);
- try{
- buffer.getBytes(start, data, outputStart, len);
- } catch (IndexOutOfBoundsException e) {
- while (data.capacity() < newSize) {
- reAlloc();
- }
- buffer.getBytes(start, data, outputStart, len);
- }
- }
-
public void setSafe(int index, Nullable${minor.class}Holder holder) {
assert holder.isSet == 1;
@@ -700,37 +637,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
offsetVector.getMutator().setSafe(index+1, outputStart + len);
}
- public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- setArrayItem(index, holder);
- }
-
- public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- assert holder.isSet == 1;
-
- final int start = holder.start;
- final int end = holder.end;
- final int len = end - start;
-
- final int outputStart = offsetVector.data.get${(minor.javaType!type.javaType)?cap_first}(index * ${type.width});
- final int newSize = outputStart + len;
- if (newSize > MAX_BUFFER_SIZE) {
- throw new VectorOverflowException();
- }
-
- try {
- holder.buffer.getBytes(start, data, outputStart, len);
- } catch (IndexOutOfBoundsException e) {
- while (data.capacity() < newSize) {
- reAlloc();
- }
- holder.buffer.getBytes(start, data, outputStart, len);
- }
- offsetVector.getMutator().setSafe(index+1, newSize);
- }
-
public void setSafe(int index, ${minor.class}Holder holder) {
final int start = holder.start;
final int end = holder.end;
@@ -748,34 +654,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
offsetVector.getMutator().setSafe( index+1, outputStart + len);
}
- public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- setArrayItem(index, holder);
- }
-
- public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- final int start = holder.start;
- final int end = holder.end;
- final int len = end - start;
- final int outputStart = offsetVector.data.get${(minor.javaType!type.javaType)?cap_first}(index * ${type.width});
- final int newSize = outputStart + len;
- if (newSize > MAX_BUFFER_SIZE) {
- throw new VectorOverflowException();
- }
-
- try {
- holder.buffer.getBytes(start, data, outputStart, len);
- } catch (IndexOutOfBoundsException e) {
- while(data.capacity() < newSize) {
- reAlloc();
- }
- holder.buffer.getBytes(start, data, outputStart, len);
- }
- offsetVector.getMutator().setSafe( index+1, newSize);
- }
-
/**
* Backfill missing offsets from the given last written position to the
* given current write position. Used by the "new" size-safe column
@@ -785,20 +663,9 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
* to be copied forward
* @param index the current write position filling occurs up to,
* but not including, this position
- * @throws VectorOverflowException if the item was written, false if the index would
- * overfill the vector
*/
- public void fillEmptiesBounded(int lastWrite, int index)
- throws VectorOverflowException {
-
- // Index is the next write index, which might be "virtual",
- // that is, past the last row at EOF. This check only protects
- // the actual data written here, which is up to index-1.
-
- if (index > UInt4Vector.MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
+ public void fillEmpties(int lastWrite, int index) {
// If last write was 2, offsets are [0, 3, 6]
// If next write is 4, offsets must be: [0, 3, 6, 6, 6]
// Remember the offsets are one more than row count.
@@ -810,7 +677,7 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
}
- protected void set(int index, int start, int length, DrillBuf buffer){
+ protected void set(int index, int start, int length, DrillBuf buffer) {
assert index >= 0;
final int currentOffset = offsetVector.getAccessor().get(index);
offsetVector.getMutator().set(index + 1, currentOffset + length);
@@ -818,33 +685,20 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
data.setBytes(currentOffset, bb);
}
- protected void set(int index, Nullable${minor.class}Holder holder){
+ protected void set(int index, Nullable${minor.class}Holder holder) {
final int length = holder.end - holder.start;
final int currentOffset = offsetVector.getAccessor().get(index);
offsetVector.getMutator().set(index + 1, currentOffset + length);
data.setBytes(currentOffset, holder.buffer, holder.start, length);
}
- protected void set(int index, ${minor.class}Holder holder){
+ protected void set(int index, ${minor.class}Holder holder) {
final int length = holder.end - holder.start;
final int currentOffset = offsetVector.getAccessor().get(index);
offsetVector.getMutator().set(index + 1, currentOffset + length);
data.setBytes(currentOffset, holder.buffer, holder.start, length);
}
- <#if (minor.class == "VarChar")>
- public void setScalar(int index, String value) throws VectorOverflowException {
- if (index >= MAX_ROW_COUNT) {
- throw new VectorOverflowException();
- }
- // Treat a null string as an empty string.
- if (value != null) {
- byte encoded[] = value.getBytes(Charsets.UTF_8);
- setScalar(index, encoded, 0, encoded.length);
- }
- }
-
- </#if>
@Override
public void setValueCount(int valueCount) {
final int currentByteCapacity = getByteCapacity();
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/record/ColumnMetadata.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/ColumnMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/ColumnMetadata.java
new file mode 100644
index 0000000..558aab8
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/ColumnMetadata.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.record;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+
+/**
+ * Metadata description of a column including names, types and structure
+ * information.
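+ * <p>
+ * A sketch of how the sizing hints below might drive vector allocation
+ * (the surrounding allocation code is hypothetical):
+ * <pre><code>
+ * // One value per row, times the expected array cardinality.
+ * int valueCount = rowCount * column.expectedElementCount();
+ * // Actual width for fixed-width columns, an estimate otherwise.
+ * int dataBytes = valueCount * column.expectedWidth();
+ * </code></pre>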
+ */
+
+public interface ColumnMetadata {
+ enum StructureType {
+ PRIMITIVE, LIST, TUPLE
+ }
+
+ public static final int DEFAULT_ARRAY_SIZE = 10;
+
+ ColumnMetadata.StructureType structureType();
+ TupleMetadata mapSchema();
+ MaterializedField schema();
+ String name();
+ MajorType majorType();
+ MinorType type();
+ DataMode mode();
+ boolean isNullable();
+ boolean isArray();
+ boolean isVariableWidth();
+ boolean isMap();
+ boolean isList();
+
+ /**
+ * Report whether one column is equivalent to another. Columns are equivalent
+ * if they have the same name, type and structure (ignoring internal structure
+ * such as offset vectors.)
+ */
+
+ boolean isEquivalent(ColumnMetadata other);
+
+ /**
+ * For variable-width columns, specify the expected column width to be used
+ * when allocating a new vector. Does nothing for fixed-width columns.
+ *
+ * @param width the expected column width
+ */
+
+ void setExpectedWidth(int width);
+
+ /**
+ * Get the expected width for a column. This is the actual width for fixed-
+ * width columns, or the specified width (defaulting to 50) for variable-width
+ * columns.
+ * @return the expected column width of each data value. Does not include
+ * "overhead" space such as for the null-value vector or offset vector
+ */
+
+ int expectedWidth();
+
+ /**
+ * For an array column, specify the expected average array cardinality.
+ * Ignored for non-array columns. Used when allocating new vectors.
+ *
+ * @param childCount the expected average array cardinality. Defaults to
+ * 1 for non-array columns, 10 for array columns
+ */
+
+ void setExpectedElementCount(int childCount);
+
+ /**
+ * Returns the expected array cardinality for array columns, or 1 for
+ * non-array columns.
+ *
+ * @return the expected value cardinality per value (per-row for top-level
+ * columns, per array element for arrays within lists)
+ */
+
+ int expectedElementCount();
+
+ /**
+ * Create an empty version of this column. If the column is a scalar,
+ * produces a simple copy. If a map, produces a clone without child
+ * columns.
+ *
+ * @return empty clone of this column
+ */
+
+ ColumnMetadata cloneEmpty();
+
+ /**
+ * Reports whether, in this context, the column is projected (that is,
+ * whether the column is backed by an actual value vector).
+ */
+
+ boolean isProjected();
+ void setProjected(boolean projected);
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
index 4d29d55..b4b23c7 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/MaterializedField.java
@@ -92,6 +92,10 @@ public class MaterializedField {
return withPathAndType(name, getType());
}
+ public MaterializedField cloneEmpty() {
+ return create(name, type);
+ }
+
public MaterializedField withType(MajorType type) {
return withPathAndType(name, type);
}
@@ -170,7 +174,37 @@ public class MaterializedField {
Objects.equals(this.type, other.type);
}
+ /**
+ * Determine if one column is logically equivalent to another. This is
+ * a tricky issue. The rules here:
+ * <ul>
+ * <li>The other schema is assumed to be non-null (unlike
+ * <tt>equals()</tt>).</li>
+ * <li>Names must be identical, ignoring case. (Drill, like SQL, is
+ * case insensitive.)</li>
+ * <li>Type, mode, precision and scale must be identical.</li>
+ * <li>Child columns are ignored unless the type is a map. That is, the
+ * hidden "$bits" and "$offsets" vector columns are not compared, as
+ * one schema may be an "original" (without these hidden columns) while
+ * the other may come from a vector (which has the hidden columns added).
+ * The standard <tt>equals()</tt> comparison does consider hidden
+ * columns.</li>
+ * <li>For maps, the child columns are compared recursively. This version
+ * requires that the two sets of columns appear in the same order. (It
+ * assumes it is being used in a context where column indexes make
+ * sense.) Operators that want to reconcile two maps that differ only in
+ * column order need a different comparison.</li>
+ * </ul>
+ *
+ * @param other another field
+ * @return <tt>true</tt> if the columns are identical according to the
+ * above rules, <tt>false</tt> if they differ
+ */
+
public boolean isEquivalent(MaterializedField other) {
+ if (this == other) {
+ return true;
+ }
if (! name.equalsIgnoreCase(other.name)) {
return false;
}
@@ -199,7 +233,7 @@ public class MaterializedField {
return true;
}
- if (children == null || other.children == null) {
+ if (children == null || other.children == null) {
return children == other.children;
}
if (children.size() != other.children.size()) {
@@ -226,11 +260,12 @@ public class MaterializedField {
* Includes field name, its type with precision and scale if any and data mode.
* Nested fields if any are included. Number of nested fields to include is limited to 10.</p>
*
- * <b>FIELD_NAME(TYPE(PRECISION,SCALE):DATA_MODE)[NESTED_FIELD_1, NESTED_FIELD_2]</b>
+ * <b>FIELD_NAME(TYPE(PRECISION,SCALE):DATA_MODE)[NESTED_FIELD_1, NESTED_FIELD_2]</b><br>
* <p>Example: ok(BIT:REQUIRED), col(VARCHAR(3):OPTIONAL), emp_id(DECIMAL28SPARSE(6,0):REQUIRED)</p>
*
* @return materialized field string representation
*/
+
@Override
public String toString() {
final int maxLen = 10;
@@ -258,7 +293,7 @@ public class MaterializedField {
.append(childString);
return builder.toString();
-}
+ }
/**
* Return true if two fields have identical MinorType and Mode.
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/record/TupleMetadata.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/TupleMetadata.java b/exec/vector/src/main/java/org/apache/drill/exec/record/TupleMetadata.java
new file mode 100644
index 0000000..8f597be
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/TupleMetadata.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.record;
+
+import java.util.List;
+
+/**
+ * Metadata description of the schema of a row or a map.
+ * In Drill, both rows and maps are
+ * tuples: each is an ordered collection of values, defined by a
+ * schema. Each tuple has a schema that defines the column ordering
+ * for indexed access. Each tuple also provides methods to get column
+ * accessors by name or index.
+ * <p>
+ * Models the physical schema of a row set showing the logical hierarchy of fields
+ * with map fields as first-class fields. Map members appear as children
+ * under the map, much as they appear in the physical value-vector
+ * implementation.
+ * <ul>
+ * <li>Provides fast lookup by name or index.</li>
+ * <li>Provides a nested schema, in this same form, for maps.</li>
+ * </ul>
+ * This form is useful when performing semantic analysis and when
+ * working with vectors.
+ * <p>
+ * In the future, this structure will also gather metadata useful
+ * for vector processing such as expected widths and so on.
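+ * <p>
+ * A sketch of typical lookups (the schema instance is assumed to come
+ * from elsewhere):
+ * <pre><code>
+ * ColumnMetadata col = schema.metadata("customer");
+ * if (col.isMap()) {
+ *   // A map exposes its members as a nested tuple schema.
+ *   TupleMetadata mapSchema = col.mapSchema();
+ * }</code></pre>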
+ */
+
+public interface TupleMetadata extends Iterable<ColumnMetadata> {
+
+ /**
+ * Add a new column to the schema.
+ *
+ * @param field the field describing the column to add
+ * @return metadata for the newly added column
+ */
+ ColumnMetadata add(MaterializedField field);
+ int addColumn(ColumnMetadata column);
+
+ int size();
+ boolean isEmpty();
+ int index(String name);
+ ColumnMetadata metadata(int index);
+ ColumnMetadata metadata(String name);
+ MaterializedField column(int index);
+ MaterializedField column(String name);
+ boolean isEquivalent(TupleMetadata other);
+ ColumnMetadata parent();
+
+ /**
+ * Return the schema as a list of <tt>MaterializedField</tt> objects
+ * which can be used to create other schemas. Not valid for a
+ * flattened schema.
+ *
+ * @return a list of the top-level fields. Maps contain their child
+ * fields
+ */
+
+ List<MaterializedField> toFieldList();
+
+ /**
+ * Full name of the column. Note: this name cannot be used to look up
+ * the column because of ambiguity. The name "a.b.c" may mean a single
+ * column with that name, or may mean maps "a" and "b" with column "c",
+ * etc.
+ *
+ * @return full, dotted, column name
+ */
+
+ String fullName(ColumnMetadata column);
+ String fullName(int index);
+}
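To make the interface concrete, a brief usage sketch (assuming some concrete
TupleMetadata implementation "schema" and a hypothetical column "a"):

    // Look up columns by position or by name.
    ColumnMetadata byIndex = schema.metadata(0);
    ColumnMetadata byName = schema.metadata("a");
    int posn = schema.index("a");        // position of "a" within the tuple
    String dotted = schema.fullName(0);  // e.g. "m.a" for a member of map "m"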
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/record/TupleNameSpace.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/record/TupleNameSpace.java b/exec/vector/src/main/java/org/apache/drill/exec/record/TupleNameSpace.java
new file mode 100644
index 0000000..5853c93
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/record/TupleNameSpace.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.record;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.drill.common.map.CaseInsensitiveMap;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Implementation of a tuple name space. Tuples allow both indexed and
+ * named access to their members.
+ *
+ * @param <T> the type of object representing each column
+ */
+
+public class TupleNameSpace<T> implements Iterable<T> {
+ private final Map<String,Integer> nameSpace = CaseInsensitiveMap.newHashMap();
+ private final List<T> entries = new ArrayList<>();
+
+ public int add(String key, T value) {
+ if (indexOf(key) != -1) {
+ throw new IllegalArgumentException("Duplicate entry: " + key);
+ }
+ int index = entries.size();
+ nameSpace.put(key, index);
+ entries.add(value);
+ return index;
+ }
+
+ public T get(int index) {
+ return entries.get(index);
+ }
+
+ public T get(String key) {
+ int index = indexOf(key);
+ if (index == -1) {
+ return null;
+ }
+ return get(index);
+ }
+
+ public int indexOf(String key) {
+ Integer index = nameSpace.get(key);
+ if (index == null) {
+ return -1;
+ }
+ return index;
+ }
+
+ public int count() { return entries.size(); }
+
+ @Override
+ public Iterator<T> iterator() {
+ return entries.iterator();
+ }
+
+ public boolean isEmpty() {
+ return entries.isEmpty();
+ }
+
+ public List<T> entries() {
+ return ImmutableList.copyOf(entries);
+ }
+
+ @Override
+ public String toString() {
+ return entries.toString();
+ }
+}
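A brief usage sketch for TupleNameSpace, grounded in the methods above (the
keys and values here are hypothetical):

    TupleNameSpace<String> ns = new TupleNameSpace<>();
    ns.add("first", "value-1");     // returns index 0
    ns.add("second", "value-2");    // returns index 1
    ns.get("FIRST");                // "value-1"; the name map is case-insensitive
    ns.indexOf("missing");          // -1 for an unknown key
    ns.add("first", "dup");         // throws IllegalArgumentException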
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/AllocationHelper.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/AllocationHelper.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/AllocationHelper.java
index 100997e..4fd0cbd 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/AllocationHelper.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/AllocationHelper.java
@@ -39,7 +39,7 @@ public class AllocationHelper {
((VariableWidthVector) vector).allocateNew(valueCount * bytesPerValue, valueCount);
} else if (vector instanceof RepeatedFixedWidthVectorLike) {
((RepeatedFixedWidthVectorLike) vector).allocateNew(valueCount, childValCount);
- } else if (vector instanceof RepeatedVariableWidthVectorLike && childValCount > 0 && bytesPerValue > 0) {
+ } else if (vector instanceof RepeatedVariableWidthVectorLike) {
// Assertion thrown if byte count is zero in the full allocateNew,
// so use default version instead.
((RepeatedVariableWidthVectorLike) vector).allocateNew(childValCount * bytesPerValue, valueCount, childValCount);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java
index e98a417..4391e8c 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/BaseDataValueVector.java
@@ -37,6 +37,16 @@ public abstract class BaseDataValueVector extends BaseValueVector {
data = allocator.getEmpty();
}
+ /**
+ * Core of vector reallocation. Given a new size (which must be a power of
+ * two), allocate the new buffer, copy the current values, and leave the
+ * unused parts garbage-filled.
+ *
+ * @param newAllocationSize new buffer size as a power of two
+ * @return the new buffer
+ */
+
+ public abstract DrillBuf reallocRaw(int newAllocationSize);
+
@Override
public void clear() {
if (data != null) {
@@ -82,6 +92,11 @@ public abstract class BaseDataValueVector extends BaseValueVector {
return data.writerIndex();
}
+ @Override
+ public int getAllocatedSize() {
+ return data.capacity();
+ }
+
public DrillBuf getBuffer() { return data; }
/**
@@ -101,6 +116,7 @@ public abstract class BaseDataValueVector extends BaseValueVector {
// No state in an Accessor to reset
}
+ @Override
public void collectLedgers(Set<BufferLedger> ledgers) {
BufferLedger ledger = data.getLedger();
if (ledger != null) {
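For orientation, a concrete reallocRaw() implementation might look roughly
like the following sketch (not the actual template-generated code; it simply
follows the contract stated in the javadoc above):

    @Override
    public DrillBuf reallocRaw(int newAllocationSize) {
      // Allocate the new buffer, copy the current values, and leave the
      // tail garbage-filled, as the contract describes.
      final DrillBuf newBuf = allocator.buffer(newAllocationSize);
      newBuf.setBytes(0, data, 0, data.capacity());
      data.release();
      data = newBuf;
      return newBuf;
    }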
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/BitVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/BitVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/BitVector.java
index f879fc4..219db12 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/BitVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/BitVector.java
@@ -193,6 +193,16 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe
allocationSizeInBytes = curSize;
}
+ // This version delegates to reAlloc() because this vector appears to be
+ // unused, so it is not worth the effort to avoid the zero-fill.
+
+ @Override
+ public DrillBuf reallocRaw(int newAllocationSize) {
+ while (allocationSizeInBytes < newAllocationSize) {
+ reAlloc();
+ }
+ return data;
+ }
+
/**
* {@inheritDoc}
*/
@@ -438,20 +448,6 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe
set(index, value);
}
- public void setScalar(int index, int value) throws VectorOverflowException {
- if (index >= MAX_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, value);
- }
-
- public void setArrayItem(int index, int value) throws VectorOverflowException {
- if (index >= MAX_CAPACITY) {
- throw new VectorOverflowException();
- }
- setSafe(index, value);
- }
-
public void setSafe(int index, BitHolder holder) {
while(index >= getValueCapacity()) {
reAlloc();
@@ -459,20 +455,6 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe
set(index, holder.value);
}
- public void setScalar(int index, BitHolder holder) throws VectorOverflowException {
- if (index >= MAX_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, holder);
- }
-
- public void setArrayItem(int index, BitHolder holder) throws VectorOverflowException {
- if (index >= MAX_CAPACITY) {
- throw new VectorOverflowException();
- }
- setSafe(index, holder);
- }
-
public void setSafe(int index, NullableBitHolder holder) {
while(index >= getValueCapacity()) {
reAlloc();
@@ -480,20 +462,6 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe
set(index, holder.value);
}
- public void setScalar(int index, NullableBitHolder holder) throws VectorOverflowException {
- if (index >= MAX_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, holder);
- }
-
- public void setArrayItem(int index, NullableBitHolder holder) throws VectorOverflowException {
- if (index >= MAX_CAPACITY) {
- throw new VectorOverflowException();
- }
- setSafe(index, holder);
- }
-
@Override
public final void setValueCount(int valueCount) {
int currentValueCapacity = getValueCapacity();
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/FixedWidthVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/FixedWidthVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/FixedWidthVector.java
index a9a1631..09bcdd8 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/FixedWidthVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/FixedWidthVector.java
@@ -17,7 +17,6 @@
*/
package org.apache.drill.exec.vector;
-
public interface FixedWidthVector extends ValueVector {
/**
@@ -27,8 +26,8 @@ public interface FixedWidthVector extends ValueVector {
*/
void allocateNew(int valueCount);
-/**
- * Zero out the underlying buffer backing this vector.
- */
+ /**
+ * Zero out the underlying buffer backing this vector.
+ */
void zeroVector();
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/ObjectVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/ObjectVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/ObjectVector.java
index c9edeb0..8d515d8 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/ObjectVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/ObjectVector.java
@@ -95,8 +95,7 @@ public class ObjectVector extends BaseValueVector {
}
@Override
- public void generateTestData(int values) {
- }
+ public void generateTestData(int values) { }
@Override
public void exchange(ValueVector.Mutator other) { }
@@ -130,6 +129,9 @@ public class ObjectVector extends BaseValueVector {
}
@Override
+ public int getAllocatedSize() { return 0; }
+
+ @Override
public int getBufferSizeFor(final int valueCount) {
throw new UnsupportedOperationException("ObjectVector does not support this");
}
@@ -147,9 +149,7 @@ public class ObjectVector extends BaseValueVector {
}
@Override
- public MaterializedField getField() {
- return field;
- }
+ public MaterializedField getField() { return field; }
@Override
public TransferPair getTransferPair(BufferAllocator allocator) {
@@ -172,14 +172,10 @@ public class ObjectVector extends BaseValueVector {
}
@Override
- public int getValueCapacity() {
- return maxCount;
- }
+ public int getValueCapacity() { return maxCount; }
@Override
- public Accessor getAccessor() {
- return accessor;
- }
+ public Accessor getAccessor() { return accessor; }
@Override
public DrillBuf[] getBuffers(boolean clear) {
@@ -197,9 +193,7 @@ public class ObjectVector extends BaseValueVector {
}
@Override
- public Mutator getMutator() {
- return mutator;
- }
+ public Mutator getMutator() { return mutator; }
@Override
public Iterator<ValueVector> iterator() {
@@ -222,9 +216,7 @@ public class ObjectVector extends BaseValueVector {
}
@Override
- public int getValueCount() {
- return count;
- }
+ public int getValueCount() { return count; }
public Object get(int index) {
return getObject(index);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/UntypedNullVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/UntypedNullVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/UntypedNullVector.java
index 8288fe2..5565fa4 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/UntypedNullVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/UntypedNullVector.java
@@ -27,8 +27,6 @@ import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
-import static org.apache.calcite.sql.parser.impl.SqlParserImplConstants.C;
-
/** UntypedNullVector is to represent a value vector with {@link org.apache.drill.common.types.MinorType#NULL}
* All values in the vector represent two semantic implications: 1) the value is unknown, 2) the type is unknown.
* Because of this, we only have to keep track of the number of values in value vector,
@@ -37,7 +35,6 @@ import static org.apache.calcite.sql.parser.impl.SqlParserImplConstants.C;
*
*/
public final class UntypedNullVector extends BaseDataValueVector implements FixedWidthVector {
- private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(UntypedNullVector.class);
/**
* Width of each fixed-width value.
@@ -57,14 +54,10 @@ public final class UntypedNullVector extends BaseDataValueVector implements Fixe
public FieldReader getReader() { throw new UnsupportedOperationException(); }
@Override
- public int getBufferSizeFor(final int valueCount) {
- return 0;
- }
+ public int getBufferSizeFor(final int valueCount) { return 0; }
@Override
- public int getValueCapacity(){
- return ValueVector.MAX_ROW_COUNT;
- }
+ public int getValueCapacity() { return ValueVector.MAX_ROW_COUNT; }
@Override
public Accessor getAccessor() { return accessor; }
@@ -73,31 +66,29 @@ public final class UntypedNullVector extends BaseDataValueVector implements Fixe
public Mutator getMutator() { return mutator; }
@Override
- public void setInitialCapacity(final int valueCount) {
- }
+ public void setInitialCapacity(final int valueCount) { }
@Override
- public void allocateNew() {
- }
+ public void allocateNew() { }
@Override
- public boolean allocateNewSafe() {
- return true;
- }
+ public boolean allocateNewSafe() { return true; }
@Override
- public void allocateNew(final int valueCount) {
- }
+ public void allocateNew(final int valueCount) { }
@Override
- public void reset() {
- }
+ public void reset() { }
/**
* {@inheritDoc}
*/
@Override
- public void zeroVector() {
+ public void zeroVector() { }
+
+ @Override
+ public DrillBuf reallocRaw(int newAllocationSize) {
+ throw new UnsupportedOperationException();
}
@Override
@@ -127,19 +118,15 @@ public final class UntypedNullVector extends BaseDataValueVector implements Fixe
return new TransferImpl((UntypedNullVector) to);
}
- public void transferTo(UntypedNullVector target){
- }
+ public void transferTo(UntypedNullVector target) { }
- public void splitAndTransferTo(int startIndex, int length, UntypedNullVector target) {
- }
+ public void splitAndTransferTo(int startIndex, int length, UntypedNullVector target) { }
@Override
- public int getPayloadByteCount(int valueCount) {
- return 0;
- }
+ public int getPayloadByteCount(int valueCount) { return 0; }
private class TransferImpl implements TransferPair{
- private UntypedNullVector to;
+ private final UntypedNullVector to;
public TransferImpl(MaterializedField field, BufferAllocator allocator){
to = new UntypedNullVector(field, allocator);
@@ -150,9 +137,7 @@ public final class UntypedNullVector extends BaseDataValueVector implements Fixe
}
@Override
- public UntypedNullVector getTo(){
- return to;
- }
+ public UntypedNullVector getTo() { return to; }
@Override
public void transfer(){
@@ -173,11 +158,9 @@ public final class UntypedNullVector extends BaseDataValueVector implements Fixe
}
}
- public void copyFrom(int fromIndex, int thisIndex, UntypedNullVector from){
- }
+ public void copyFrom(int fromIndex, int thisIndex, UntypedNullVector from) { }
- public void copyFromSafe(int fromIndex, int thisIndex, UntypedNullVector from){
- }
+ public void copyFromSafe(int fromIndex, int thisIndex, UntypedNullVector from) { }
private void checkBounds(int index) {
if (index < 0 || index >= valueCount) {
@@ -216,7 +199,6 @@ public final class UntypedNullVector extends BaseDataValueVector implements Fixe
public void get(int index, UntypedNullHolder holder) {
checkBounds(index);
}
-
}
/**
@@ -224,7 +206,7 @@ public final class UntypedNullVector extends BaseDataValueVector implements Fixe
* value counts.
*
*/
- public final class Mutator extends BaseMutator {
+ public final class Mutator extends BaseMutator {
private Mutator() {}
@@ -266,5 +248,4 @@ public final class UntypedNullVector extends BaseDataValueVector implements Fixe
UntypedNullVector.this.valueCount = valueCount;
}
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
index a090cad..bc06803 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
@@ -25,6 +25,7 @@ import io.netty.buffer.DrillBuf;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.memory.AllocationManager.BufferLedger;
+import org.apache.drill.exec.memory.AllocationManager;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.proto.UserBitShared.SerializedField;
import org.apache.drill.exec.record.MaterializedField;
@@ -32,12 +33,15 @@ import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
/**
- * An abstraction that is used to store a sequence of values in an individual column.
+ * An abstraction that is used to store a sequence of values in an individual
+ * column.
*
- * A {@link ValueVector value vector} stores underlying data in-memory in a columnar fashion that is compact and
- * efficient. The column whose data is stored, is referred by {@link #getField()}.
+ * A {@link ValueVector value vector} stores underlying data in-memory in a
+ * columnar fashion that is compact and efficient. The column whose data is
+ * stored, is referred by {@link #getField()}.
*
- * A vector when instantiated, relies on a {@link org.apache.drill.exec.record.DeadBuf dead buffer}. It is important
+ * A vector when instantiated, relies on a
+ * {@link org.apache.drill.exec.record.DeadBuf dead buffer}. It is important
* that vector is allocated before attempting to read or write.
*
* There are a few "rules" around vectors:
@@ -45,37 +49,34 @@ import org.apache.drill.exec.vector.complex.reader.FieldReader;
* <ul>
* <li>Values need to be written in order (e.g. index 0, 1, 2, 5).</li>
* <li>Null vectors start with all values as null before writing anything.</li>
- * <li>For variable width types, the offset vector should be all zeros before writing.</li>
+ * <li>For variable width types, the offset vector should be all zeros before
+ * writing.</li>
* <li>You must call setValueCount before a vector can be read.</li>
* <li>You should never write to a vector once it has been read.</li>
- * <li>Vectors may not grow larger than the number of bytes specified
- * in {@link #MAX_BUFFER_SIZE} to prevent memory fragmentation. Use the
+ * <li>Vectors may not grow larger than the number of bytes specified in
+ * {@link #MAX_BUFFER_SIZE} to prevent memory fragmentation. Use the
* <tt>setBounded()</tt> methods in the mutator to enforce this rule.</li>
* </ul>
*
- * Please note that the current implementation doesn't enforce those rules, hence we may find few places that
- * deviate from these rules (e.g. offset vectors in Variable Length and Repeated vector)
+ * Please note that the current implementation doesn't enforce those rules,
+ * hence we may find a few places that deviate from them (e.g. offset
+ * vectors in variable-length and repeated vectors).
*
* This interface "should" strive to guarantee this order of operation:
* <blockquote>
- * allocate > mutate > setvaluecount > access > clear (or allocate to start the process over).
+ * allocate > mutate > setValueCount > access > clear (or allocate
+ * to start the process over).
* </blockquote>
*/
+
public interface ValueVector extends Closeable, Iterable<ValueVector> {
/**
* Maximum allowed size of the buffer backing a value vector.
+ * Set to the Netty chunk size to prevent memory fragmentation.
*/
- int MAX_BUFFER_SIZE = VectorUtils.maxSize();
-
- /**
- * Debug-time system option that artificially limits vector lengths
- * for testing. Must be set prior to the first reference to this
- * class. (Made deliberately difficult to prevent misuse...)
- */
-
- String MAX_BUFFER_SIZE_KEY = "drill.max_vector";
+ int MAX_BUFFER_SIZE = AllocationManager.chunkSize();
/**
* Maximum allowed row count in a vector. Repeated vectors
@@ -167,10 +168,24 @@ public interface ValueVector extends Closeable, Iterable<ValueVector> {
/**
* Returns the number of bytes that is used by this vector instance.
+ * The name is a bit of a misnomer: this method returns the number of
+ * bytes used by the data in this instance, not the allocated size.
*/
int getBufferSize();
/**
+ * Returns the total size of buffers allocated by this vector. Has
+ * meaning only when vectors are directly allocated and each vector
+ * has its own buffer. Does not have meaning for vectors deserialized
+ * from the network or disk, for which multiple vectors share the
+ * same underlying buffer.
+ *
+ * @return allocated buffer size, in bytes
+ */
+
+ int getAllocatedSize();
+
+ /**
* Returns the number of bytes that is used by this vector if it holds the given number
* of values. The result will be the same as if Mutator.setValueCount() were called, followed
* by calling getBufferSize(), but without any of the closing side-effects that setValueCount()
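Taken together, the rules in the class comment above imply the standard
vector lifecycle. A minimal sketch, using an int vector as an example (error
handling omitted):

    // allocate > mutate > setValueCount > access > clear
    IntVector vector = new IntVector(field, allocator);
    vector.allocateNew(1024);                  // allocate
    for (int i = 0; i < 100; i++) {
      vector.getMutator().setSafe(i, i * 10);  // mutate, in index order
    }
    vector.getMutator().setValueCount(100);    // must precede reads
    int value = vector.getAccessor().get(5);   // access
    vector.clear();                            // release buffers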
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/VariableWidthVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/VariableWidthVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/VariableWidthVector.java
index d04234c..f5373d0 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/VariableWidthVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/VariableWidthVector.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -33,8 +33,10 @@ public interface VariableWidthVector extends ValueVector {
*/
int getByteCapacity();
+ @Override
VariableWidthMutator getMutator();
+ @Override
VariableWidthAccessor getAccessor();
interface VariableWidthAccessor extends Accessor {
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/VectorUtils.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/VectorUtils.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/VectorUtils.java
deleted file mode 100644
index 6b29eb2..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/VectorUtils.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector;
-
-public class VectorUtils {
-
- /**
- * Vectors cannot be any larger than the Netty memory allocation
- * block size.
- */
-
- private static final int ABSOLUTE_MAX_SIZE = 16 * 1024 * 1024;
-
- /**
- * Minimum size selected to prevent pathological performance if vectors
- * are limited to an unusably small size. This limit is a judgment call,
- * not based on any known limits.
- */
-
- private static final int ABSOLUTE_MIN_SIZE = 16 * 1024;
-
- private VectorUtils() { }
-
- /**
- * Static function called once per run to compute the maximum
- * vector size, in bytes. Normally uses the hard-coded limit,
- * but allows setting a system property to override the limit
- * for testing. The configured value must be within reasonable
- * bounds.
- * @return the maximum vector size, in bytes
- */
-
- static int maxSize() {
- String prop = System.getProperty( ValueVector.MAX_BUFFER_SIZE_KEY );
- int value = ABSOLUTE_MAX_SIZE;
- if (prop != null) {
- try {
- value = Integer.parseInt(prop);
- value = Math.max(value, ABSOLUTE_MIN_SIZE);
- value = Math.min(value, ABSOLUTE_MAX_SIZE);
- } catch (NumberFormatException e) {
- // Ignore
- }
- }
- return value;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/ZeroVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/ZeroVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/ZeroVector.java
index e6f0544..fc89d71 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/ZeroVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/ZeroVector.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.vector;
+import java.util.Collections;
import java.util.Iterator;
import java.util.Set;
@@ -105,13 +106,16 @@ public class ZeroVector implements ValueVector {
@Override
public Iterator<ValueVector> iterator() {
- return Iterators.emptyIterator();
+ return Collections.emptyIterator();
}
@Override
public int getBufferSize() { return 0; }
@Override
+ public int getAllocatedSize() { return 0; }
+
+ @Override
public int getBufferSizeFor(final int valueCount) { return 0; }
@Override
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/AccessorUtilities.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/AccessorUtilities.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/AccessorUtilities.java
deleted file mode 100644
index 708d0db..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/AccessorUtilities.java
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor;
-
-import java.math.BigDecimal;
-
-import org.joda.time.Duration;
-import org.joda.time.Period;
-
-public class AccessorUtilities {
-
- private AccessorUtilities() { }
-
- public static void setFromInt(ColumnWriter writer, int value) {
- switch (writer.valueType()) {
- case BYTES:
- writer.setBytes(Integer.toHexString(value).getBytes());
- break;
- case DOUBLE:
- writer.setDouble(value);
- break;
- case INTEGER:
- writer.setInt(value);
- break;
- case LONG:
- writer.setLong(value);
- break;
- case STRING:
- writer.setString(Integer.toString(value));
- break;
- case DECIMAL:
- writer.setDecimal(BigDecimal.valueOf(value));
- break;
- case PERIOD:
- writer.setPeriod(Duration.millis(value).toPeriod());
- break;
- default:
- throw new IllegalStateException("Unknown writer type: " + writer.valueType());
- }
- }
-
- public static int sv4Batch(int sv4Index) {
- return sv4Index >>> 16;
- }
-
- public static int sv4Index(int sv4Index) {
- return sv4Index & 0xFFFF;
- }
-
- public static void setBooleanArray(ArrayWriter arrayWriter, boolean[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setInt(value[i] ? 1 : 0);
- }
- }
-
- public static void setByteArray(ArrayWriter arrayWriter, byte[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setInt(value[i]);
- }
- }
-
- public static void setShortArray(ArrayWriter arrayWriter, short[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setInt(value[i]);
- }
- }
-
- public static void setIntArray(ArrayWriter arrayWriter, int[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setInt(value[i]);
- }
- }
-
- public static void setLongArray(ArrayWriter arrayWriter, long[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setLong(value[i]);
- }
- }
-
- public static void setFloatArray(ArrayWriter arrayWriter, float[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setDouble(value[i]);
- }
- }
-
- public static void setDoubleArray(ArrayWriter arrayWriter, double[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setDouble(value[i]);
- }
- }
-
- public static void setStringArray(ArrayWriter arrayWriter, String[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setString(value[i]);
- }
- }
-
- public static void setPeriodArray(ArrayWriter arrayWriter, Period[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setPeriod(value[i]);
- }
- }
-
- public static void setBigDecimalArray(ArrayWriter arrayWriter,
- BigDecimal[] value) {
- for (int i = 0; i < value.length; i++) {
- arrayWriter.setDecimal(value[i]);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java
index 040dcda..8f33f0e 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayReader.java
@@ -17,36 +17,90 @@
*/
package org.apache.drill.exec.vector.accessor;
-import java.math.BigDecimal;
-
-import org.joda.time.Period;
-
/**
- * Interface to access the values of an array column. In general, each
- * vector implements just one of the get methods. Check the vector type
- * to know which method to use. Though, generally, when writing test
- * code, the type is known to the test writer.
- * <p>
- * Arrays allow random access to the values within the array. The index
- * passed to each method is the index into the array for the current
- * row and column. (This means that arrays are three dimensional:
- * the usual (row, column) dimensions plus an array index dimension:
- * (row, column, array index).
- * <p>
- * Note that the <tt>isNull()</tt> method is provided for completeness,
- * but no Drill array allows null values at present.
+ * Generic array reader. An array is one of the following:
+ * <ul>
+ * <li>Array of scalars. Read the values using {@link #elements()}, which provides
+ * an array-like access to the scalars.</li>
+ * <li>A repeated map. Use {@link #tuple(int)} to get a tuple reader for a
+ * specific array element. Use {@link #size()} to learn the number of maps in
+ * the array.</li>
+ * <li>List of lists. Use the {@link #array(int)} method to get the nested list
+ * at a given index. Use {@link #size()} to learn the number of lists in
+ * the array.</li>
+ * </ul>
+ * @see ArrayWriter
*/
-public interface ArrayReader extends ColumnAccessor {
+public interface ArrayReader {
+
+ /**
+ * Number of elements in the array.
+ * @return the number of elements
+ */
+
int size();
- boolean isNull(int index);
- int getInt(int index);
- long getLong(int index);
- double getDouble(int index);
- String getString(int index);
- byte[] getBytes(int index);
- BigDecimal getDecimal(int index);
- Period getPeriod(int index);
- TupleReader map(int index);
+
+ /**
+ * The object type of the list entry. All entries have the same
+ * type.
+ * @return the object type of each entry
+ */
+
+ ObjectType entryType();
+
+ /**
+ * Return a reader for the elements of a scalar array.
+ * @return reader for scalar elements
+ */
+
+ ScalarElementReader elements();
+
+ /**
+ * Return a generic object reader for the array entry. Not available
+ * for scalar elements. Positions the reader to read the selected
+ * element.
+ *
+ * @param index array index
+ * @return generic object reader
+ */
+
+ ObjectReader entry(int index);
+ TupleReader tuple(int index);
ArrayReader array(int index);
+
+ /**
+ * Return the generic object reader for the array element. This
+ * version <i>does not</i> position the reader; the client must
+ * call {@link #setPosn(int)} to set the position. This form allows
+ * up-front setup of the readers when convenient for the caller.
+ */
+
+ ObjectReader entry();
+ TupleReader tuple();
+ ArrayReader array();
+
+ /**
+ * Set the array reader to read a given array entry. Not used for
+ * scalars, only for maps and arrays when using the non-indexed
+ * methods {@link #entry()}, {@link #tuple()} and {@link #array()}.
+ */
+
+ void setPosn(int index);
+
+ /**
+ * Return the entire array as a <tt>List</tt> of objects.
+ * Note, even if the array is scalar, the elements are still returned
+ * as a list. This method is primarily for testing.
+ * @return array as a <tt>List</tt> of objects
+ */
+
+ Object getObject();
+
+ /**
+ * Return the entire array as a string. Primarily for debugging.
+ * @return string representation of the array
+ */
+
+ String getAsString();
}
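To make the shape of the new reader API concrete, a brief sketch (assuming
"scalarArray" and "mapArray" are ArrayReaders for a repeated-int column and a
repeated-map column, each positioned at the current row):

    // Array of scalars: random access via the element reader.
    ScalarElementReader elements = scalarArray.elements();
    for (int i = 0; i < scalarArray.size(); i++) {
      int value = elements.getInt(i);
    }

    // Repeated map: a tuple reader per array element.
    for (int i = 0; i < mapArray.size(); i++) {
      TupleReader map = mapArray.tuple(i);
      // ... read the map's columns here ...
    }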
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayWriter.java
index 16ff89e..49a1e77 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ArrayWriter.java
@@ -18,25 +18,65 @@
package org.apache.drill.exec.vector.accessor;
/**
- * Writer for values into an array. Array writes are write-once,
- * sequential: each call to a <tt>setFoo()</tt> method writes a
- * value and advances the array index.
+ * Writer for values into an array. Array writes are write-once, sequential:
+ * each call to a <tt>setFoo()</tt> method writes a value and advances the array
+ * index.
* <p>
* {@see ArrayReader}
*/
-public interface ArrayWriter extends ColumnAccessor, ScalarWriter {
+public interface ArrayWriter {
+
+ /**
+ * Number of elements written thus far to the array.
+ * @return the number of elements
+ */
int size();
/**
- * Determine if the next position is valid for writing. Will be invalid
- * if the writer hits a size or other limit.
+ * Return a generic object writer for the array entry.
+ *
+ * @return generic object writer for each entry
+ */
+
+ ObjectWriter entry();
+
+ /**
+ * The object type of the list entry. All entries have the same
+ * type.
+ *
+ * @return the object type of each entry
+ */
+
+ ObjectType entryType();
+ ScalarWriter scalar();
+ TupleWriter tuple();
+ ArrayWriter array();
+
+ /**
+ * When the array contains a tuple or an array, call <tt>save()</tt>
+ * after each array value. Not necessary when writing scalars; each
+ * set operation calls save automatically.
+ */
+
+ void save();
+
+ /**
+ * Write the values of an array from a list of arguments.
+ * @param values values for each array element
+ */
+ void set(Object ...values);
+
+ /**
+ * Write the array given a Java array of values. The type of the Java array
+ * must match the element type of this array writer. That is, if the element
+ * is an <tt>int</tt>, provide an <tt>int[]</tt> array.
*
- * @return true if another item is available and the reader is positioned
- * at that item, false if no more items are available and the reader
- * is no longer valid
+ * @param array array of values to write
*/
- boolean valid();
+ void setObject(Object array);
+// void setList(List<? extends Object> list);
}
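A matching writer-side sketch (assuming "scalarArray" and "mapArray" are
ArrayWriters for a repeated-int column and a repeated-map column; the member
column "x" is hypothetical):

    // Array of scalars: each set call writes one element and advances.
    scalarArray.scalar().setInt(10);
    scalarArray.scalar().setInt(20);

    // Repeated map: write the member columns, then save() the element.
    TupleWriter map = mapArray.tuple();
    map.scalar("x").setInt(1);
    mapArray.save();              // commits array element 0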
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnAccessor.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnAccessor.java
deleted file mode 100644
index 44cd48a..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnAccessor.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor;
-
-/**
- * Common base interface for columns readers and writers. Provides
- * the access type for the column. Note that multiple Drill types and
- * data modes map to the same access type.
- */
-
-public interface ColumnAccessor {
- public enum ValueType {
- INTEGER, LONG, DOUBLE, STRING, BYTES, DECIMAL, PERIOD, ARRAY, MAP
- }
-
- /**
- * Describe the type of the value. This is a compression of the
- * value vector type: it describes which method will return the
- * vector value.
- * @return the value type which indicates which get method
- * is valid for the column
- */
-
- ColumnAccessor.ValueType valueType();
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java
deleted file mode 100644
index 4932567..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReader.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor;
-
-import java.math.BigDecimal;
-
-import org.joda.time.Period;
-
-/**
- * Defines a reader to obtain values from value vectors using
- * a simple, uniform interface. Vector values are mapped to
- * their "natural" representations: the representation closest
- * to the actual vector value. For date and time values, this
- * generally means a numeric value. Applications can then map
- * this value to Java objects as desired. Decimal types all
- * map to BigDecimal as that is the only way in Java to
- * represent large decimal values.
- * <p>
- * In general, a column maps to just one value. However, derived
- * classes may choose to provide type conversions if convenient.
- * An exception is thrown if a call is made to a method that
- * is not supported by the column type.
- * <p>
- * Values of scalars are provided directly, using the get method
- * for the target type. Maps and arrays are structured types and
- * require another level of reader abstraction to access each value
- * in the structure.
- */
-
-public interface ColumnReader extends ColumnAccessor {
-
- /**
- * Report if the column is null. Non-nullable columns always
- * return <tt>false</tt>.
- * @return true if the column value is null, false if the
- * value is set
- */
- boolean isNull();
- int getInt();
- long getLong();
- double getDouble();
- String getString();
- byte[] getBytes();
- BigDecimal getDecimal();
- Period getPeriod();
- Object getObject();
- TupleReader map();
- ArrayReader array();
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java
new file mode 100644
index 0000000..b40b705
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnReaderIndex.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+/**
+ * Index into a vector batch, or an array, at read time.
+ * Supports direct, indirect and hyper-batches.
+ */
+
+public interface ColumnReaderIndex {
+ int batchIndex();
+ int vectorIndex();
+}
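As an illustration, an index over an SV4 hyper-batch might decode the packed
selection value the same way as the sv4Batch()/sv4Index() helpers deleted
above (a hypothetical sketch, not actual Drill code):

    public class HyperReaderIndex implements ColumnReaderIndex {
      private final int sv4;  // packed (batch, offset) selection value

      public HyperReaderIndex(int sv4) { this.sv4 = sv4; }

      @Override public int batchIndex()  { return sv4 >>> 16; }
      @Override public int vectorIndex() { return sv4 & 0xFFFF; }
    }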
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriter.java
deleted file mode 100644
index 0cc691c..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriter.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor;
-
-/**
- * Defines a writer to set values for value vectors using
- * a simple, uniform interface. Vector values are mapped to
- * their "natural" representations: the representation closest
- * to the actual vector value. For date and time values, this
- * generally means a numeric value. Applications can then map
- * this value to Java objects as desired. Decimal types all
- * map to BigDecimal as that is the only way in Java to
- * represent large decimal values.
- * <p>
- * In general, a column maps to just one value. However, derived
- * classes may choose to provide type conversions if convenient.
- * An exception is thrown if a call is made to a method that
- * is not supported by the column type.
- * <p>
- * Values of scalars are set directly, using the get method
- * for the target type. Maps and arrays are structured types and
- * require another level of writer abstraction to access each value
- * in the structure.
- */
-
-public interface ColumnWriter extends ColumnAccessor, ScalarWriter {
- void setNull();
- TupleWriter map();
- ArrayWriter array();
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/package-info.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/package-info.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/package-info.java
new file mode 100644
index 0000000..4c11499
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/package-info.java
@@ -0,0 +1,304 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Handles the details of the result set loader implementation.
+ * <p>
+ * The primary purpose of this loader, and the most complex to understand and
+ * maintain, is overflow handling.
+ *
+ * <h4>Detailed Use Cases</h4>
+ *
+ * Let's examine it by considering a number of
+ * use cases.
+ * <table style="border: 1px solid; border-collapse: collapse;">
+ * <tr><th>Row</th><th>a</th><th>b</th><th>c</th><th>d</th><th>e</th><th>f</th><th>g</th><th>h</th></tr>
+ * <tr><td>n-2</td><td>X</td><td>X</td><td>X</td><td>X</td><td>X</td><td>X</td><td>-</td><td>-</td></tr>
+ * <tr><td>n-1</td><td>X</td><td>X</td><td>X</td><td>X</td><td> </td><td> </td><td>-</td><td>-</td></tr>
+ * <tr><td>n </td><td>X</td><td>!</td><td>O</td><td> </td><td>O</td><td> </td><td>O</td><td> </td></tr>
+ * </table>
+ * Here:
+ * <ul>
+ * <li>n-2, n-1, and n are rows. n is the overflow row.</li>
+ * <li>X indicates a value was written before overflow.</li>
+ * <li>Blank indicates no value was written in that row.</li>
+ * <li>! indicates the value that triggered overflow.</li>
+ * <li>- indicates a column that did not exist prior to overflow.</li>
+ * <li>O indicates a value written after overflow.</li>
+ * </ul>
+ * Column a is written before overflow occurs, b causes overflow, and all other
+ * columns either are not written, or written after overflow.
+ * <p>
+ * The scenarios, identified by column names above, are:
+ * <dl>
+ * <dt>a</dt>
+ * <dd>a contains values for all three rows.
+ * <ul>
+ * <li>Two values were written in the "main" batch, while a third was written to
+ * what becomes the overflow row.</li>
+ * <li>When overflow occurs, the last write position is at n. It must be moved
+ * back to n-1.</li>
+ * <li>Since data was written to the overflow row, it is copied to the
+ * look-ahead batch.</li>
+ * <li>The last write position in the lookahead batch is 0 (since data was
+ * copied into the 0th row).</li>
+ * <li>When harvesting, no empty-filling is needed. Values in the main
+ * batch are zero-filled when the batch is finished; values in the look-ahead
+ * batch are back-filled when the first value is written.</li>
+ * <li>When starting the next batch, the last write position must be set to 0 to
+ * reflect the presence of the value for row n.</li>
+ * </ul>
+ * </dd>
+ * <dt>b</dt>
+ * <dd>b contains values for all three rows. The value for row n triggers
+ * overflow.
+ * <ul>
+ * <li>The last write position is at n-1, which is kept for the "main"
+ * vector.</li>
+ * <li>A new overflow vector is created and starts empty, with the last write
+ * position at -1.</li>
+ * <li>Once created, b is immediately written to the overflow vector, advancing
+ * the last write position to 0.</li>
+ * <li>Harvesting, and starting the next for column b works the same as column
+ * a.</li>
+ * </ul>
+ * </dd>
+ * <dt>c</dt>
+ * <dd>Column c has values for all rows.
+ * <ul>
+ * <li>The value for row n is written after overflow.</li>
+ * <li>At overflow, the last write position is at n-1.</li>
+ * <li>At overflow, a new lookahead vector is created with the last write
+ * position at -1.</li>
+ * <li>The value of c is written to the lookahead vector, advancing the last
+ * write position to 0.</li>
+ * <li>Harvesting, and starting the next for column c works the same as column
+ * a.</li>
+ * </ul>
+ * </dd>
+ * <dt>d</dt>
+ * <dd>Column d writes values to the last two rows before overflow, but not to
+ * the overflow row.
+ * <ul>
+ * <li>The last write position for the main batch is at n-1.</li>
+ * <li>The last write position in the lookahead batch remains at -1.</li>
+ * <li>Harvesting for column d requires filling an empty value for row n-1.</li>
+ * <li>When starting the next batch, the last write position must be set to -1,
+ * indicating no data yet written.</li>
+ * </ul>
+ * </dd>
+ * <dt>e</dt>
+ * <dd>Column e has a value in row n-2, none in row n-1, and a value written
+ * to the overflow row.
+ * <ul>
+ * <li>At overflow, the last write position in the main batch is at n-2.</li>
+ * <li>The value for row n is written to the lookahead vector, advancing its
+ * last write position from -1 to 0.</li>
+ * <li>Harvesting requires filling an empty value for row n-1.</li>
+ * <li>When starting the next batch, the last write position is set to 0 to
+ * reflect the value for row n.</li>
+ * </ul>
+ * </dd>
+ * <dt>f</dt>
+ * <dd>Column f has no data in the last position of the main batch, and no data
+ * in the overflow row.
+ * <ul>
+ * <li>The last write position is at n-2.</li>
+ * <li>An empty value must be written into position n-1 during harvest.</li>
+ * <li>On start of the next batch, the last write position starts at -1.</li>
+ * </ul>
+ * </dd>
+ * <dt>g</dt>
+ * <dd>Column g is added after overflow, and has a value written to the overflow
+ * row.
+ * <ul>
+ * <li>On harvest, column g is simply skipped.</li>
+ * <li>On start of the next row, the last write position can be left unchanged
+ * since no "exchange" was done.</li>
+ * </ul>
+ * </dd>
+ * <dt>h</dt>
+ * <dd>Column h is added after overflow, but does not have data written to it
+ * during the overflow row. Similar to column g, but the last write position
+ * starts at -1 for the next batch.</dd>
+ * </dl>
+ *
+ * <h4>General Rules</h4>
+ *
+ * The above can be summarized into a smaller set of rules:
+ * <p>
+ * At the time of overflow on row n:
+ * <ul>
+ * <li>Create or clear the lookahead vector.</li>
+ * <li>Copy (last write position - n + 1) values from row n in the old vector to 0
+ * in the new one. If the copy count is negative, copy nothing. (A negative
+ * copy count means that the last write position is behind the current
+ * row position. Should not occur after back-filling.)</li>
+ * <li>Save the last write position from the old vector, clamped at n.
+ * (That is, if the last write position is before n, keep it. If at
+ * n+1, set it back to n.)</li>
+ * <li>Set the last write position of the overflow vector to (original last
+ * write position - n), clamped at -1. That is, if the original last write
+ * position was before n, the new one is -1. If the original last write
+ * position is after n, shift it down by n places.</li>
+ * <li>Swap buffers from the main vectors and the overflow vectors. This sets
+ * aside the main values, and allows writing to continue using the overflow
+ * buffers.</li>
+ * </ul>
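+ * <p>
+ * In index arithmetic, the rules above amount to the following sketch
+ * (illustrative only; <tt>lastWritePosn</tt> is the column's last write
+ * position and <tt>n</tt> is the overflow row):
+ * <pre><code>
+ * int copyCount = lastWritePosn - n + 1;              // values moved; copy nothing if negative
+ * int mainPosn = Math.min(lastWritePosn, n);          // saved for the main batch
+ * int overflowPosn = Math.max(lastWritePosn - n, -1); // position in the look-ahead batch
+ * </code></pre>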
+ * <p>
+ * As the overflow write proceeds:
+ * <ul>
+ * <li>For existing columns, write as normal. The last write position moves from
+ * -1 to 0.</li>
+ * <li>Columns not written leave the last write position at -1.</li>
+ * <li>If a new column appears, set its last write position to -1. If it is then
+ * written, proceed as in the first point above.</li>
+ * </ul>
+ * <p>
+ * At harvest time:
+ * <ul>
+ * <li>For every writer, save the last write position.</li>
+ * <li>Swap the overflow and main buffers to put the main batch back into the
+ * main vectors.</li>
+ * <li>Reset the last write position for all writers to the values saved at
+ * overflow time above.</li>
+ * <li>Finish the batch for the main vectors as normal. No special handling
+ * needed.</li>
+ * </ul>
+ * <p>
+ * When starting the next batch:
+ * <ul>
+ * <li>Swap buffers again, putting the overflow row back into the main vectors.
+ * (At this point, the harvested vectors should all have zero buffers.)</li>
+ * <li>Restore the last write position saved during harvest.</li>
+ * </ul>
+ * <h4>Constraints</h4>
+ * A number of constraints are worth pointing out:
+ * <ul>
+ * <li>Writers are bound to vectors, so we can't easily swap vectors during
+ * overflow.</li>
+ * <li>The project operator to which this operator feeds data also binds to
+ * vectors, so the same set of vectors must be presented on every batch.</li>
+ * <li>The client binds to writers, so we cannot swap writers between main and
+ * overflow batches.</li>
+ * <li>Therefore, the unit of swapping is the buffer that backs the vectors.
+ * </li>
+ * <li>Swapping is not copying; it is only exchanging pointers.</li>
+ * <li>The only copying in this entire process occurs when moving previously-
+ * written values in the overflow row to the new vector at the time of
+ * overflow.</li>
+ * </ul>
+ *
+ * <h4>Arrays</h4>
+ *
+ * The above covers the case of scalar, top-level columns. The extension to
+ * scalar maps is straightforward: at run time, the members of maps are just
+ * simple scalar vectors that reside in a map name space, but the structure
+ * of map fields is the same as for top-level fields. (Think of map fields
+ * as being "flattened" into the top-level tuple.)
+ * <p>
+ * Arrays are a different matter: each row can have many values associated
+ * with it. Consider an array of scalars. We have:
+ * <pre><code>
+ * Row 0 Row 1 Row 2
+ * 0 1 2 3 4 5 6 7 8
+ * [ [a b c] [d e f] | [g h i] ]
+ * </code></pre>
+ * Here, the letters indicate values. The brackets show the overall vector
+ * (outer brackets) and individual rows (inner brackets). The vertical line
+ * shows where overflow occurred. The same rules as discussed earlier still
+ * apply, but we must consider both the row indexes and the array indexes.
+ * <ul>
+ * <li>Overflow occurs at the row level. Here row 2 overflowed and must
+ * be moved to the look-ahead vector.</li>
+ * <li>Value movement occurs at the value level. Here, values 6, 7 and 8
+ * must be moved to the look-ahead vector.</li>
+ * </ul>
+ * The result, after overflow, is:
+ * <pre><code>
+ * Row 0 Row 1 Row 0
+ * 0 1 2 3 4 5 0 1 2
+ * [ [a b c] [d e f] ] [ [g h i] ]
+ * </code></pre>
+ * Further, we must consider lists: a column may consist of a list of
+ * arrays. Or, a column may consist of an array of maps, one of which is
+ * a list of arrays. So, the above reasoning must apply recursively down
+ * the value tree.
+ * <p>
+ * As it turns out, there is a simple recursive algorithm, an extension of
+ * the reasoning for the top-level scalar case, that can
+ * handle arrays:
+ * <ul>
+ * <li>Start with the row index of the overflow row.</li>
+ * <li>If column c, say, is an array, obtain the index of the first value for
+ * the overflow row.</li>
+ * <li>If c is a list, or a repeated map, then repeat the above, for each
+ * member of c (a single column for a list, a set of columns for a map), but
+ * replace the row index with the index of the first element.</li>
+ * </ul>
+ * The result will be a walk of the value tree in which the overflow index
+ * starts as an index relative to the result set (a row index), and is
+ * recursively replaced with an array offset for each level of the array.
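+ * <p>
+ * A sketch of this walk in pseudo-Java, written against a hypothetical
+ * <tt>Node</tt> type (the real logic is spread across the writer and model
+ * classes):
+ * <pre><code>
+ * void rollOver(Node node, int index) {
+ *   if (node.isArray()) {
+ *     // Replace the outer index with the offset of the first element
+ *     // that belongs to the overflow row (or outer array element).
+ *     index = node.startOffsetOf(index);
+ *   }
+ *   for (Node child : node.children()) {
+ *     rollOver(child, index);  // recurse into list/map members
+ *   }
+ *   node.copyOverflowValuesFrom(index);
+ * }
+ * </code></pre>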
+ *
+ * <h4>Resynching Writers after Overflow</h4>
+ *
+ * When an overflow occurs, our focus starts with the single top-level row
+ * that will not fit into the current batch. We move this row to the look-ahead
+ * vectors. Doing so is quite simple when each row is a simple tuple. As
+ * described above, the work is quite a bit more complex when the structure
+ * is a JSON-like tree flattened into vectors.
+ * <p>
+ * Consider the writers. Each writer corresponds to a single vector. Writers
+ * are grouped into logical tree nodes. Those in the root node write to
+ * (single, scalar) columns that are either top-level columns, or nested
+ * some level down in single-value (not array) tuples. Another tree level
+ * occurs in an array: the elements of the array use a different
+ * (faster-changing) index than the top (row-level) writers. Different arrays
+ * have different indexes: a row may have, say, four elements in array A,
+ * but 20 elements in array B.
+ * <p>
+ * Further, an array can hold single values (a repeated int, say) or entire
+ * tuples (a repeated map.) And, since Drill supports the full JSON model, in
+ * the most general case, there is a tree of array indexes that can be nested
+ * to an arbitrary level. (A row can have an array of maps which contains a
+ * column that is, itself, a list of repeated maps, a field of which is an
+ * array of ints.)
+ * <p>
+ * Writers handle this index tree via a tree of {@link ColumnWriterIndex}
+ * objects, often specialized for various tasks.
+ * <p>
+ * Now we can get to the key concept in this section: how we update those indexes
+ * after an overflow. The top-level index reverts to zero. (We start writing
+ * the 0th row in the new look-ahead batch.) But, nested indexes (those for arrays)
+ * will start at some other position depending on the number of elements already
+ * written in the overflow row. The number of such elements is determined by a
+ * top-down traversal of the tree (to determine the start offset of each array
+ * for the row.) Resetting the writer indexes is a bottom-up process: each
+ * array's writer index is reset to match the number of elements already
+ * written in that array.
+ * <p>
+ * This flow is the opposite of the "normal" case in which a new batch is started
+ * top-down, with each index being reset to zero.
+ *
+ * <h4>The Need for a Uniform Structure</h4>
+ *
+ * Drill has vastly different implementations and interfaces for:
+ * <ul>
+ * <li>Result sets (as a {@link VectorContainer}),</li>
+ * <li>Arrays (as a generated repeated vector),</li>
+ * <li>Lists (as a {@link ListVector}),</li>
+ * <li>Repeated lists (as a {@link RepeatedListVector}), and</li>
+ * <li>Repeated maps (as a {@link RepeatedMapVector}).</li>
+ * </ul>
+ * If we were to work directly with the above abstractions, the code would be
+ * vastly complex. Instead, we abstract out the common structure into the
+ * {@link TupleModel} abstraction. In particular, we use the
+ * single tuple model which works with a single batch. This model provides a
+ * simple, uniform interface to work with columns and tuples (rows, maps),
+ * and a simple way to work with arrays. This interface reduces the above
+ * array algorithm to a simple set of recursive method calls.
+ */
+
+package org.apache.drill.exec.physical.rowSet.impl;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/BaseTupleModel.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/BaseTupleModel.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/BaseTupleModel.java
new file mode 100644
index 0000000..40da4ec
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/BaseTupleModel.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+import org.apache.drill.exec.record.TupleSchema.AbstractColumnMetadata;
+import org.apache.drill.exec.record.VectorContainer;
+
+/**
+ * Base implementation for a tuple model which is common to the "single"
+ * and "hyper" cases. Deals primarily with the structure of the model,
+ * which is common between the two physical implementations.
+ */
+
+public abstract class BaseTupleModel implements TupleModel {
+
+ public static abstract class BaseColumnModel implements ColumnModel {
+
+ /**
+ * Extended schema associated with a column.
+ */
+
+ protected final ColumnMetadata schema;
+
+ public BaseColumnModel(ColumnMetadata schema) {
+ this.schema = schema;
+ }
+
+ @Override
+ public ColumnMetadata schema() { return schema; }
+
+ @Override
+ public TupleModel mapModel() { return null; }
+ }
+
+ /**
+ * Columns within the tuple. Columns may, themselves, be represented
+ * as tuples.
+ */
+
+ protected final List<ColumnModel> columns;
+
+ /**
+ * Descriptive schema associated with the columns above. Unlike a
+ * {@link VectorContainer}, this abstraction keeps the schema in sync
+ * with vectors as columns are added.
+ */
+
+ protected final TupleSchema schema;
+
+ public BaseTupleModel() {
+
+ // Schema starts empty and is built as columns are added.
+ // This ensures that the schema stays in sync with the
+ // backing vectors.
+
+ schema = new TupleSchema();
+ columns = new ArrayList<>();
+ }
+
+ public BaseTupleModel(TupleSchema schema, List<ColumnModel> columns) {
+ this.schema = schema;
+ this.columns = columns;
+ assert schema.size() == columns.size();
+ }
+
+ @Override
+ public TupleMetadata schema() { return schema; }
+
+ @Override
+ public int size() { return schema.size(); }
+
+ @Override
+ public ColumnModel column(int index) {
+ return columns.get(index);
+ }
+
+ @Override
+ public ColumnModel column(String name) {
+ return column(schema.index(name));
+ }
+
+ /**
+ * Perform the work of keeping the list of columns and schema in-sync
+ * as columns are added. This is protected because derived classes
+ * must add logic to keep the new column in sync with the underlying
+ * container or map vector.
+ *
+ * @param column column implementation to add
+ */
+
+ protected void addBaseColumn(BaseColumnModel column) {
+ schema.add((AbstractColumnMetadata) column.schema());
+ columns.add(column);
+ assert columns.size() == schema.size();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/ContainerVisitor.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/ContainerVisitor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/ContainerVisitor.java
new file mode 100644
index 0000000..28c8c59
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/ContainerVisitor.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector;
+import org.apache.drill.exec.vector.complex.ListVector;
+import org.apache.drill.exec.vector.complex.RepeatedListVector;
+import org.apache.drill.exec.vector.complex.RepeatedMapVector;
+
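+/**
+ * Generic visitor over the vectors in a container, including vectors nested
+ * inside map vectors. Subclasses override the <tt>visit</tt> methods of
+ * interest; the default implementations visit map children first, then route
+ * each vector to {@link #visitVector}. A minimal sketch of a subclass (the
+ * counter shown is illustrative, not part of this API):
+ * <pre><code>
+ * class VectorCounter extends ContainerVisitor&lt;Void, AtomicInteger&gt; {
+ *   protected Void visitVector(ValueVector vector, AtomicInteger count) {
+ *     count.incrementAndGet();
+ *     return null;
+ *   }
+ * }
+ *
+ * AtomicInteger count = new AtomicInteger();
+ * new VectorCounter().apply(container, count);
+ * </code></pre>
+ */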
+public class ContainerVisitor<R, A> {
+
+ public R apply(VectorContainer container, A arg) {
+ return visitContainer(container, arg);
+ }
+
+ private R visitContainer(VectorContainer container, A arg) {
+ return visitChildren(container, arg);
+ }
+
+ public R visitChildren(VectorContainer container, A arg) {
+ for (int i = 0; i < container.getNumberOfColumns(); i++) {
+ @SuppressWarnings("resource")
+ ValueVector vector = container.getValueVector(i).getValueVector();
+ apply(vector, arg);
+ }
+ return null;
+ }
+
+ protected R apply(ValueVector vector, A arg) {
+ MaterializedField schema = vector.getField();
+ MajorType majorType = schema.getType();
+ MinorType type = majorType.getMinorType();
+ DataMode mode = majorType.getMode();
+ switch (type) {
+ case MAP:
+ if (mode == DataMode.REPEATED) {
+ return visitRepeatedMap((RepeatedMapVector) vector, arg);
+ } else {
+ return visitMap((AbstractMapVector) vector, arg);
+ }
+ case LIST:
+ if (mode == DataMode.REPEATED) {
+ return visitRepeatedList((RepeatedListVector) vector, arg);
+ } else {
+ return visitList((ListVector) vector, arg);
+ }
+ default:
+ if (mode == DataMode.REPEATED) {
+ return visitRepeatedPrimitive((BaseRepeatedValueVector) vector, arg);
+ } else {
+ return visitPrimitive(vector, arg);
+ }
+ }
+ }
+
+ protected R visitRepeatedMap(RepeatedMapVector vector, A arg) {
+ visitChildren(vector, arg);
+ return visitVector(vector, arg);
+ }
+
+ protected R visitMap(AbstractMapVector vector, A arg) {
+ visitChildren(vector, arg);
+ return visitVector(vector, arg);
+ }
+
+ private R visitChildren(AbstractMapVector vector, A arg) {
+ for (int i = 0; i < vector.size(); i++) {
+ apply(vector.getChildByOrdinal(i), arg);
+ }
+ return null;
+ }
+
+ protected R visitRepeatedList(RepeatedListVector vector, A arg) {
+ return visitVector(vector, arg);
+ }
+
+ protected R visitList(ListVector vector, A arg) {
+ return visitVector(vector, arg);
+ }
+
+ protected R visitRepeatedPrimitive(BaseRepeatedValueVector vector, A arg) {
+ return visitVector(vector, arg);
+ }
+
+ protected R visitPrimitive(ValueVector vector, A arg) {
+ return visitVector(vector, arg);
+ }
+
+ protected R visitVector(ValueVector vector, A arg) {
+ return null;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/MetadataProvider.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/MetadataProvider.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/MetadataProvider.java
new file mode 100644
index 0000000..bb5e18e
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/MetadataProvider.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+
+/**
+ * Interface for retrieving and/or creating metadata given
+ * a vector.
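+ * <p>
+ * A sketch of the two modes, for a container that may arrive with or
+ * without a known metadata schema:
+ * <pre><code>
+ * // No schema known: create metadata while walking the vectors.
+ * MetadataProvider creator = new MetadataCreator();
+ *
+ * // Schema already known: look up existing metadata by position.
+ * MetadataProvider retrieval = new MetadataRetrieval(schema);
+ * </code></pre>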
+ */
+
+public interface MetadataProvider {
+ ColumnMetadata metadata(int index, MaterializedField field);
+ MetadataProvider childProvider(ColumnMetadata colMetadata);
+ TupleMetadata tuple();
+
+ public static class VectorDescrip {
+ public final MetadataProvider parent;
+ public final ColumnMetadata metadata;
+
+ public VectorDescrip(MetadataProvider provider, int index,
+ MaterializedField field) {
+ parent = provider;
+ metadata = provider.metadata(index, field);
+ }
+ }
+
+ public static class MetadataCreator implements MetadataProvider {
+
+ private final TupleSchema tuple;
+
+ public MetadataCreator() {
+ tuple = new TupleSchema();
+ }
+
+ public MetadataCreator(TupleSchema tuple) {
+ this.tuple = tuple;
+ }
+
+ @Override
+ public ColumnMetadata metadata(int index, MaterializedField field) {
+ return tuple.addView(field);
+ }
+
+ @Override
+ public MetadataProvider childProvider(ColumnMetadata colMetadata) {
+ return new MetadataCreator((TupleSchema) colMetadata.mapSchema());
+ }
+
+ @Override
+ public TupleMetadata tuple() { return tuple; }
+ }
+
+ public static class MetadataRetrieval implements MetadataProvider {
+
+ private final TupleMetadata tuple;
+
+ public MetadataRetrieval(TupleMetadata schema) {
+ tuple = schema;
+ }
+
+ @Override
+ public ColumnMetadata metadata(int index, MaterializedField field) {
+ return tuple.metadata(index);
+ }
+
+ @Override
+ public MetadataProvider childProvider(ColumnMetadata colMetadata) {
+ return new MetadataRetrieval((TupleSchema) colMetadata.mapSchema());
+ }
+
+ @Override
+ public TupleMetadata tuple() { return tuple; }
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/ReaderIndex.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/ReaderIndex.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/ReaderIndex.java
new file mode 100644
index 0000000..c4b0415
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/ReaderIndex.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model;
+
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+
+/**
+ * Row set index base class used when indexing rows within a row
+ * set for a row set reader. Keeps track of the current position,
+ * which starts before the first row, meaning that the client
+ * must call <tt>next()</tt> to advance to the first row.
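+ * <p>
+ * Typical iteration against this contract, as a sketch
+ * (<tt>vectorIndex()</tt> comes from {@link ColumnReaderIndex}):
+ * <pre><code>
+ * while (index.next()) {
+ *   int valueIndex = index.vectorIndex(); // location within the vector
+ * }
+ * </code></pre>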
+ */
+
+public abstract class ReaderIndex implements ColumnReaderIndex {
+
+ protected int rowIndex = -1;
+ protected final int rowCount;
+
+ public ReaderIndex(int rowCount) {
+ this.rowCount = rowCount;
+ }
+
+ public int position() { return rowIndex; }
+ public void set(int index) { rowIndex = index; }
+
+ public boolean next() {
+    if (++rowIndex < rowCount) {
+ return true;
+ } else {
+ rowIndex--;
+ return false;
+ }
+ }
+
+ public int size() { return rowCount; }
+
+ public boolean valid() { return rowIndex < rowCount; }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/SchemaInference.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/SchemaInference.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/SchemaInference.java
new file mode 100644
index 0000000..3db01dd
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/SchemaInference.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+import org.apache.drill.exec.record.VectorContainer;
+
+/**
+ * Produce a metadata schema from a vector container. Used when given a
+ * record batch without metadata.
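+ * <p>
+ * Usage is a single call, sketched here:
+ * <pre><code>
+ * TupleMetadata schema = new SchemaInference().infer(container);
+ * </code></pre>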
+ */
+
+public class SchemaInference {
+
+ public TupleMetadata infer(VectorContainer container) {
+ List<ColumnMetadata> columns = new ArrayList<>();
+ for (int i = 0; i < container.getNumberOfColumns(); i++) {
+ MaterializedField field = container.getValueVector(i).getField();
+ columns.add(inferVector(field));
+ }
+ return TupleSchema.fromColumns(columns);
+ }
+
+ private ColumnMetadata inferVector(MaterializedField field) {
+ if (field.getType().getMinorType() == MinorType.MAP) {
+ return TupleSchema.newMap(field, inferMapSchema(field));
+ } else {
+ return TupleSchema.fromField(field);
+ }
+ }
+
+ private TupleSchema inferMapSchema(MaterializedField field) {
+ List<ColumnMetadata> columns = new ArrayList<>();
+ for (MaterializedField child : field.getChildren()) {
+ columns.add(inferVector(child));
+ }
+ return TupleSchema.fromColumns(columns);
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/TupleModel.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/TupleModel.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/TupleModel.java
new file mode 100644
index 0000000..5fcba73
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/TupleModel.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+
+/**
+ * Common interface to access a tuple backed by a vector container or a
+ * map vector. Provides a visitor interface to apply tasks such as vector
+ * allocation, reader or writer creation, and so on. Allows either static
+ * or dynamic vector allocation.
+ * <p>
+ * The terminology used here:
+ * <dl>
+ * <dt>Row set</dt>
+ * <dd>A collection of rows stored as value vectors. Elsewhere in
+ * Drill we call this a "record batch", but that term has been overloaded to
+ * mean the runtime implementation of an operator.</dd>
+ * <dt>Tuple</dt>
+ * <dd>The relational-theory term for a row. Drill maps have a fixed schema.
+ * Impala, Hive and other tools use the term "structure" (or "struct") for
+ * what Drill calls a map. A structure is simply a nested tuple, modeled
+ * here by the same tuple abstraction used for rows.</dd>
+ * <dt>Column</dt>
+ * <dd>A column is represented by a vector (which may have internal
+ * null-flag or offset vectors.) Maps are a kind of column that has an
+ * associated tuple. Because this abstraction models structure, array
+ * columns are grouped with single values: the array-ness is just cardinality.</dd>
+ * <dt>Visitor</dt>
+ * <dd>The visitor abstraction (classic Gang-of-Four pattern) allows adding
+ * functionality without complicating the structure classes. Allows the same
+ * abstraction to be used for the testing <tt>RowSet</tt> abstractions and
+ * the scan operator "loader" classes.</dd>
+ * <dt>Metadata</dt>
+ * <dd>Metadata is simply data about data. Here, data about tuples and columns.
+ * The column metadata mostly expands on that available in {@link MaterializedField},
+ * but also adds allocation hints.</dd>
+ * </dl>
+ * <p>
+ * This abstraction is the physical dual of a {@link VectorContainer}.
+ * The vectors are "owned" by
+ * the associated container. The structure here simply applies additional
+ * metadata and visitor behavior to allow much easier processing than is
+ * possible with the raw container structure.
+ * <p>
+ * A key value of this abstraction is the extended {@link TupleSchema}
+ * associated with the structure. Unlike a
+ * {@link VectorContainer}, this abstraction keeps the schema in sync
+ * with vectors as columns are added.
+ * <p>
+ * Some future version may wish to merge the two concepts. That way, metadata
+ * discovered by one operator will be available to another. Complex recursive
+ * functions can be replaced by a visitor with the recursion handled inside
+ * implementations of this interface.
+ * <p>
+ * Tuples provide access to columns by both index and name. Both the schema and
+ * model classes follow this convention. Compared with the VectorContainer and
+ * {@link AbstractMapVector} classes, the vector index is a first-class concept:
+ * the column model and schema are guaranteed to reside at the same index relative
+ * to the enclosing tuple. In addition, name access is efficient using a hash
+ * index.
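+ * <p>
+ * As a sketch, the uniform tuple interface lets the same code walk both
+ * rows and maps (the method below is illustrative, not part of this API):
+ * <pre><code>
+ * void printColumns(TupleModel tuple, String indent) {
+ *   for (int i = 0; i &lt; tuple.size(); i++) {
+ *     ColumnModel col = tuple.column(i);
+ *     System.out.println(indent + col.schema().name());
+ *     if (col.mapModel() != null) {  // a map is a nested tuple
+ *       printColumns(col.mapModel(), indent + "  ");
+ *     }
+ *   }
+ * }
+ * </code></pre>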
+ * <p>
+ * Visitor classes are defined by the "simple" (single batch) and "hyper"
+ * (multi-batch) implementations to allow vector implementations to work
+ * with the specifics of each type of batch.
+ */
+
+public interface TupleModel {
+
+ /**
+ * Common interface to access a column vector, its metadata, and its
+ * tuple definition (for maps.) Provides a visitor interface for common
+ * vector tasks.
+ */
+
+ public interface ColumnModel {
+ ColumnMetadata schema();
+ TupleModel mapModel();
+ }
+
+ /**
+ * Tuple-model interface for the top-level row (tuple) structure.
+ * Provides access to the {@link VectorContainer} representation of the
+ * row set (record batch.)
+ */
+
+ public interface RowSetModel extends TupleModel {
+ VectorContainer container();
+ }
+
+ TupleMetadata schema();
+ int size();
+ ColumnModel column(int index);
+ ColumnModel column(String name);
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/hyper/BaseReaderBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/hyper/BaseReaderBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/hyper/BaseReaderBuilder.java
new file mode 100644
index 0000000..ee856be
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/hyper/BaseReaderBuilder.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model.hyper;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.VectorDescrip;
+import org.apache.drill.exec.physical.rowSet.model.ReaderIndex;
+import org.apache.drill.exec.record.HyperVectorWrapper;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.VectorWrapper;
+import org.apache.drill.exec.record.selection.SelectionVector4;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.impl.AccessorUtilities;
+import org.apache.drill.exec.vector.accessor.reader.AbstractObjectReader;
+import org.apache.drill.exec.vector.accessor.reader.ColumnReaderFactory;
+import org.apache.drill.exec.vector.accessor.reader.MapReader;
+import org.apache.drill.exec.vector.accessor.reader.ObjectArrayReader;
+import org.apache.drill.exec.vector.accessor.reader.VectorAccessor;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+
+public abstract class BaseReaderBuilder {
+
+ /**
+ * Read-only row index into the hyper row set with batch and index
+   * values mapped via an SV4.
+ */
+
+ public static class HyperRowIndex extends ReaderIndex {
+
+ private final SelectionVector4 sv4;
+
+ public HyperRowIndex(SelectionVector4 sv4) {
+ super(sv4.getCount());
+ this.sv4 = sv4;
+ }
+
+ @Override
+ public int vectorIndex() {
+ return AccessorUtilities.sv4Index(sv4.get(rowIndex));
+ }
+
+ @Override
+    public int batchIndex() {
+ return AccessorUtilities.sv4Batch(sv4.get(rowIndex));
+ }
+ }
+
+ /**
+ * Vector accessor used by the column accessors to obtain the vector for
+ * each column value. That is, position 0 might be batch 4, index 3,
+ * while position 1 might be batch 1, index 7, and so on.
+ */
+
+ public static class HyperVectorAccessor implements VectorAccessor {
+
+ private final ValueVector[] vectors;
+ private ColumnReaderIndex rowIndex;
+
+ public HyperVectorAccessor(VectorWrapper<?> vw) {
+ vectors = vw.getValueVectors();
+ }
+
+ @Override
+ public void bind(ColumnReaderIndex index) {
+ rowIndex = index;
+ }
+
+ @Override
+ public ValueVector vector() {
+ return vectors[rowIndex.batchIndex()];
+ }
+ }
+
+ protected AbstractObjectReader[] buildContainerChildren(
+ VectorContainer container, MetadataProvider mdProvider) {
+ List<AbstractObjectReader> readers = new ArrayList<>();
+ for (int i = 0; i < container.getNumberOfColumns(); i++) {
+ VectorWrapper<?> vw = container.getValueVector(i);
+ VectorDescrip descrip = new VectorDescrip(mdProvider, i, vw.getField());
+ readers.add(buildVectorReader(vw, descrip));
+ }
+ return readers.toArray(new AbstractObjectReader[readers.size()]);
+ }
+
+ @SuppressWarnings("unchecked")
+ private AbstractObjectReader buildVectorReader(VectorWrapper<?> vw, VectorDescrip descrip) {
+ MajorType type = vw.getField().getType();
+ if (type.getMinorType() == MinorType.MAP) {
+ if (type.getMode() == DataMode.REPEATED) {
+ return buildMapArrayReader((HyperVectorWrapper<? extends AbstractMapVector>) vw, descrip);
+ } else {
+ return buildMapReader((HyperVectorWrapper<? extends AbstractMapVector>) vw, descrip);
+ }
+ } else {
+ return buildPrimitiveReader(vw, descrip);
+ }
+ }
+
+ private AbstractObjectReader buildMapArrayReader(HyperVectorWrapper<? extends AbstractMapVector> vectors, VectorDescrip descrip) {
+ AbstractObjectReader mapReader = MapReader.build(descrip.metadata, buildMap(vectors, descrip));
+ return ObjectArrayReader.build(new HyperVectorAccessor(vectors), mapReader);
+ }
+
+ private AbstractObjectReader buildMapReader(HyperVectorWrapper<? extends AbstractMapVector> vectors, VectorDescrip descrip) {
+ return MapReader.build(descrip.metadata, buildMap(vectors, descrip));
+ }
+
+ private AbstractObjectReader buildPrimitiveReader(VectorWrapper<?> vw, VectorDescrip descrip) {
+ return ColumnReaderFactory.buildColumnReader(
+ vw.getField().getType(), new HyperVectorAccessor(vw));
+ }
+
+ private List<AbstractObjectReader> buildMap(HyperVectorWrapper<? extends AbstractMapVector> vectors, VectorDescrip descrip) {
+ List<AbstractObjectReader> readers = new ArrayList<>();
+ MetadataProvider provider = descrip.parent.childProvider(descrip.metadata);
+ MaterializedField mapField = vectors.getField();
+ for (int i = 0; i < mapField.getChildren().size(); i++) {
+ HyperVectorWrapper<? extends ValueVector> child = (HyperVectorWrapper<? extends ValueVector>) vectors.getChildWrapper(new int[] {i});
+ VectorDescrip childDescrip = new VectorDescrip(provider, i, child.getField());
+ readers.add(buildVectorReader(child, childDescrip));
+ }
+ return readers;
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/hyper/package-info.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/hyper/package-info.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/hyper/package-info.java
new file mode 100644
index 0000000..433231e
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/hyper/package-info.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Implementation of a row set model for hyper-batches. A hyper batch is
+ * one that contains a list of batches. The batch is logically composed
+ * of "hyper-vectors", which are the individual vectors from each batch
+ * stacked "end-to-end."
+ * <p>
+ * Hyper batches allow only reading. So, the only services here are to
+ * parse a hyper-container into a row set model, then use that model to
+ * create a matching set of readers.
+ */
+
+package org.apache.drill.exec.physical.rowSet.model.hyper;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/package-info.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/package-info.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/package-info.java
new file mode 100644
index 0000000..6f24d33
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/package-info.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The "row set model" provides a "dual" of the vector structure used to create,
+ * allocate and work with a collection of vectors. The model provides an enhanced
+ * "metadata" schema, given by {@link TupleMetadata} and {@link ColumnMetadata},
+ * with allocation hints that go beyond the {@link MaterializedField}
+ * used by value vectors.
+ * <p>
+ * In an ideal world, this structure would not be necessary; the vectors could, by
+ * themselves, provide the needed structure. However, vectors are used in many
+ * places, in many ways, and are hard to evolve. Further, Drill may eventually
+ * choose to move to Arrow, which would not have the structure provided here.
+ * <p>
+ * A set of visitor classes provides the logic to traverse the vector structure,
+ * avoiding the need for multiple implementations of vector traversal. (Traversal
+ * is needed because maps contain vectors, some of which can be maps, resulting
+ * in a tree structure.) Further, the API provided by containers (a top-level
+ * tuple) differs from that of a map vector (nested tuple); this structure
+ * provides a uniform API for both cases.
+ * <p>
+ * Three primary tasks provided by this structure are:
+ * <ol>
+ * <li>Create writers for a set of vectors. Allow incremental write-time
+ * addition of columns, keeping the vectors, columns and metadata all in
+ * sync.</li>
+ * <li>Create readers for a set of vectors. Vectors are immutable once written,
+ * so the reader mechanism does not provide any dynamic schema change
+ * support.</li>
+ * <li>Allocate vectors based on metadata provided. Allocation metadata
+ * includes estimated widths for variable-width columns and estimated
+ * cardinality for array columns.</li>
+ * </ol>
+ * <p>
+ * Drill supports two kinds of batches, reflected by two implementations of
+ * the structure:
+ * <dl>
+ * <dt>Single batch</dt>
+ * <dd>Represents a single batch in which each column is backed by a single
+ * value vector. Single batches support both reading and writing. Writing can
+ * be done only for "new" batches; reading can be done only after writing
+ * is complete. Modeled by the {@link org.apache.drill.exec.physical.rowSet.model.single
+ * single} package.</dd>
+ * <dt>Hyper batch</dt>
+ * <dd>Represents a stacked set of batches in which each column is backed
+ * by a list of vectors. A hyper batch is indexed by an "sv4" (four-byte
+ * selection vector.) A hyper batch allows only reading. Modeled by the
+ * {@link org.apache.drill.exec.physical.rowSet.model.hyper hyper} package.</dd>
+ * </dl>
+ */
+
+package org.apache.drill.exec.physical.rowSet.model;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BaseReaderBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BaseReaderBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BaseReaderBuilder.java
new file mode 100644
index 0000000..80ad19f
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BaseReaderBuilder.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model.single;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.VectorDescrip;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.reader.AbstractObjectReader;
+import org.apache.drill.exec.vector.accessor.reader.ColumnReaderFactory;
+import org.apache.drill.exec.vector.accessor.reader.MapReader;
+import org.apache.drill.exec.vector.accessor.reader.ObjectArrayReader;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+import org.apache.drill.exec.vector.complex.RepeatedMapVector;
+
+public abstract class BaseReaderBuilder {
+
+ protected List<AbstractObjectReader> buildContainerChildren(
+ VectorContainer container, MetadataProvider mdProvider) {
+    List<AbstractObjectReader> readers = new ArrayList<>();
+    for (int i = 0; i < container.getNumberOfColumns(); i++) {
+      @SuppressWarnings("resource")
+      ValueVector vector = container.getValueVector(i).getValueVector();
+      VectorDescrip descrip = new VectorDescrip(mdProvider, i, vector.getField());
+      readers.add(buildVectorReader(vector, descrip));
+    }
+    return readers;
+ }
+
+ private AbstractObjectReader buildVectorReader(ValueVector vector, VectorDescrip descrip) {
+ MajorType type = vector.getField().getType();
+ if (type.getMinorType() == MinorType.MAP) {
+ if (type.getMode() == DataMode.REPEATED) {
+ return buildMapArrayReader((RepeatedMapVector) vector, descrip);
+ } else {
+ return buildMapReader((MapVector) vector, descrip);
+ }
+ } else {
+ return buildPrimitiveReader(vector, descrip);
+ }
+ }
+
+ private AbstractObjectReader buildMapArrayReader(RepeatedMapVector vector, VectorDescrip descrip) {
+ AbstractObjectReader mapReader = MapReader.build(descrip.metadata, buildMap(vector, descrip));
+ return ObjectArrayReader.build(vector, mapReader);
+ }
+
+ private AbstractObjectReader buildMapReader(MapVector vector, VectorDescrip descrip) {
+ return MapReader.build(descrip.metadata, buildMap(vector, descrip));
+ }
+
+ private AbstractObjectReader buildPrimitiveReader(ValueVector vector, VectorDescrip descrip) {
+ return ColumnReaderFactory.buildColumnReader(vector);
+ }
+
+ private List<AbstractObjectReader> buildMap(AbstractMapVector vector, VectorDescrip descrip) {
+ List<AbstractObjectReader> readers = new ArrayList<>();
+ MetadataProvider provider = descrip.parent.childProvider(descrip.metadata);
+ int i = 0;
+ for (ValueVector child : vector) {
+ VectorDescrip childDescrip = new VectorDescrip(provider, i, child.getField());
+ readers.add(buildVectorReader(child, childDescrip));
+ i++;
+ }
+ return readers;
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BaseWriterBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BaseWriterBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BaseWriterBuilder.java
new file mode 100644
index 0000000..bab7b39
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BaseWriterBuilder.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model.single;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.VectorDescrip;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.ColumnWriterFactory;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+
+/**
+ * Build a set of writers for a single (non-hyper) vector container.
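+ * <p>
+ * Derived classes drive the build roughly as sketched here, pairing the
+ * container with a metadata provider:
+ * <pre><code>
+ * List&lt;AbstractObjectWriter&gt; writers =
+ *     buildContainerChildren(container, new MetadataCreator());
+ * </code></pre>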
+ */
+
+public abstract class BaseWriterBuilder {
+
+ protected List<AbstractObjectWriter> buildContainerChildren(VectorContainer container, MetadataProvider mdProvider) {
+ List<AbstractObjectWriter> writers = new ArrayList<>();
+ for (int i = 0; i < container.getNumberOfColumns(); i++) {
+ @SuppressWarnings("resource")
+ ValueVector vector = container.getValueVector(i).getValueVector();
+ VectorDescrip descrip = new VectorDescrip(mdProvider, i, vector.getField());
+ writers.add(buildVectorWriter(vector, descrip));
+ }
+ return writers;
+ }
+
+ private AbstractObjectWriter buildVectorWriter(ValueVector vector, VectorDescrip descrip) {
+ MajorType type = vector.getField().getType();
+ if (type.getMinorType() == MinorType.MAP) {
+ return ColumnWriterFactory.buildMapWriter(descrip.metadata,
+ (AbstractMapVector) vector,
+ buildMap((AbstractMapVector) vector, descrip));
+ } else {
+ return ColumnWriterFactory.buildColumnWriter(descrip.metadata, vector);
+ }
+ }
+
+ private List<AbstractObjectWriter> buildMap(AbstractMapVector vector, VectorDescrip descrip) {
+ List<AbstractObjectWriter> writers = new ArrayList<>();
+ MetadataProvider provider = descrip.parent.childProvider(descrip.metadata);
+ int i = 0;
+ for (ValueVector child : vector) {
+ VectorDescrip childDescrip = new VectorDescrip(provider, i, child.getField());
+ writers.add(buildVectorWriter(child, childDescrip));
+ i++;
+ }
+ return writers;
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BuildVectorsFromMetadata.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BuildVectorsFromMetadata.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BuildVectorsFromMetadata.java
new file mode 100644
index 0000000..30f60b3
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/BuildVectorsFromMetadata.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model.single;
+
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+
+/**
+ * Build (materialize) a set of vectors based on a provided
+ * metadata schema.
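+ * <p>
+ * A sketch of typical use, assuming an allocator and a metadata schema are
+ * at hand:
+ * <pre><code>
+ * VectorContainer container =
+ *     new BuildVectorsFromMetadata(allocator).build(schema);
+ * </code></pre>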
+ */
+
+public class BuildVectorsFromMetadata {
+
+ private final BufferAllocator allocator;
+
+ public BuildVectorsFromMetadata(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ public VectorContainer build(TupleMetadata schema) {
+ VectorContainer container = new VectorContainer(allocator);
+ for (int i = 0; i < schema.size(); i++) {
+ container.add(buildVector(schema.metadata(i)));
+ }
+
+ // Build the row set from a matching triple of schema, container and
+ // column models.
+
+ container.buildSchema(SelectionVectorMode.NONE);
+ return container;
+ }
+
+ private ValueVector buildVector(ColumnMetadata metadata) {
+ if (metadata.isMap()) {
+ return buildMap(metadata);
+ } else {
+ return TypeHelper.getNewVector(metadata.schema(), allocator, null);
+ }
+ }
+
+ /**
+ * Build a map column including the members of the map given a map
+ * column schema.
+ *
+ * @param schema the schema of the map column
+ * @return the completed map vector column model
+ */
+
+ private AbstractMapVector buildMap(ColumnMetadata schema) {
+
+ // Creating the map vector will create its contained vectors if we
+ // give it a materialized field with children. So, instead pass a clone
+ // without children so we can add them.
+
+ MaterializedField mapField = schema.schema();
+ MaterializedField emptyClone = MaterializedField.create(mapField.getName(), mapField.getType());
+
+ // Don't get the map vector from the vector cache. Map vectors may
+ // have content that varies from batch to batch. Only the leaf
+ // vectors can be cached.
+
+ AbstractMapVector mapVector = (AbstractMapVector) TypeHelper.getNewVector(emptyClone, allocator, null);
+
+ // Create the contents building the model as we go.
+
+ TupleMetadata mapSchema = schema.mapSchema();
+ for (int i = 0; i < mapSchema.size(); i++) {
+ ColumnMetadata childSchema = mapSchema.metadata(i);
+ mapVector.putChild(childSchema.name(), buildVector(childSchema));
+ }
+
+ return mapVector;
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java
new file mode 100644
index 0000000..34a6960
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/VectorAllocator.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.model.single;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataCreator;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataRetrieval;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.AllocationHelper;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+import org.apache.drill.exec.vector.complex.RepeatedMapVector;
+
+/**
+ * Given a vector container, and a metadata schema that matches the container,
+ * walk the schema tree to allocate new vectors according to a given
+ * row count and the size information provided in column metadata.
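+ * <p>
+ * A sketch of typical use, given a container and its matching schema:
+ * <pre><code>
+ * new VectorAllocator(container).allocate(targetRowCount, schema);
+ * </code></pre>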
+ * <p>
+ * @see {@link AllocationHelper} - the class which this one replaces
+ * @see {@link VectorInitializer} - an earlier cut at implementation
+ * based on data from the {@link RecordBatchSizer}
+ */
+
+// TODO: Does not yet handle lists; lists are a simple extension
+// of the array-handling logic below.
+
+public class VectorAllocator {
+
+ private final VectorContainer container;
+
+ public VectorAllocator(VectorContainer container) {
+ this.container = container;
+ }
+
+ public void allocate(int rowCount) {
+ allocate(rowCount, new MetadataCreator());
+ }
+
+ public void allocate(int rowCount, TupleMetadata schema) {
+ allocate(rowCount, new MetadataRetrieval(schema));
+ }
+
+ public void allocate(int rowCount, MetadataProvider mdProvider) {
+ for (int i = 0; i < container.getNumberOfColumns(); i++) {
+ @SuppressWarnings("resource")
+ ValueVector vector = container.getValueVector(i).getValueVector();
+ allocateVector(vector, mdProvider.metadata(i, vector.getField()), rowCount, mdProvider);
+ }
+ }
+
+ private void allocateVector(ValueVector vector, ColumnMetadata metadata, int valueCount, MetadataProvider mdProvider) {
+ MajorType type = vector.getField().getType();
+ assert vector.getField().getName().equals(metadata.name());
+ assert type.getMinorType() == metadata.type();
+ if (type.getMinorType() == MinorType.MAP) {
+ if (type.getMode() == DataMode.REPEATED) {
+ allocateMapArray((RepeatedMapVector) vector, metadata, valueCount, mdProvider);
+ } else {
+ allocateMap((AbstractMapVector) vector, metadata, valueCount, mdProvider);
+ }
+ } else {
+ allocatePrimitive(vector, metadata, valueCount);
+ }
+ }
+
+ private void allocatePrimitive(ValueVector vector,
+ ColumnMetadata metadata, int valueCount) {
+ AllocationHelper.allocatePrecomputedChildCount(vector,
+ valueCount,
+ metadata.expectedWidth(),
+ metadata.expectedElementCount());
+ }
+
+ private void allocateMapArray(RepeatedMapVector vector,
+ ColumnMetadata metadata, int valueCount, MetadataProvider mdProvider) {
+ vector.getOffsetVector().allocateNew(valueCount);
+ int expectedValueCount = valueCount * metadata.expectedElementCount();
+ allocateMap(vector, metadata, expectedValueCount, mdProvider);
+ }
+
+ private void allocateMap(AbstractMapVector vector, ColumnMetadata metadata, int valueCount, MetadataProvider mdProvider) {
+ MetadataProvider mapProvider = mdProvider.childProvider(metadata);
+ TupleMetadata mapSchema = metadata.mapSchema();
+ assert mapSchema != null;
+ int i = 0;
+ for (ValueVector child : vector) {
+ allocateVector(child, mapProvider.metadata(i, child.getField()), valueCount, mapProvider);
+ i++;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/package-info.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/package-info.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/package-info.java
new file mode 100644
index 0000000..6cb6f27
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/model/single/package-info.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * This set of classes models the structure of a batch consisting
+ * of single vectors (as contrasted with a hyper batch). Provides tools
+ * for metadata-based construction, allocation, reading and writing of
+ * the vectors.
+ * <p>
+ * The classes here walk the container/map/vector tree to apply
+ * operations.
+ */
+
+package org.apache.drill.exec.physical.rowSet.model.single;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/package-info.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/package-info.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/package-info.java
new file mode 100644
index 0000000..d92c6b7
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/package-info.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Provides a second-generation row set (AKA "record batch") writer used
+ * by client code to<ul>
+ * <li>Define the schema of a result set.</li>
+ * <li>Write data into the vectors backing a row set.</li></ul>
+ * <p>
+ * <h4>Terminology</h4>
+ * The code here follows the "row/column" naming convention rather than
+ * the "record/field" convention.
+ * <dl>
+ * <dt>Result set</dt>
+ * <dd>A set of zero or more row sets that hold rows of data.</dd>
+ * <dt>Row set</dt>
+ * <dd>A collection of rows with a common schema. Also called a "row
+ * batch" or "record batch." (But, in Drill, the term "record batch" also
+ * usually means an operator on that set of records. Here, a row set is
+ * just the rows &ndash; separate from operations on that data.)</dd>
+ * <dt>Row</dt>
+ * <dd>A single row of data, in the usual database sense. Here, a row is
+ * a kind of tuple (see below) allowing both name and index access to
+ * columns.</dd>
+ * <dt>Tuple</dt>
+ * <dd>In relational theory, a row is a tuple: a collection of values
+ * defined by a schema. Tuple values are indexed by position or name.</dd>
+ * <dt>Column</dt>
+ * <dd>A single value within a row or row set. (Generally, the context
+ * makes clear if the term refers to a single value or to all values for
+ * a column in a row set.) Columns are backed by value vectors.</dd>
+ * <dt>Map</dt>
+ * <dd>In Drill, a map is what other systems call a "structure". It is,
+ * in fact, a nested tuple. In a Java or Python map, each map instance has
+ * a distinct set of name/value pairs. But, in Drill, all map instances have
+ * the same schema; hence the so-called "map" is really a tuple. This
+ * implementation exploits that fact and treats the row, and nested maps,
+ * almost identically: both provide columns indexed by name or position.</dd>
+ * <dt>Row Set Mutator</dt>
+ * <dd>An awkward name, but retains the "mutator" name from the previous
+ * generation. The mechanism to build a result set as a series of row sets.</dd>
+ * <dt>Tuple Loader</dt>
+ * <dd>Mechanism to build a single tuple (row or map) by providing name
+ * or index access to columns. A better name would be "tuple writer", but
+ * that name is already used elsewhere.</dd>
+ * <dt>Column Loader</dt>
+ * <dd>Mechanism to write values to a single column.</dd>
+ * </dl>
+ * <h4>Building the Schema</h4>
+ * The row set mutator works for two cases: a known schema or a discovered
+ * schema. A known schema occurs in cases, such as JDBC, where the
+ * underlying data source can describe the schema before reading any rows.
+ * In this case, client code can build the schema and pass that schema to
+ * the mutator directly. Alternatively, the client code can build the
+ * schema column-by-column before the first row is read.
+ * <p>
+ * Readers that discover schema can build the schema incrementally: add
+ * a column, load data for that column for one row, discover the next
+ * column, and so on. Almost any kind of column can be added at any time
+ * within the first batch:<ul>
+ * <li>Required columns are "back-filled" with zeros in the active batch,
+ * if that value makes sense for the column. (Date and Interval columns
+ * will throw an exception if added after the first row, as there is no
+ * good "zero" value for that column.) Varchar columns are back-filled
+ * with blanks.</li>
+ * <li>Optional (nullable) columns can be added at any time; they are
+ * back-filled with nulls in the active batch. In general, if a column is
+ * added after the first row, it should be nullable, not required, unless
+ * the data source has a "missing = blank or zero" policy.</li>
+ * <li>Repeated (array) columns can be added at any time; they are
+ * back-filled with empty entries in the first batch.</li></ul>
+ * Client code must be aware of the semantics of adding columns at various
+ * times.<ul>
+ * <li>Columns added before or during the first row are the trivial case;
+ * this works for all data types and modes.</li>
+ * <li>Required (non-nullable) structured columns (Date, Period) cannot be
+ * added after the first row (as there is no good zero-fill value).</li>
+ * <li>Columns added within the first batch appear to the rest of Drill as
+ * if they were added before the first row: the downstream operators see the
+ * same schema from batch to batch.</li>
+ * <li>Columns added <i>after</i> the first batch will trigger a
+ * schema-change event downstream.</li>
+ * <li>The above is true even during an "overflow row" (see below). Once
+ * overflow occurs, columns added later in that overflow row will actually
+ * appear in the next batch, and will trigger a schema change when that
+ * batch is returned. That is, overflow "time shifts" a row addition from
+ * one batch to the next, and so it also time-shifts the column addition.
+ * </li></ul>
+ * Use the {@link LoaderSchema} class to build the schema. The schema class is
+ * part of the {@link TupleLoader} object available from the
+ * {@link #root()} method.
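+ * <p>
+ * For example (a sketch only; the <tt>mutator</tt> variable and the
+ * schema-building method names are assumptions for illustration, not
+ * the committed API):
+ * <pre><code>
+ * TupleLoader writer = mutator.root();
+ * // Discover a new column mid-batch (assumed method name), then
+ * // write its value for the current row.
+ * writer.schema().addColumn(someColumnSchema);
+ * writer.column("newCol").setInt(10);
+ * </code></pre>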
+ * <h4>Using the Schema</h4>
+ * The loader presents columns using a physical schema. That is, map
+ * columns appear as columns that provide a nested map schema. The design
+ * presumes that column access is primarily structural: first get a map,
+ * then process all columns for the map.
+ * <p>
+ * If the input is a flat structure, then the physical schema degenerates
+ * to a flat schema.
+ * <p>
+ * In both cases, access to columns is by index or by name. If new columns
+ * are added while loading, their index is always at the end of the existing
+ * columns.
+ * <h4>Writing Data to the Batch</h4>
+ * Each batch is delimited by a call to {@link #startBatch()} and a call to
+ * {@link #harvestWithLookAhead()} to obtain the completed batch. Note that readers do not
+ * call these methods; the scan operator does this work.
+ * <p>
+ * Each row is delimited by a call to {@link #startRow()} and a call to
+ * {@link #saveRow()}. <tt>startRow()</tt> performs initialization necessary
+ * for some vectors such as repeated vectors. <tt>saveRow()</tt> moves the
+ * row pointer ahead.
+ * <p>
+ * A reader can easily reject a row by calling <tt>startRow()</tt>,
+ * beginning to load the row, but omitting the call to <tt>saveRow()</tt>.
+ * In this case,
+ * the next call to <tt>startRow()</tt> repositions the row pointer to the
+ * same row, and new data will overwrite the previous data, effectively erasing
+ * the unwanted row. This also works for the last row; omitting the call to
+ * <tt>saveRow()</tt> causes the batch to hold only the rows actually
+ * saved.
+ * <p>
+ * Readers then write to each column. Columns are accessible via index
+ * ({@link TupleLoader#column(int)}) or by name
+ * ({@link TupleLoader#column(String)}). Indexed access is much faster.
+ * Column indexes are defined by the order that columns are added. The first
+ * column is column 0, the second is column 1 and so on.
+ * <p>
+ * Each call to the above methods returns the same column writer, allowing the
+ * reader to cache column writers for additional performance.
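+ * For example (an illustrative sketch; <tt>ColumnLoader</tt> follows
+ * the terminology above):
+ * <pre><code>
+ * ColumnLoader aCol = writer.column(0);   // cache by index (fastest)
+ * ColumnLoader bCol = writer.column("b"); // or cache by name
+ * </code></pre>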
+ * <p>
+ * All column writers are of the same class; there is no need to cast to a
+ * type corresponding to the vector. Instead, they provide a variety of
+ * <tt>set<i>Type</i></tt> methods, where the type is one of various Java
+ * primitive or structured types. Most vectors provide just one method, but
+ * others (such as VarChar) provide two. The implementation will throw an
+ * exception if the vector does not support a particular type.
+ * <p>
+ * Note that this class uses the term "loader" for row and column writers
+ * since the term "writer" is already used by the legacy record set mutator
+ * and column writers.
+ * <h4>Handling Batch Limits</h4>
+ * The mutator enforces two sets of batch limits:<ol>
+ * <li>The number of rows per batch. The limit defaults to 64K (the Drill
+ * maximum), but can be set lower by the client.</li>
+ * <li>The size of the largest vector, which is capped at 16 MB. (A future
+ * version may allow adjustable caps, or cap the memory of the entire
+ * batch.)</li></ol>
+ * Both limits are presented to the client via the {@link #isFull()}
+ * method. After each call to {@link #saveRow()}, the client should call
+ * <tt>isFull()</tt> to determine if the client can add another row. Note
+ * that failing to do this check will cause the next call to
+ * {@link #startBatch()} to throw an exception.
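+ * <p>
+ * Putting the pieces together, a typical write loop is sketched below
+ * (illustrative only; the reader-side variables are assumed):
+ * <pre><code>
+ * mutator.startBatch();
+ * TupleLoader writer = mutator.root();
+ * while (! mutator.isFull() &amp;&amp; reader.hasNext()) {
+ *   mutator.startRow();
+ *   writer.column(0).setInt(reader.nextInt());
+ *   mutator.saveRow(); // omit this call to discard the row
+ * }
+ * </code></pre>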
+ * <p>
+ * The limits have subtle differences, however. Row limits are simple: at
+ * the end of the last row, the mutator notices that no more rows are possible,
+ * and so does not allow starting a new row.
+ * <p>
+ * Vector overflow is more complex. A row may consist of columns (a, b, c).
+ * The client may write column a, but then column b might trigger a vector
+ * overflow. (For example, b is a Varchar, and the value for b is larger than
+ * the space left in the vector.) The client cannot stop and rewrite a. Instead,
+ * the client simply continues writing the row. The mutator, internally, moves
+ * this "overflow" row to a new batch. The overflow row becomes the first row
+ * of the next batch rather than the first row of the current batch.
+ * <p>
+ * For this reason, the client can treat the two overflow cases identically,
+ * as described above.
+ * <p>
+ * There are some subtle differences between the two cases that clients
+ * may occasionally need to handle:<ul>
+ * <li>When a vector overflow occurs, the returned batch will have one
+ * fewer rows than the client might expect if it is simply counting the rows
+ * written.</li>
+ * <li>A new column added to the batch after overflow occurs will appear in
+ * the <i>next</i> batch, triggering a schema change between the current and
+ * next batches.</li></ul>
+ */
+package org.apache.drill.exec.physical.rowSet;
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/record/BatchSchema.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/BatchSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/BatchSchema.java
index 0497cfd..2d01ef4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/BatchSchema.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/BatchSchema.java
@@ -96,6 +96,9 @@ public class BatchSchema implements Iterable<MaterializedField> {
return result;
}
+ // DRILL-5525: the semantics of this method are badly broken.
+ // Caveat emptor.
+
@Override
public boolean equals(Object obj) {
if (this == obj) {
@@ -108,13 +111,24 @@ public class BatchSchema implements Iterable<MaterializedField> {
return false;
}
BatchSchema other = (BatchSchema) obj;
+ if (selectionVectorMode != other.selectionVectorMode) {
+ return false;
+ }
if (fields == null) {
- if (other.fields != null) {
- return false;
- }
- } else if (!fields.equals(other.fields)) {
+ return other.fields == null;
+ }
+
+ // Compare names.
+ // (DRILL-5525: actually compares all fields.)
+
+ if (!fields.equals(other.fields)) {
return false;
}
+
+ // Compare types
+ // (DRILL-5525: this code is redundant because any differences
+ // will fail above.)
+
for (int i = 0; i < fields.size(); i++) {
MajorType t1 = fields.get(i).getType();
MajorType t2 = other.fields.get(i).getType();
@@ -128,13 +142,25 @@ public class BatchSchema implements Iterable<MaterializedField> {
}
}
}
- if (selectionVectorMode != other.selectionVectorMode) {
- return false;
- }
return true;
}
+ /**
+ * Determine whether two schemas are identical according to the rules
+ * defined in {@link MaterializedField#isEquivalent(MaterializedField)}.
+ * In particular,
+ * this method requires that the fields have a 1:1 ordered correspondence
+ * in the two schemas.
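+ * <p>
+ * Illustrative use (the surrounding variables are assumed):
+ * <pre><code>
+ * // Detect a schema change between two incoming batches.
+ * if (! priorSchema.isEquivalent(incomingSchema)) {
+ *   // handle the schema change
+ * }</code></pre>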
+ *
+ * @param other another non-null batch schema
+ * @return <tt>true</tt> if the two schemas are equivalent according to
+ * the {@link MaterializedField#isEquivalent(MaterializedField)} rules,
+ * false otherwise
+ */
+
public boolean isEquivalent(BatchSchema other) {
+ if (this == other) {
+ return true;
+ }
if (fields == null || other.fields == null) {
return fields == other.fields;
}
@@ -172,7 +198,7 @@ public class BatchSchema implements Iterable<MaterializedField> {
}
/**
- * Merge two schema to produce a new, merged schema. The caller is responsible
+ * Merge two schemas to produce a new, merged schema. The caller is responsible
* for ensuring that column names are unique. The order of the fields in the
* new schema is the same as that of this schema, with the other schema's fields
* appended in the order defined in the other schema.
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatch.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatch.java
index b4ae2d2..acb7a9b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatch.java
@@ -19,6 +19,7 @@ package org.apache.drill.exec.record;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.vector.ValueVector;
/**
* A record batch contains a set of field values for a particular range of
@@ -38,7 +39,7 @@ import org.apache.drill.exec.ops.FragmentContext;
public interface RecordBatch extends VectorAccessible {
/** max batch size, limited by 2-byte length in SV2: 65536 = 2^16 */
- public static final int MAX_BATCH_SIZE = 65536;
+ public static final int MAX_BATCH_SIZE = ValueVector.MAX_ROW_COUNT;
/**
* Describes the outcome of incrementing RecordBatch forward by a call to
[06/15] drill git commit: DRILL-5657: Size-aware vector writer
structure
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/vector/TestVectorLimits.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/vector/TestVectorLimits.java b/exec/java-exec/src/test/java/org/apache/drill/vector/TestVectorLimits.java
deleted file mode 100644
index 84961b1..0000000
--- a/exec/java-exec/src/test/java/org/apache/drill/vector/TestVectorLimits.java
+++ /dev/null
@@ -1,487 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.vector;
-
-import static org.junit.Assert.*;
-
-import org.apache.drill.categories.VectorTest;
-import org.apache.drill.common.types.TypeProtos.DataMode;
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.record.MaterializedField;
-import org.apache.drill.exec.vector.IntVector;
-import org.apache.drill.exec.vector.NullableIntVector;
-import org.apache.drill.exec.vector.NullableVarCharVector;
-import org.apache.drill.exec.vector.RepeatedIntVector;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.VarCharVector;
-import org.apache.drill.exec.vector.VectorOverflowException;
-import org.apache.drill.test.DrillTest;
-import org.apache.drill.test.OperatorFixture;
-import org.bouncycastle.util.Arrays;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import io.netty.buffer.DrillBuf;
-import org.junit.experimental.categories.Category;
-
-/**
- * Test the setScalar() methods in the various generated vector
- * classes. Rather than test all 100+ vectors, we sample a few and
- * rely on the fact that code is generated from a common template.
- */
-
-@Category(VectorTest.class)
-public class TestVectorLimits extends DrillTest {
-
- public static OperatorFixture fixture;
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- fixture = OperatorFixture.builder().build();
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- fixture.close();
- }
-
- /**
- * Test a vector directly using the vector mutator to ensure
- * that the <tt>setScalar</tt> method works for the maximum
- * row count.
- * <p>
- * This test is a proxy for all the other fixed types, since all
- * share the same code template.
- */
-
- @Test
- public void testFixedVector() {
-
- // Create a non-nullable int vector: a typical fixed-size vector
-
- @SuppressWarnings("resource")
- IntVector vector = new IntVector(makeField(MinorType.INT, DataMode.REQUIRED), fixture.allocator() );
-
- // Sanity test of generated constants.
-
- assertTrue( IntVector.MAX_SCALAR_COUNT <= ValueVector.MAX_ROW_COUNT );
- assertEquals( 4, IntVector.VALUE_WIDTH );
- assertTrue( IntVector.NET_MAX_SCALAR_SIZE <= ValueVector.MAX_BUFFER_SIZE );
-
- // Allocate a default size, small vector. Forces test of
- // the auto-grow (setSafe()) aspect of setScalar().
-
- vector.allocateNew( );
-
- // Write to the vector until it complains. At that point,
- // we should have written up to the static fixed value count
- // (which is computed to stay below the capacity limit.)
-
- IntVector.Mutator mutator = vector.getMutator();
- for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) {
- try {
- mutator.setScalar(i, i);
- } catch (VectorOverflowException e) {
- assertEquals(IntVector.MAX_SCALAR_COUNT, i);
- break;
- }
- }
-
- // The vector should be below the allocation limit. Since this
- // is an int vector, in practice the size will be far below
- // the overall limit (if the limit stays at 16 MB.) But, it should
- // be at the type-specific limit since we filled up the vector.
-
- assertEquals(IntVector.NET_MAX_SCALAR_SIZE, vector.getBuffer().getActualMemoryConsumed());
- vector.close();
- }
-
- @Test
- public void testNullableFixedVector() {
-
- @SuppressWarnings("resource")
- NullableIntVector vector = new NullableIntVector(makeField(MinorType.INT, DataMode.OPTIONAL), fixture.allocator() );
- vector.allocateNew( );
-
- NullableIntVector.Mutator mutator = vector.getMutator();
- for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) {
- try {
- mutator.setScalar(i, i);
- } catch (VectorOverflowException e) {
- assertEquals(IntVector.MAX_SCALAR_COUNT, i);
- break;
- }
- }
-
- vector.close();
- }
-
- /**
- * Repeated fixed vector. Using an int vector, each column array can hold
- * 256 / 4 = 64 values. We write only 10. The vector becomes full when we
- * exceed 64K items.
- */
-
- @Test
- public void testRepeatedFixedVectorCountLimit() {
-
- @SuppressWarnings("resource")
- RepeatedIntVector vector = new RepeatedIntVector(makeField(MinorType.INT, DataMode.REPEATED), fixture.allocator() );
- vector.allocateNew( );
-
- RepeatedIntVector.Mutator mutator = vector.getMutator();
- top:
- for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) {
- if (! mutator.startNewValueBounded(i)) {
- assertEquals(ValueVector.MAX_ROW_COUNT, i);
- // Continue, let's check the addBounded method also
- }
- for (int j = 0; j < 10; j++) {
- try {
- mutator.addEntry(i, i * 100 + j);
- } catch (VectorOverflowException e) {
- assertEquals(ValueVector.MAX_ROW_COUNT, i);
- mutator.setValueCount(i);
- break top;
- }
- }
- }
-
- vector.close();
- }
-
- /**
- * Repeated fixed vector. Using an int vector, each column array can hold
- * 256 / 4 = 64 values. We write 100. The vector becomes full when we
- * exceed the 16 MB size limit.
- */
-
- @Test
- public void testRepeatedFixedVectorBufferLimit() {
-
- @SuppressWarnings("resource")
- RepeatedIntVector vector = new RepeatedIntVector(makeField(MinorType.INT, DataMode.REPEATED), fixture.allocator() );
- vector.allocateNew( );
-
- RepeatedIntVector.Mutator mutator = vector.getMutator();
- top:
- for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) {
- // We'll never hit the value count limit
- assertTrue(mutator.startNewValueBounded(i));
- for (int j = 0; j < 100; j++) {
- try {
- mutator.addEntry(i, i * 100 + j);
- } catch (VectorOverflowException e) {
- // We should have hit the buffer limit before the value limit.
- assertTrue(i < ValueVector.MAX_ROW_COUNT);
- mutator.setValueCount(i);
- break top;
- }
- }
- }
-
- vector.close();
- }
-
- // To be replaced by a test method in a separate commit.
-
- public static MaterializedField makeField(MinorType dataType, DataMode mode) {
- MajorType type = MajorType.newBuilder()
- .setMinorType(dataType)
- .setMode(mode)
- .build();
-
- return MaterializedField.create("foo", type);
- }
-
- /**
- * Baseline test for a variable-width vector using <tt>setSafe</tt> and
- * loading the vector up to the maximum size. Doing so will cause the vector
- * to have a buffer that exceeds the maximum size, demonstrating the
- * need for <tt>setScalar()</tt>.
- */
-
- @Test
- public void variableVectorBaseline() {
-
- // Create a non-nullable VarChar vector: a typical variable-size vector
-
- @SuppressWarnings("resource")
- VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator() );
- vector.allocateNew( );
-
- // A 16 MB value can hold 64K values of up to 256 bytes each.
- // To force a size overflow, write values much larger.
- // Write the maximum number of values which will silently
- // allow the vector to grow beyond the critical size of 16 MB.
- // Doing this in production would lead to memory fragmentation.
- // So, this is what the setScalar() method assures we don't do.
-
- byte dummyValue[] = new byte[512];
- Arrays.fill(dummyValue, (byte) 'X');
- VarCharVector.Mutator mutator = vector.getMutator();
- for (int i = 0; i < 2 * ValueVector.MAX_ROW_COUNT; i++) {
- mutator.setSafe(i, dummyValue, 0, dummyValue.length);
- }
-
- // The vector should be above the allocation limit.
- // This is why code must migrate to the setScalar() call
- // away from the setSafe() call.
-
- assertTrue(ValueVector.MAX_BUFFER_SIZE < vector.getBuffer().getActualMemoryConsumed());
- vector.close();
- }
-
- /**
- * Test a vector directly using the vector mutator to ensure
- * that the <tt>setScalar</tt> method works for the maximum
- * vector size.
- */
-
- @Test
- public void testWideVariableVector() {
-
- @SuppressWarnings("resource")
- VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator() );
- vector.allocateNew( );
-
- // A 16 MB value can hold 64K values of up to 256 bytes each.
- // To force a size overflow, write values much larger.
- // Write to the vector until it complains. At that point,
- // we should have written up to the maximum buffer size.
-
- byte dummyValue[] = makeVarCharValue(512);
- VarCharVector.Mutator mutator = vector.getMutator();
- int count = 0;
- for ( ; count < 2 * ValueVector.MAX_ROW_COUNT; count++) {
- try {
- mutator.setScalar(count, dummyValue, 0, dummyValue.length);
- } catch (VectorOverflowException e) {
- break;
- }
- }
-
- // The vector should be at the allocation limit. If it wasn't, we
- // should have grown it to hold more data. The value count will
- // be below the maximum.
-
- mutator.setValueCount(count);
- assertEquals(ValueVector.MAX_BUFFER_SIZE, vector.getBuffer().getActualMemoryConsumed());
- assertTrue(count < ValueVector.MAX_ROW_COUNT);
- vector.close();
- }
-
- private byte[] makeVarCharValue(int n) {
- byte dummyValue[] = new byte[n];
- Arrays.fill(dummyValue, (byte) 'X');
- return dummyValue;
- }
-
- @Test
- public void testNullableWideVariableVector() {
-
- @SuppressWarnings("resource")
- NullableVarCharVector vector = new NullableVarCharVector(makeField(MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator() );
- vector.allocateNew( );
-
- byte dummyValue[] = makeVarCharValue(512);
- NullableVarCharVector.Mutator mutator = vector.getMutator();
- int count = 0;
- for ( ; count < 2 * ValueVector.MAX_ROW_COUNT; count++) {
- try {
- mutator.setScalar(count, dummyValue, 0, dummyValue.length);
- } catch (VectorOverflowException e) {
- break;
- }
- }
-
- mutator.setValueCount(count);
- assertEquals(ValueVector.MAX_BUFFER_SIZE, vector.getValuesVector().getBuffer().getActualMemoryConsumed());
- assertTrue(count < ValueVector.MAX_ROW_COUNT);
- vector.close();
- }
-
- /**
- * Test a vector directly using the vector mutator to ensure
- * that the <tt>setScalar</tt> method works for the maximum
- * value count.
- */
-
- @Test
- public void testNarrowVariableVector() {
-
- @SuppressWarnings("resource")
- VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator() );
- vector.allocateNew( );
-
- // Write small values that fit into 16 MB. We should stop writing
- // when we reach the value count limit.
-
- byte dummyValue[] = makeVarCharValue(254);
- VarCharVector.Mutator mutator = vector.getMutator();
- int count = 0;
- for (; count < 2 * ValueVector.MAX_ROW_COUNT; count++) {
- try {
- mutator.setScalar(count, dummyValue, 0, dummyValue.length);
- } catch (VectorOverflowException e) {
- break;
- }
- }
-
- // Buffer size should be at or below the maximum, with count
- // at the maximum.
-
- mutator.setValueCount(count);
- assertTrue(vector.getBuffer().getActualMemoryConsumed() <= ValueVector.MAX_BUFFER_SIZE);
- assertEquals(ValueVector.MAX_ROW_COUNT, count);
- vector.close();
- }
-
- /**
- * Test a vector directly using the vector mutator to ensure
- * that the <tt>setScalar</tt> method works for the maximum
- * value count. Uses a DrillBuf as input.
- */
-
- @Test
- public void testDirectVariableVector() {
-
- @SuppressWarnings("resource")
- VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator() );
- vector.allocateNew( );
-
- // Repeat the big-value test, but with data coming from a DrillBuf
- // (direct memory) rather than a heap array.
-
- @SuppressWarnings("resource")
- DrillBuf drillBuf = makeVarCharValueDirect(260);
- VarCharVector.Mutator mutator = vector.getMutator();
- int count = 0;
- for (; count < 2 * ValueVector.MAX_ROW_COUNT; count++) {
- try {
- mutator.setScalar(count, drillBuf, 0, 260);
- } catch (VectorOverflowException e) {
- break;
- }
- }
- drillBuf.close();
-
- // Again, vector should be at the size limit, count below the
- // value limit.
-
- mutator.setValueCount(count);
- assertEquals(ValueVector.MAX_BUFFER_SIZE, vector.getBuffer().getActualMemoryConsumed());
- assertTrue(count < ValueVector.MAX_ROW_COUNT);
- vector.close();
- }
-
- private DrillBuf makeVarCharValueDirect(int n) {
- byte dummyValue[] = makeVarCharValue(n);
- DrillBuf drillBuf = fixture.allocator().buffer(dummyValue.length);
- drillBuf.setBytes(0, dummyValue);
- return drillBuf;
- }
-
- @Test
- public void testDirectNullableVariableVector() {
-
- @SuppressWarnings("resource")
- NullableVarCharVector vector = new NullableVarCharVector(makeField(MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator() );
- vector.allocateNew( );
-
- @SuppressWarnings("resource")
- DrillBuf drillBuf = makeVarCharValueDirect(260);
- NullableVarCharVector.Mutator mutator = vector.getMutator();
- int count = 0;
- for (; count < 2 * ValueVector.MAX_ROW_COUNT; count++) {
- try {
- mutator.setScalar(count, drillBuf, 0, 260);
- } catch (VectorOverflowException e) {
- break;
- }
- }
- drillBuf.close();
-
- mutator.setValueCount(count);
- assertEquals(ValueVector.MAX_BUFFER_SIZE, vector.getValuesVector().getBuffer().getActualMemoryConsumed());
- assertTrue(count < ValueVector.MAX_ROW_COUNT);
- vector.close();
- }
-
- public static void main(String args[]) {
- try {
- setUpBeforeClass();
- new TestVectorLimits().performanceTest();
- tearDownAfterClass();
- } catch (Exception e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
-
- private void performanceTest() {
- @SuppressWarnings("resource")
- VarCharVector vector = new VarCharVector(makeField(MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator() );
- byte value[] = makeVarCharValue(1);
- int warmCount = 100;
- timeSetSafe(vector, value, warmCount);
- runSetBounded(vector, value, warmCount);
- int runCount = 1000;
- timeSetSafe(vector, value, runCount);
- runSetBounded(vector, value, runCount);
- timeSetSafe(vector, value, runCount);
- vector.close();
- }
-
- private void timeSetSafe(VarCharVector vector, byte[] value, int iterCount) {
- long start = System.currentTimeMillis();
- for (int i = 0; i < iterCount; i++) {
- vector.clear();
- vector.allocateNew( );
-
- VarCharVector.Mutator mutator = vector.getMutator();
- for (int j = 0; j < ValueVector.MAX_ROW_COUNT; j++) {
- mutator.setSafe(j, value, 0, value.length);
- }
- }
- long elapsed = System.currentTimeMillis() - start;
- System.out.println( iterCount + " runs of setSafe: " + elapsed + " ms." );
- }
-
- private void runSetBounded(VarCharVector vector, byte[] value, int iterCount) {
- long start = System.currentTimeMillis();
- for (int i = 0; i < iterCount; i++) {
- vector.clear();
- vector.allocateNew( );
-
- VarCharVector.Mutator mutator = vector.getMutator();
- int posn = 0;
- for (;;) {
- try {
- mutator.setScalar(posn++, value, 0, value.length);
- } catch (VectorOverflowException e) {
- break;
- }
- }
- }
- long elapsed = System.currentTimeMillis() - start;
- System.out.println( iterCount + " runs of setScalar: " + elapsed + " ms." );
- }
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/jdbc-all/pom.xml
----------------------------------------------------------------------
diff --git a/exec/jdbc-all/pom.xml b/exec/jdbc-all/pom.xml
index 82910af..9db9c58 100644
--- a/exec/jdbc-all/pom.xml
+++ b/exec/jdbc-all/pom.xml
@@ -509,7 +509,7 @@
This is likely due to you adding new dependencies to a java-exec and not updating the excludes in this module. This is important as it minimizes the size of the dependency of Drill application users.
</message>
- <maxsize>30000000</maxsize>
+ <maxsize>32000000</maxsize>
<minsize>15000000</minsize>
<files>
<file>${project.build.directory}/drill-jdbc-all-${project.version}.jar</file>
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/memory/base/src/main/java/io/netty/buffer/DrillBuf.java
----------------------------------------------------------------------
diff --git a/exec/memory/base/src/main/java/io/netty/buffer/DrillBuf.java b/exec/memory/base/src/main/java/io/netty/buffer/DrillBuf.java
index 9019507..eda189e 100644
--- a/exec/memory/base/src/main/java/io/netty/buffer/DrillBuf.java
+++ b/exec/memory/base/src/main/java/io/netty/buffer/DrillBuf.java
@@ -52,7 +52,6 @@ public final class DrillBuf extends AbstractByteBuf implements AutoCloseable {
private final int offset;
private final BufferLedger ledger;
private final BufferManager bufManager;
-// private final ByteBufAllocator alloc;
private final boolean isEmpty;
private volatile int length;
private final HistoricalLog historicalLog = BaseAllocator.DEBUG ?
@@ -72,7 +71,6 @@ public final class DrillBuf extends AbstractByteBuf implements AutoCloseable {
this.udle = byteBuf;
this.isEmpty = isEmpty;
this.bufManager = manager;
-// this.alloc = alloc;
this.addr = byteBuf.memoryAddress() + offset;
this.ledger = ledger;
this.length = length;
@@ -106,6 +104,8 @@ public final class DrillBuf extends AbstractByteBuf implements AutoCloseable {
}
}
+ public long addr() { return addr; }
+
private long addr(int index) {
return addr + index;
}
@@ -882,4 +882,70 @@ public final class DrillBuf extends AbstractByteBuf implements AutoCloseable {
}
}
+ // The "unsafe" methods are for use ONLY by code that does its own
+ // bounds checking. They are called "unsafe" for a reason: they will crash
+ // the JVM if values are addressed out of bounds.
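+ //
+ // For example (illustrative only; not part of the methods below), a
+ // caller that verifies capacity once up front can skip per-call checks:
+ //
+ //   if (writeOffset + 4 * count <= buf.capacity()) {
+ //     for (int i = 0; i < count; i++) {
+ //       buf.unsafePutInt(writeOffset + 4 * i, values[i]);
+ //     }
+ //   }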
+
+ /**
+ * Write an integer to the buffer at the given byte index, without
+ * bounds checks.
+ *
+ * @param offset byte (not int) offset of the location to write
+ * @param value the value to write
+ */
+
+ public void unsafePutInt(int offset, int value) {
+ PlatformDependent.putInt(addr + offset, value);
+ }
+
+ /**
+ * Write a long to the buffer at the given byte index, without
+ * bounds checks.
+ *
+ * @param index byte (not long) offset of the location to write
+ * @param value the value to write
+ */
+
+ public void unsafePutLong(int index, long value) {
+ PlatformDependent.putLong(addr + index, value);
+ }
+
+ /**
+ * Write a short to the buffer at the given byte index, without
+ * bounds checks.
+ *
+ * @param offset byte (not short) offset of the location to write
+ * @param value the value to write
+ */
+
+ public void unsafePutShort(int offset, short value) {
+ PlatformDependent.putShort(addr + offset, value);
+ }
+
+ /**
+ * Write a byte to the buffer at the given byte index, without
+ * bounds checks.
+ *
+ * @param offset byte offset of the location to write
+ * @param value the value to write
+ */
+
+ public void unsafePutByte(int offset, byte value) {
+ PlatformDependent.putByte(addr + offset, value);
+ }
+
+ /**
+ * Copy a buffer of heap data to the buffer memory.
+ *
+ * @param srce source byte buffer
+ * @param srcOffset offset within the byte buffer of the start of data
+ * @param destOffset byte offset into this buffer to which to write the
+ * data
+ * @param length length of the data, which must be within the
+ * bounds of this buffer
+ */
+
+ public void unsafeCopyMemory(byte[] srce, int srcOffset, int destOffset, int length) {
+ PlatformDependent.copyMemory(srce, srcOffset, addr + destOffset, length);
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/memory/base/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java
----------------------------------------------------------------------
diff --git a/exec/memory/base/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java b/exec/memory/base/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java
index 5358ca2..1e70216 100644
--- a/exec/memory/base/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java
+++ b/exec/memory/base/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -32,14 +32,16 @@ import com.codahale.metrics.MetricFilter;
import com.codahale.metrics.MetricRegistry;
/**
- * The base allocator that we use for all of Drill's memory management. Returns UnsafeDirectLittleEndian buffers.
+ * The base allocator that we use for all of Drill's memory management. Returns
+ * UnsafeDirectLittleEndian buffers.
*/
+
public class PooledByteBufAllocatorL {
- private static final org.slf4j.Logger memoryLogger = org.slf4j.LoggerFactory.getLogger("drill.allocator");
+ private static final org.slf4j.Logger memoryLogger = org.slf4j.LoggerFactory
+ .getLogger("drill.allocator");
private static final int MEMORY_LOGGER_FREQUENCY_SECONDS = 60;
-
public static final String METRIC_PREFIX = "drill.allocator.";
private final MetricRegistry registry;
@@ -54,7 +56,8 @@ public class PooledByteBufAllocatorL {
public PooledByteBufAllocatorL(MetricRegistry registry) {
this.registry = registry;
allocator = new InnerAllocator();
- empty = new UnsafeDirectLittleEndian(new DuplicatedByteBuf(Unpooled.EMPTY_BUFFER));
+ empty = new UnsafeDirectLittleEndian(
+ new DuplicatedByteBuf(Unpooled.EMPTY_BUFFER));
}
public UnsafeDirectLittleEndian allocate(int size) {
@@ -63,7 +66,6 @@ public class PooledByteBufAllocatorL {
} catch (OutOfMemoryError e) {
throw new OutOfMemoryException("Failure allocating buffer.", e);
}
-
}
public int getChunkSize() {
@@ -72,13 +74,13 @@ public class PooledByteBufAllocatorL {
private class InnerAllocator extends PooledByteBufAllocator {
-
private final PoolArena<ByteBuffer>[] directArenas;
private final MemoryStatusThread statusThread;
private final Histogram largeBuffersHist;
private final Histogram normalBuffersHist;
private final int chunkSize;
+ @SuppressWarnings("unchecked")
public InnerAllocator() {
super(true);
@@ -87,7 +89,9 @@ public class PooledByteBufAllocatorL {
f.setAccessible(true);
this.directArenas = (PoolArena<ByteBuffer>[]) f.get(this);
} catch (Exception e) {
- throw new RuntimeException("Failure while initializing allocator. Unable to retrieve direct arenas field.", e);
+ throw new RuntimeException(
+ "Failure while initializing allocator. Unable to retrieve direct arenas field.",
+ e);
}
this.chunkSize = directArenas[0].chunkSize;
@@ -130,17 +134,14 @@ public class PooledByteBufAllocatorL {
largeBuffersHist = registry.histogram(METRIC_PREFIX + "huge.hist");
normalBuffersHist = registry.histogram(METRIC_PREFIX + "normal.hist");
-
}
-
private synchronized void removeOldMetrics() {
registry.removeMatching(new MetricFilter() {
@Override
public boolean matches(String name, Metric metric) {
return name.startsWith("drill.allocator.");
}
-
});
}
@@ -152,17 +153,21 @@ public class PooledByteBufAllocatorL {
if (initialCapacity > directArena.chunkSize) {
// This is beyond chunk size so we'll allocate separately.
- ByteBuf buf = UnpooledByteBufAllocator.DEFAULT.directBuffer(initialCapacity, maxCapacity);
+ ByteBuf buf = UnpooledByteBufAllocator.DEFAULT
+ .directBuffer(initialCapacity, maxCapacity);
hugeBufferCount.incrementAndGet();
hugeBufferSize.addAndGet(buf.capacity());
largeBuffersHist.update(buf.capacity());
- // logger.debug("Allocating huge buffer of size {}", initialCapacity, new Exception());
- return new UnsafeDirectLittleEndian(new LargeBuffer(buf, hugeBufferSize, hugeBufferCount));
+ // logger.debug("Allocating huge buffer of size {}", initialCapacity,
+ // new Exception());
+ return new UnsafeDirectLittleEndian(
+ new LargeBuffer(buf, hugeBufferSize, hugeBufferCount));
} else {
// within chunk, use arena.
- ByteBuf buf = directArena.allocate(cache, initialCapacity, maxCapacity);
+ ByteBuf buf = directArena.allocate(cache, initialCapacity,
+ maxCapacity);
if (!(buf instanceof PooledUnsafeDirectByteBuf)) {
fail();
}
@@ -173,10 +178,9 @@ public class PooledByteBufAllocatorL {
normalBufferCount.incrementAndGet();
}
- return new UnsafeDirectLittleEndian((PooledUnsafeDirectByteBuf) buf, normalBufferCount,
- normalBufferSize);
+ return new UnsafeDirectLittleEndian((PooledUnsafeDirectByteBuf) buf,
+ normalBufferCount, normalBufferSize);
}
-
} else {
throw fail();
}
@@ -184,10 +188,12 @@ public class PooledByteBufAllocatorL {
private UnsupportedOperationException fail() {
return new UnsupportedOperationException(
- "Drill requries that the JVM used supports access sun.misc.Unsafe. This platform didn't provide that functionality.");
+ "Drill requires that the JVM used supports access sun.misc.Unsafe. This platform doesn't provide that functionality.");
}
- public UnsafeDirectLittleEndian directBuffer(int initialCapacity, int maxCapacity) {
+ @Override
+ public UnsafeDirectLittleEndian directBuffer(int initialCapacity,
+ int maxCapacity) {
if (initialCapacity == 0 && maxCapacity == 0) {
newDirectBuffer(initialCapacity, maxCapacity);
}
@@ -197,13 +203,14 @@ public class PooledByteBufAllocatorL {
@Override
public ByteBuf heapBuffer(int initialCapacity, int maxCapacity) {
- throw new UnsupportedOperationException("Drill doesn't support using heap buffers.");
+ throw new UnsupportedOperationException(
+ "Drill doesn't support using heap buffers.");
}
-
private void validate(int initialCapacity, int maxCapacity) {
if (initialCapacity < 0) {
- throw new IllegalArgumentException("initialCapacity: " + initialCapacity + " (expectd: 0+)");
+ throw new IllegalArgumentException(
+ "initialCapacity: " + initialCapacity + " (expected: 0+)");
}
if (initialCapacity > maxCapacity) {
throw new IllegalArgumentException(String.format(
@@ -223,18 +230,18 @@ public class PooledByteBufAllocatorL {
@Override
public void run() {
while (true) {
- memoryLogger.trace("Memory Usage: \n{}", PooledByteBufAllocatorL.this.toString());
+ memoryLogger.trace("Memory Usage: \n{}",
+ PooledByteBufAllocatorL.this.toString());
try {
Thread.sleep(MEMORY_LOGGER_FREQUENCY_SECONDS * 1000);
} catch (InterruptedException e) {
return;
}
-
}
}
-
}
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append(directArenas.length);
@@ -257,8 +264,6 @@ public class PooledByteBufAllocatorL {
buf.append(" bytes.");
return buf.toString();
}
-
-
}
public static final boolean ASSERT_ENABLED;
@@ -268,5 +273,4 @@ public class PooledByteBufAllocatorL {
assert isAssertEnabled = true;
ASSERT_ENABLED = isAssertEnabled;
}
-
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/memory/base/src/main/java/org/apache/drill/exec/memory/AllocationManager.java
----------------------------------------------------------------------
diff --git a/exec/memory/base/src/main/java/org/apache/drill/exec/memory/AllocationManager.java b/exec/memory/base/src/main/java/org/apache/drill/exec/memory/AllocationManager.java
index 833a604..3b5967f 100644
--- a/exec/memory/base/src/main/java/org/apache/drill/exec/memory/AllocationManager.java
+++ b/exec/memory/base/src/main/java/org/apache/drill/exec/memory/AllocationManager.java
@@ -38,23 +38,25 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
/**
- * Manages the relationship between one or more allocators and a particular UDLE. Ensures that one allocator owns the
- * memory that multiple allocators may be referencing. Manages a BufferLedger between each of its associated allocators.
- * This class is also responsible for managing when memory is allocated and returned to the Netty-based
- * PooledByteBufAllocatorL.
+ * Manages the relationship between one or more allocators and a particular
+ * UDLE. Ensures that one allocator owns the memory that multiple allocators may
+ * be referencing. Manages a BufferLedger between each of its associated
+ * allocators. This class is also responsible for managing when memory is
+ * allocated and returned to the Netty-based PooledByteBufAllocatorL.
*
- * The only reason that this isn't package private is we're forced to put DrillBuf in Netty's package which need access
- * to these objects or methods.
+ * The only reason that this isn't package private is that we're forced to
+ * put DrillBuf in Netty's package, which needs access to these objects or
+ * methods.
*
- * Threading: AllocationManager manages thread-safety internally. Operations within the context of a single BufferLedger
- * are lockless in nature and can be leveraged by multiple threads. Operations that cross the context of two ledgers
- * will acquire a lock on the AllocationManager instance. Important note, there is one AllocationManager per
- * UnsafeDirectLittleEndian buffer allocation. As such, there will be thousands of these in a typical query. The
+ * Threading: AllocationManager manages thread-safety internally. Operations
+ * within the context of a single BufferLedger are lockless in nature and can be
+ * leveraged by multiple threads. Operations that cross the context of two
+ * ledgers will acquire a lock on the AllocationManager instance. Important
+ * note: there is one AllocationManager per UnsafeDirectLittleEndian buffer
+ * allocation. As such, there will be thousands of these in a typical query. The
* contention of acquiring a lock on AllocationManager should be very low.
- *
*/
+
public class AllocationManager {
- // private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AllocationManager.class);
private static final AtomicLong MANAGER_ID_GENERATOR = new AtomicLong(0);
private static final AtomicLong LEDGER_ID_GENERATOR = new AtomicLong(0);
@@ -87,11 +89,13 @@ public class AllocationManager {
}
/**
- * Associate the existing underlying buffer with a new allocator. This will increase the reference count to the
- * provided ledger by 1.
+ * Associate the existing underlying buffer with a new allocator. This will
+ * increase the reference count to the provided ledger by 1.
+ *
* @param allocator
* The target allocator to associate this buffer with.
- * @return The Ledger (new or existing) that associates the underlying buffer to this new ledger.
+ * @return The Ledger (new or existing) that associates the underlying buffer
+ * to this new ledger.
*/
BufferLedger associate(final BaseAllocator allocator) {
return associate(allocator, true);
@@ -114,7 +118,6 @@ public class AllocationManager {
}
return ledger;
}
-
}
try (AutoCloseableLock write = writeLock.open()) {
// we have to recheck existing ledger since a second reader => writer could be competing with us.
@@ -138,11 +141,16 @@ public class AllocationManager {
}
}
+ public static int chunkSize() {
+ return INNER_ALLOCATOR.getChunkSize();
+ }
/**
- * The way that a particular BufferLedger communicates back to the AllocationManager that it now longer needs to hold
- * a reference to particular piece of memory.
+ * The way that a particular BufferLedger communicates back to the
+ * AllocationManager that it no longer needs to hold a reference to a
+ * particular piece of memory.
*/
+
private class ReleaseListener {
private final BufferAllocator allocator;
@@ -154,6 +162,7 @@ public class AllocationManager {
/**
* Can only be called when you already hold the writeLock.
*/
+
public void release() {
allocator.assertOpen();
@@ -180,16 +189,17 @@ public class AllocationManager {
throw new IllegalStateException("The final removal of a ledger should be connected to the owning ledger.");
}
}
-
-
}
}
/**
- * The reference manager that binds an allocator manager to a particular BaseAllocator. Also responsible for creating
- * a set of DrillBufs that share a common fate and set of reference counts.
- * As with AllocationManager, the only reason this is public is due to DrillBuf being in io.netty.buffer package.
+ * The reference manager that binds an allocator manager to a particular
+ * BaseAllocator. Also responsible for creating a set of DrillBufs that share
+ * a common fate and set of reference counts. As with AllocationManager, the
+ * only reason this is public is due to DrillBuf being in io.netty.buffer
+ * package.
*/
+
public class BufferLedger {
private final IdentityHashMap<DrillBuf, Object> buffers =
@@ -294,7 +304,6 @@ public class AllocationManager {
}
}
}
-
}
private void inc() {
@@ -302,9 +311,11 @@ public class AllocationManager {
}
/**
- * Decrement the ledger's reference count. If the ledger is decremented to zero, this ledger should release its
- * ownership back to the AllocationManager
+ * Decrement the ledger's reference count. If the ledger is decremented to
+ * zero, this ledger should release its ownership back to the
+ * AllocationManager
*/
+
public int decrement(int decrement) {
allocator.assertOpen();
@@ -321,29 +332,36 @@ public class AllocationManager {
}
/**
- * Returns the ledger associated with a particular BufferAllocator. If the BufferAllocator doesn't currently have a
- * ledger associated with this AllocationManager, a new one is created. This is placed on BufferLedger rather than
- * AllocationManager directly because DrillBufs don't have access to AllocationManager and they are the ones
- * responsible for exposing the ability to associate multiple allocators with a particular piece of underlying
- * memory. Note that this will increment the reference count of this ledger by one to ensure the ledger isn't
- * destroyed before use.
+ * Returns the ledger associated with a particular BufferAllocator. If the
+ * BufferAllocator doesn't currently have a ledger associated with this
+ * AllocationManager, a new one is created. This is placed on BufferLedger
+ * rather than AllocationManager directly because DrillBufs don't have
+ * access to AllocationManager and they are the ones responsible for
+ * exposing the ability to associate multiple allocators with a particular
+ * piece of underlying memory. Note that this will increment the reference
+ * count of this ledger by one to ensure the ledger isn't destroyed before
+ * use.
*
* @param allocator
* @return
*/
+
public BufferLedger getLedgerForAllocator(BufferAllocator allocator) {
return associate((BaseAllocator) allocator);
}
/**
- * Create a new DrillBuf associated with this AllocationManager and memory. Does not impact reference count.
- * Typically used for slicing.
+ * Create a new DrillBuf associated with this AllocationManager and memory.
+ * Does not impact reference count. Typically used for slicing.
+ *
* @param offset
* The offset in bytes to start this new DrillBuf.
* @param length
* The length in bytes that this DrillBuf will provide access to.
- * @return A new DrillBuf that shares references with all DrillBufs associated with this BufferLedger
+ * @return A new DrillBuf that shares references with all DrillBufs
+ * associated with this BufferLedger
*/
+
public DrillBuf newDrillBuf(int offset, int length) {
allocator.assertOpen();
return newDrillBuf(offset, length, null);
@@ -394,6 +412,7 @@ public class AllocationManager {
*
* @return Size in bytes
*/
+
public int getSize() {
return size;
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/codegen/templates/ColumnAccessors.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/codegen/templates/ColumnAccessors.java b/exec/vector/src/main/codegen/templates/ColumnAccessors.java
index f1fbf2f..33b12be 100644
--- a/exec/vector/src/main/codegen/templates/ColumnAccessors.java
+++ b/exec/vector/src/main/codegen/templates/ColumnAccessors.java
@@ -19,145 +19,131 @@
<@pp.dropOutputFile />
<@pp.changeOutputFile name="/org/apache/drill/exec/vector/accessor/ColumnAccessors.java" />
<#include "/@includes/license.ftl" />
-<#macro getType label>
+<#macro getType drillType label>
@Override
public ValueType valueType() {
<#if label == "Int">
return ValueType.INTEGER;
+ <#elseif drillType == "VarChar" || drillType == "Var16Char">
+ return ValueType.STRING;
<#else>
return ValueType.${label?upper_case};
</#if>
}
</#macro>
-<#macro bindReader prefix drillType>
+<#macro bindReader vectorPrefix drillType isArray >
<#if drillType = "Decimal9" || drillType == "Decimal18">
- private MaterializedField field;
+ private MajorType type;
</#if>
- private ${prefix}${drillType}Vector.Accessor accessor;
+ private ${vectorPrefix}${drillType}Vector.Accessor accessor;
@Override
- public void bind(RowIndex vectorIndex, ValueVector vector) {
- bind(vectorIndex);
+ public void bindVector(ValueVector vector) {
<#if drillType = "Decimal9" || drillType == "Decimal18">
- field = vector.getField();
+ type = vector.getField().getType();
</#if>
- accessor = ((${prefix}${drillType}Vector) vector).getAccessor();
+ accessor = ((${vectorPrefix}${drillType}Vector) vector).getAccessor();
}
<#if drillType = "Decimal9" || drillType == "Decimal18">
@Override
- public void bind(RowIndex vectorIndex, MaterializedField field, VectorAccessor va) {
- bind(vectorIndex, field, va);
- this.field = field;
+ public void bindVector(MajorType type, VectorAccessor va) {
+ super.bindVector(type, va);
+ this.type = type;
}
</#if>
- private ${prefix}${drillType}Vector.Accessor accessor() {
+ private ${vectorPrefix}${drillType}Vector.Accessor accessor() {
if (vectorAccessor == null) {
return accessor;
} else {
- return ((${prefix}${drillType}Vector) vectorAccessor.vector()).getAccessor();
+ return ((${vectorPrefix}${drillType}Vector) vectorAccessor.vector()).getAccessor();
}
}
</#macro>
<#macro get drillType accessorType label isArray>
@Override
public ${accessorType} get${label}(<#if isArray>int index</#if>) {
+ <#assign getObject ="getObject"/>
<#if isArray>
- <#assign index=", index"/>
- <#assign getObject="getSingleObject">
+ <#assign indexVar = "index"/>
<#else>
- <#assign index=""/>
- <#assign getObject="getObject">
+ <#assign indexVar = ""/>
</#if>
- <#if drillType == "VarChar">
- return new String(accessor().get(vectorIndex.index()${index}), Charsets.UTF_8);
- <#elseif drillType == "Var16Char">
- return new String(accessor().get(vectorIndex.index()${index}), Charsets.UTF_16);
- <#elseif drillType == "VarBinary">
- return accessor().get(vectorIndex.index()${index});
+ <#if drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary">
+ return accessor().get(vectorIndex.vectorIndex(${indexVar}));
<#elseif drillType == "Decimal9" || drillType == "Decimal18">
return DecimalUtility.getBigDecimalFromPrimitiveTypes(
- accessor().get(vectorIndex.index()${index}),
- field.getScale(),
- field.getPrecision());
+ accessor().get(vectorIndex.vectorIndex(${indexVar})),
+ type.getScale(),
+ type.getPrecision());
<#elseif accessorType == "BigDecimal" || accessorType == "Period">
- return accessor().${getObject}(vectorIndex.index()${index});
+ return accessor().${getObject}(vectorIndex.vectorIndex(${indexVar}));
+ <#elseif drillType == "UInt1">
+ return ((int) accessor().get(vectorIndex.vectorIndex(${indexVar}))) & 0xFF;
<#else>
- return accessor().get(vectorIndex.index()${index});
+ return accessor().get(vectorIndex.vectorIndex(${indexVar}));
</#if>
}
-</#macro>
-<#macro bindWriter prefix drillType>
- <#if drillType = "Decimal9" || drillType == "Decimal18">
- private MaterializedField field;
- </#if>
- private ${prefix}${drillType}Vector.Mutator mutator;
+ <#if drillType == "VarChar">
@Override
- public void bind(RowIndex vectorIndex, ValueVector vector) {
- bind(vectorIndex);
- <#if drillType = "Decimal9" || drillType == "Decimal18">
- field = vector.getField();
- </#if>
- this.mutator = ((${prefix}${drillType}Vector) vector).getMutator();
+ public String getString(<#if isArray>int index</#if>) {
+ return new String(getBytes(${indexVar}), Charsets.UTF_8);
}
-</#macro>
-<#macro set drillType accessorType label nullable verb>
- @Override
- public void set${label}(${accessorType} value) {
- <#if drillType == "VarChar">
- byte bytes[] = value.getBytes(Charsets.UTF_8);
- mutator.${verb}Safe(vectorIndex.index(), bytes, 0, bytes.length);
<#elseif drillType == "Var16Char">
- byte bytes[] = value.getBytes(Charsets.UTF_16);
- mutator.${verb}Safe(vectorIndex.index(), bytes, 0, bytes.length);
- <#elseif drillType == "VarBinary">
- mutator.${verb}Safe(vectorIndex.index(), value, 0, value.length);
- <#elseif drillType == "Decimal9">
- mutator.${verb}Safe(vectorIndex.index(),
- DecimalUtility.getDecimal9FromBigDecimal(value,
- field.getScale(), field.getPrecision()));
- <#elseif drillType == "Decimal18">
- mutator.${verb}Safe(vectorIndex.index(),
- DecimalUtility.getDecimal18FromBigDecimal(value,
- field.getScale(), field.getPrecision()));
- <#elseif drillType == "IntervalYear">
- mutator.${verb}Safe(vectorIndex.index(), value.getYears() * 12 + value.getMonths());
- <#elseif drillType == "IntervalDay">
- mutator.${verb}Safe(vectorIndex.index(),<#if nullable> 1,</#if>
- value.getDays(),
- ((value.getHours() * 60 + value.getMinutes()) * 60 +
- value.getSeconds()) * 1000 + value.getMillis());
- <#elseif drillType == "Interval">
- mutator.${verb}Safe(vectorIndex.index(),<#if nullable> 1,</#if>
- value.getYears() * 12 + value.getMonths(),
- value.getDays(),
- ((value.getHours() * 60 + value.getMinutes()) * 60 +
- value.getSeconds()) * 1000 + value.getMillis());
+
+ @Override
+ public String getString(<#if isArray>int index</#if>) {
+ return new String(getBytes(${indexVar}), Charsets.UTF_16);
+ }
+ </#if>
+</#macro>
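As a concrete illustration, for a non-array VarChar column (accessorType byte[], label "Bytes") the get macro expands to roughly the following, assuming the accessor wired in by bindReader:

    @Override
    public byte[] getBytes() {
      return accessor().get(vectorIndex.vectorIndex());
    }

    @Override
    public String getString() {
      return new String(getBytes(), Charsets.UTF_8);
    }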
+<#macro build types vectorType accessorType>
+ <#if vectorType == "Repeated">
+ <#assign fnPrefix = "Array" />
+ <#assign classType = "Element" />
<#else>
- mutator.${verb}Safe(vectorIndex.index(), <#if cast=="set">(${javaType}) </#if>value);
+ <#assign fnPrefix = vectorType />
+ <#assign classType = "Scalar" />
</#if>
- }
+ <#if vectorType == "Required">
+ <#assign vectorPrefix = "" />
+ <#else>
+ <#assign vectorPrefix = vectorType />
+ </#if>
+ public static void define${fnPrefix}${accessorType}s(
+ Class<? extends Base${classType}${accessorType}> ${accessorType?lower_case}s[]) {
+ <#list types as type>
+ <#list type.minor as minor>
+ <#assign drillType=minor.class>
+ <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false>
+ <#if ! notyet>
+ <#assign typeEnum=drillType?upper_case>
+ ${accessorType?lower_case}s[MinorType.${typeEnum}.ordinal()] = ${vectorPrefix}${drillType}Column${accessorType}.class;
+ </#if>
+ </#list>
+ </#list>
+ }
</#macro>
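A sketch of what one invocation of the build macro generates; <@build vv.types "Nullable" "Reader" /> expands to roughly the following (INT shown, one assignment per enabled minor type):

    public static void defineNullableReaders(
        Class<? extends BaseScalarReader> readers[]) {
      readers[MinorType.INT.ordinal()] = NullableIntColumnReader.class;
      // ... one entry for each enabled minor type ...
    }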
package org.apache.drill.exec.vector.accessor;
import java.math.BigDecimal;
-import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.vector.*;
-import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.util.DecimalUtility;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnWriter;
-import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector;
-import org.apache.drill.exec.vector.accessor.impl.AbstractArrayReader;
-import org.apache.drill.exec.vector.accessor.impl.AbstractArrayWriter;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader.VectorAccessor;
+import org.apache.drill.exec.vector.accessor.reader.BaseScalarReader;
+import org.apache.drill.exec.vector.accessor.reader.BaseElementReader;
+import org.apache.drill.exec.vector.accessor.reader.VectorAccessor;
+import org.apache.drill.exec.vector.accessor.writer.BaseScalarWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractFixedWidthWriter.BaseFixedWidthWriter;
+import org.apache.drill.exec.vector.accessor.writer.BaseVarWidthWriter;
import com.google.common.base.Charsets;
+
import org.joda.time.Period;
/**
@@ -191,141 +177,176 @@ public class ColumnAccessors {
<#if accessorType=="BigDecimal">
<#assign label="Decimal">
</#if>
+ <#if drillType == "VarChar" || drillType == "Var16Char">
+ <#assign accessorType = "byte[]">
+ <#assign label = "Bytes">
+ </#if>
<#if ! notyet>
//------------------------------------------------------------------------
// ${drillType} readers and writers
- public static class ${drillType}ColumnReader extends AbstractColumnReader {
+ public static class ${drillType}ColumnReader extends BaseScalarReader {
- <@bindReader "" drillType />
+ <@bindReader "" drillType false />
- <@getType label />
+ <@getType drillType label />
<@get drillType accessorType label false/>
}
- public static class Nullable${drillType}ColumnReader extends AbstractColumnReader {
+ public static class Nullable${drillType}ColumnReader extends BaseScalarReader {
- <@bindReader "Nullable" drillType />
+ <@bindReader "Nullable" drillType false />
- <@getType label />
+ <@getType drillType label />
@Override
public boolean isNull() {
- return accessor().isNull(vectorIndex.index());
- }
-
- <@get drillType accessorType label false/>
- }
-
- public static class Repeated${drillType}ColumnReader extends AbstractArrayReader {
-
- <@bindReader "Repeated" drillType />
-
- <@getType label />
-
- @Override
- public int size() {
- return accessor().getInnerValueCountAt(vectorIndex.index());
+ return accessor().isNull(vectorIndex.vectorIndex());
}
- <@get drillType accessorType label true/>
+ <@get drillType accessorType label false />
}
- public static class ${drillType}ColumnWriter extends AbstractColumnWriter {
+ public static class Repeated${drillType}ColumnReader extends BaseElementReader {
- <@bindWriter "" drillType />
+ <@bindReader "" drillType true />
- <@getType label />
+ <@getType drillType label />
- <@set drillType accessorType label false "set" />
+ <@get drillType accessorType label true />
}
- public static class Nullable${drillType}ColumnWriter extends AbstractColumnWriter {
-
- <@bindWriter "Nullable" drillType />
+ <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType == "VarBinary" />
+ <#if varWidth>
+ public static class ${drillType}ColumnWriter extends BaseVarWidthWriter {
+ <#else>
+ public static class ${drillType}ColumnWriter extends BaseFixedWidthWriter {
+ <#if drillType = "Decimal9" || drillType == "Decimal18" ||
+ drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse">
+ private MajorType type;
+ </#if>
+ private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH;
+ </#if>
+ private final ${drillType}Vector vector;
+
+ public ${drillType}ColumnWriter(final ValueVector vector) {
+ <#if varWidth>
+ super(((${drillType}Vector) vector).getOffsetVector());
+ <#else>
+ <#if drillType = "Decimal9" || drillType == "Decimal18" ||
+ drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse">
+ type = vector.getField().getType();
+ </#if>
+ </#if>
+ this.vector = (${drillType}Vector) vector;
+ }
- <@getType label />
+ @Override public BaseDataValueVector vector() { return vector; }
+
+ <#if ! varWidth>
+ @Override public int width() { return VALUE_WIDTH; }
+
+ </#if>
+ <@getType drillType label />
+ <#if accessorType == "byte[]">
+ <#assign args = ", int len">
+ <#else>
+ <#assign args = "">
+ </#if>
+ <#if javaType == "char">
+ <#assign putType = "short" />
+ <#assign doCast = true />
+ <#else>
+ <#assign putType = javaType />
+ <#assign doCast = (cast == "set") />
+ </#if>
+ <#if ! varWidth>
+ </#if>
@Override
- public void setNull() {
- mutator.setNull(vectorIndex.index());
+ public final void set${label}(final ${accessorType} value${args}) {
+ <#-- Must compute the write offset first; can't be inline because the
+           writeIndex() function has a side effect of possibly changing the buffer
+ address (bufAddr). -->
+ <#if varWidth>
+ final int offset = writeIndex(len);
+ <#else>
+ final int writeIndex = writeIndex();
+ <#assign putAddr = "writeIndex * VALUE_WIDTH">
+ </#if>
+ <#if varWidth>
+ drillBuf.unsafeCopyMemory(value, 0, offset, len);
+ offsetsWriter.setNextOffset(offset + len);
+ <#elseif drillType == "Decimal9">
+ drillBuf.unsafePutInt(${putAddr},
+ DecimalUtility.getDecimal9FromBigDecimal(value,
+ type.getScale(), type.getPrecision()));
+ <#elseif drillType == "Decimal18">
+ drillBuf.unsafePutLong(${putAddr},
+ DecimalUtility.getDecimal18FromBigDecimal(value,
+ type.getScale(), type.getPrecision()));
+ <#elseif drillType == "Decimal38Sparse">
+ <#-- Hard to optimize this case. Just use the available tools. -->
+ DecimalUtility.getSparseFromBigDecimal(value, vector.getBuffer(), writeIndex * VALUE_WIDTH,
+ type.getScale(), type.getPrecision(), 6);
+ <#elseif drillType == "Decimal28Sparse">
+ <#-- Hard to optimize this case. Just use the available tools. -->
+ DecimalUtility.getSparseFromBigDecimal(value, vector.getBuffer(), writeIndex * VALUE_WIDTH,
+ type.getScale(), type.getPrecision(), 5);
+ <#elseif drillType == "IntervalYear">
+ drillBuf.unsafePutInt(${putAddr},
+ value.getYears() * 12 + value.getMonths());
+ <#elseif drillType == "IntervalDay">
+ final int offset = ${putAddr};
+ drillBuf.unsafePutInt(offset, value.getDays());
+ drillBuf.unsafePutInt(offset + 4, periodToMillis(value));
+ <#elseif drillType == "Interval">
+ final int offset = ${putAddr};
+ drillBuf.unsafePutInt(offset, value.getYears() * 12 + value.getMonths());
+ drillBuf.unsafePutInt(offset + 4, value.getDays());
+ drillBuf.unsafePutInt(offset + 8, periodToMillis(value));
+ <#elseif drillType == "Float4">
+ drillBuf.unsafePutInt(${putAddr}, Float.floatToRawIntBits((float) value));
+ <#elseif drillType == "Float8">
+ drillBuf.unsafePutLong(${putAddr}, Double.doubleToRawLongBits(value));
+ <#else>
+ drillBuf.unsafePut${putType?cap_first}(${putAddr}, <#if doCast>(${putType}) </#if>value);
+ </#if>
+ vectorIndex.nextElement();
}
-
- <@set drillType accessorType label true "set" />
- }
-
- public static class Repeated${drillType}ColumnWriter extends AbstractArrayWriter {
-
- <@bindWriter "Repeated" drillType />
-
- <@getType label />
-
- protected BaseRepeatedValueVector.BaseRepeatedMutator mutator() {
- return mutator;
+ <#if drillType == "VarChar">
+
+ @Override
+ public final void setString(String value) {
+ final byte bytes[] = value.getBytes(Charsets.UTF_8);
+ setBytes(bytes, bytes.length);
+ }
+ <#elseif drillType == "Var16Char">
+
+ @Override
+ public final void setString(String value) {
+ final byte bytes[] = value.getBytes(Charsets.UTF_16);
+ setBytes(bytes, bytes.length);
}
-
- <@set drillType accessorType label false "add" />
- }
-
- </#if>
- </#list>
-</#list>
- public static void defineReaders(
- Class<? extends AbstractColumnReader> readers[][]) {
-<#list vv.types as type>
- <#list type.minor as minor>
- <#assign drillType=minor.class>
- <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false>
- <#if ! notyet>
- <#assign typeEnum=drillType?upper_case>
- readers[MinorType.${typeEnum}.ordinal()][DataMode.REQUIRED.ordinal()] = ${drillType}ColumnReader.class;
- readers[MinorType.${typeEnum}.ordinal()][DataMode.OPTIONAL.ordinal()] = Nullable${drillType}ColumnReader.class;
</#if>
- </#list>
-</#list>
}
- public static void defineWriters(
- Class<? extends AbstractColumnWriter> writers[][]) {
-<#list vv.types as type>
- <#list type.minor as minor>
- <#assign drillType=minor.class>
- <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false>
- <#if ! notyet>
- <#assign typeEnum=drillType?upper_case>
- writers[MinorType.${typeEnum}.ordinal()][DataMode.REQUIRED.ordinal()] = ${drillType}ColumnWriter.class;
- writers[MinorType.${typeEnum}.ordinal()][DataMode.OPTIONAL.ordinal()] = Nullable${drillType}ColumnWriter.class;
</#if>
</#list>
</#list>
+ public static int periodToMillis(Period value) {
+ return ((value.getHours() * 60 +
+ value.getMinutes()) * 60 +
+ value.getSeconds()) * 1000 +
+ value.getMillis();
}
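For example, applying periodToMillis to a Joda Period of 1 hour, 2 minutes, 3 seconds and 4 milliseconds:

    // ((1 * 60 + 2) * 60 + 3) * 1000 + 4 = 3723004
    Period p = new Period(0, 0, 0, 0, 1, 2, 3, 4);
    int millis = ColumnAccessors.periodToMillis(p); // 3723004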
- public static void defineArrayReaders(
- Class<? extends AbstractArrayReader> readers[]) {
-<#list vv.types as type>
- <#list type.minor as minor>
- <#assign drillType=minor.class>
- <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false>
- <#if ! notyet>
- <#assign typeEnum=drillType?upper_case>
- readers[MinorType.${typeEnum}.ordinal()] = Repeated${drillType}ColumnReader.class;
- </#if>
- </#list>
-</#list>
- }
+<@build vv.types "Required" "Reader" />
- public static void defineArrayWriters(
- Class<? extends AbstractArrayWriter> writers[]) {
-<#list vv.types as type>
- <#list type.minor as minor>
- <#assign drillType=minor.class>
- <#assign notyet=minor.accessorDisabled!type.accessorDisabled!false>
- <#if ! notyet>
- <#assign typeEnum=drillType?upper_case>
- writers[MinorType.${typeEnum}.ordinal()] = Repeated${drillType}ColumnWriter.class;
- </#if>
- </#list>
-</#list>
- }
+<@build vv.types "Nullable" "Reader" />
+
+<@build vv.types "Repeated" "Reader" />
+
+<@build vv.types "Required" "Writer" />
}
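Putting the writer pieces together, the generated code for a required INT column looks roughly like this (a sketch assembled from the macros above, not the literal generated file):

    public static class IntColumnWriter extends BaseFixedWidthWriter {
      private static final int VALUE_WIDTH = IntVector.VALUE_WIDTH;
      private final IntVector vector;

      public IntColumnWriter(final ValueVector vector) {
        this.vector = (IntVector) vector;
      }

      @Override public BaseDataValueVector vector() { return vector; }
      @Override public int width() { return VALUE_WIDTH; }

      @Override
      public ValueType valueType() { return ValueType.INTEGER; }

      @Override
      public final void setInt(final int value) {
        // writeIndex() may grow the vector, moving the underlying buffer.
        final int writeIndex = writeIndex();
        drillBuf.unsafePutInt(writeIndex * VALUE_WIDTH, value);
        vectorIndex.nextElement();
      }
    }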
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/codegen/templates/FixedValueVectors.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/codegen/templates/FixedValueVectors.java b/exec/vector/src/main/codegen/templates/FixedValueVectors.java
index 51938a3..1f6a008 100644
--- a/exec/vector/src/main/codegen/templates/FixedValueVectors.java
+++ b/exec/vector/src/main/codegen/templates/FixedValueVectors.java
@@ -22,8 +22,8 @@
<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
<#if type.major == "Fixed">
-<@pp.changeOutputFile name="/org/apache/drill/exec/vector/${minor.class}Vector.java" />
-<#include "/@includes/license.ftl" />
+ <@pp.changeOutputFile name="/org/apache/drill/exec/vector/${minor.class}Vector.java" />
+ <#include "/@includes/license.ftl" />
package org.apache.drill.exec.vector;
@@ -31,15 +31,18 @@ package org.apache.drill.exec.vector;
import org.apache.drill.exec.util.DecimalUtility;
/**
- * ${minor.class} implements a vector of fixed width values. Elements in the vector are accessed
- * by position, starting from the logical start of the vector. Values should be pushed onto the
- * vector sequentially, but may be accessed randomly.
+ * ${minor.class} implements a vector of fixed width values. Elements in the
+ * vector are accessed by position, starting from the logical start of the
+ * vector. Values should be pushed onto the vector sequentially, but may be
+ * accessed randomly.
* <ul>
- * <li>The width of each element is {@link #VALUE_WIDTH} (= ${type.width}) byte<#if type.width != 1>s</#if>.</li>
+ * <li>The width of each element is {@link #VALUE_WIDTH} (= ${type.width})
+ * byte<#if type.width != 1>s</#if>.</li>
* <li>The equivalent Java primitive is '${minor.javaType!type.javaType}'.</li>
* </ul>
*
- * NB: this class is automatically generated from ${.template_name} and ValueVectorTypes.tdd using FreeMarker.
+ * NB: this class is automatically generated from ${.template_name} and
+ * ValueVectorTypes.tdd using FreeMarker.
*/
public final class ${minor.class}Vector extends BaseDataValueVector implements FixedWidthVector {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(${minor.class}Vector.class);
@@ -151,13 +154,16 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
}
/**
- * Allocate a new buffer that supports setting at least the provided number of values. May actually be sized bigger
- * depending on underlying buffer rounding size. Must be called prior to using the ValueVector.
+ * Allocate a new buffer that supports setting at least the provided number of
+ * values. May actually be sized bigger depending on underlying buffer
+ * rounding size. Must be called prior to using the ValueVector.
*
- * Note that the maximum number of values a vector can allocate is Integer.MAX_VALUE / value width.
+ * Note that the maximum number of values a vector can allocate is
+ * Integer.MAX_VALUE / value width.
*
 * @param valueCount the number of values the new buffer must be able to hold
- * @throws OutOfMemoryException if it can't allocate the new buffer
+ * @throws OutOfMemoryException
+ * if it can't allocate the new buffer
*/
@Override
public void allocateNew(final int valueCount) {
@@ -211,18 +217,24 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
throw new OversizedAllocationException("Unable to expand the buffer. Max allowed buffer size is reached.");
}
+ reallocRaw((int) newAllocationSize);
+ final int halfNewCapacity = data.capacity() / 2;
+ data.setZero(halfNewCapacity, halfNewCapacity);
+ }
+
+ @Override
+ public DrillBuf reallocRaw(int newAllocationSize) {
logger.debug("Reallocating vector [{}]. # of bytes: [{}] -> [{}]", field, allocationSizeInBytes, newAllocationSize);
if (newAllocationSize == 0) {
throw new IllegalStateException("Attempt to reAlloc a zero-sized vector");
}
- final DrillBuf newBuf = allocator.buffer((int)newAllocationSize);
+ final DrillBuf newBuf = allocator.buffer(newAllocationSize);
newBuf.setBytes(0, data, 0, data.capacity());
- final int halfNewCapacity = newBuf.capacity() / 2;
- newBuf.setZero(halfNewCapacity, halfNewCapacity);
newBuf.writerIndex(data.writerIndex());
data.release(1);
data = newBuf;
- allocationSizeInBytes = (int)newAllocationSize;
+ allocationSizeInBytes = newAllocationSize;
+ return newBuf;
}
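The net effect of the refactoring above is a split of responsibilities; a simplified sketch (not the literal template output):

    // reAlloc() doubles the allocation via reallocRaw(), which only copies the
    // existing bytes; reAlloc() then zero-fills the newly added upper half.
    public void reAlloc() {
      final long newAllocationSize = allocationSizeInBytes * 2L;
      reallocRaw((int) newAllocationSize);
      final int halfNewCapacity = data.capacity() / 2;
      data.setZero(halfNewCapacity, halfNewCapacity);
    }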
/**
@@ -337,7 +349,7 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
@Override
public void copyEntry(int toIndex, ValueVector from, int fromIndex) {
- ((${minor.class}Vector) from).data.getBytes(fromIndex * ${type.width}, data, toIndex * ${type.width}, ${type.width});
+ ((${minor.class}Vector) from).data.getBytes(fromIndex * VALUE_WIDTH, data, toIndex * VALUE_WIDTH, VALUE_WIDTH);
}
public void decrementAllocationMonitor() {
@@ -423,7 +435,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
final String monthString = (Math.abs(months) == 1) ? " month " : " months ";
final String dayString = (Math.abs(days) == 1) ? " day " : " days ";
-
return(new StringBuilder().
append(years).append(yearString).
append(months).append(monthString).
@@ -621,26 +632,31 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
}
/**
- * ${minor.class}.Mutator implements a mutable vector of fixed width values. Elements in the
- * vector are accessed by position from the logical start of the vector. Values should be pushed
- * onto the vector sequentially, but may be randomly accessed.
+ * ${minor.class}.Mutator implements a mutable vector of fixed width values.
+ * Elements in the vector are accessed by position from the logical start of
+ * the vector. Values should be pushed onto the vector sequentially, but may
+ * be randomly accessed.
* <ul>
- * <li>The width of each element is {@link #VALUE_WIDTH} (= ${type.width}) byte(s).</li>
+ * <li>The width of each element is {@link #VALUE_WIDTH} (= ${type.width})
+ * byte(s).</li>
* <li>The equivalent Java primitive is '${minor.javaType!type.javaType}'</li>
* </ul>
*
- * NB: this class is automatically generated from ValueVectorTypes.tdd using FreeMarker.
+ * NB: this class is automatically generated from ValueVectorTypes.tdd using
+ * FreeMarker.
*/
public final class Mutator extends BaseDataValueVector.BaseMutator {
private Mutator() {};
/**
- * Set the element at the given index to the given value. Note that widths smaller than
- * 32 bits are handled by the DrillBuf interface.
+ * Set the element at the given index to the given value. Note that widths
+ * smaller than 32 bits are handled by the DrillBuf interface.
*
- * @param index position of the bit to set
- * @param value value to set
+ * @param index
+ * position of the bit to set
+ * @param value
+ * value to set
*/
<#if (type.width > 8)>
@@ -655,37 +671,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
data.setBytes(index * VALUE_WIDTH, value, 0, VALUE_WIDTH);
}
- /**
- * Set the value of a required or nullable vector. Enforces the value
- * and size limits.
- * @param index item to write
- * @param value value to set
- * @throws VectorOverflowException if the item was written, false if the index would
- * overfill the vector
- */
-
- public void setScalar(int index, <#if (type.width > 4)>${minor.javaType!type.javaType}<#else>int</#if> value) throws VectorOverflowException {
- if (index >= MAX_SCALAR_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, value);
- }
-
- /**
- * Set the value of a repeated vector. Enforces only the size limit.
- * @param index item to write
- * @param value value to set
- * @throws VectorOverflowException if the item was written, false if the index would
- * overfill the vector
- */
-
- public void setArrayItem(int index, <#if (type.width > 4)>${minor.javaType!type.javaType}<#else>int</#if> value) throws VectorOverflowException {
- if (index >= MAX_VALUE_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, value);
- }
-
<#if minor.class == "Interval">
public void set(int index, int months, int days, int milliseconds) {
final int offsetIndex = index * VALUE_WIDTH;
@@ -701,20 +686,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
set(index, months, days, milliseconds);
}
- public void setScalar(int index, int months, int days, int milliseconds) throws VectorOverflowException {
- if (index >= MAX_SCALAR_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, months, days, milliseconds);
- }
-
- public void setArrayItem(int index, int months, int days, int milliseconds) throws VectorOverflowException {
- if (index >= MAX_VALUE_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, months, days, milliseconds);
- }
-
protected void set(int index, ${minor.class}Holder holder) {
set(index, holder.months, holder.days, holder.milliseconds);
}
@@ -723,14 +694,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
setSafe(index, holder.months, holder.days, holder.milliseconds);
}
- public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- setScalar(index, holder.months, holder.days, holder.milliseconds);
- }
-
- public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- setArrayItem(index, holder.months, holder.days, holder.milliseconds);
- }
-
protected void set(int index, Nullable${minor.class}Holder holder) {
set(index, holder.months, holder.days, holder.milliseconds);
}
@@ -739,14 +702,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
setSafe(index, holder.months, holder.days, holder.milliseconds);
}
- public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- setScalar(index, holder.months, holder.days, holder.milliseconds);
- }
-
- public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- setArrayItem(index, holder.months, holder.days, holder.milliseconds);
- }
-
<#elseif minor.class == "IntervalDay">
public void set(int index, int days, int milliseconds) {
final int offsetIndex = index * VALUE_WIDTH;
@@ -761,20 +716,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
set(index, days, milliseconds);
}
- public void setScalar(int index, int days, int milliseconds) throws VectorOverflowException {
- if (index >= MAX_SCALAR_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, days, milliseconds);
- }
-
- public void setArrayItem(int index, int days, int milliseconds) throws VectorOverflowException {
- if (index >= MAX_VALUE_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, days, milliseconds);
- }
-
protected void set(int index, ${minor.class}Holder holder) {
set(index, holder.days, holder.milliseconds);
}
@@ -783,14 +724,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
setSafe(index, holder.days, holder.milliseconds);
}
- public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- setScalar(index, holder.days, holder.milliseconds);
- }
-
- public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- setArrayItem(index, holder.days, holder.milliseconds);
- }
-
protected void set(int index, Nullable${minor.class}Holder holder) {
set(index, holder.days, holder.milliseconds);
}
@@ -799,14 +732,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
setSafe(index, holder.days, holder.milliseconds);
}
- public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- setScalar(index, holder.days, holder.milliseconds);
- }
-
- public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- setArrayItem(index, holder.days, holder.milliseconds);
- }
-
<#elseif minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense">
public void setSafe(int index, int start, DrillBuf buffer) {
while(index >= getValueCapacity()) {
@@ -815,20 +740,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
set(index, start, buffer);
}
- public void setScalar(int index, int start, DrillBuf buffer) throws VectorOverflowException {
- if (index >= MAX_SCALAR_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, start, buffer);
- }
-
- public void setArrayItem(int index, int start, DrillBuf buffer) throws VectorOverflowException {
- if (index >= MAX_VALUE_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, start, buffer);
- }
-
public void set(int index, ${minor.class}Holder holder) {
set(index, holder.start, holder.buffer);
}
@@ -837,14 +748,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
setSafe(index, holder.start, holder.buffer);
}
- public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- setScalar(index, holder.start, holder.buffer);
- }
-
- public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- setArrayItem(index, holder.start, holder.buffer);
- }
-
void set(int index, Nullable${minor.class}Holder holder) {
set(index, holder.start, holder.buffer);
}
@@ -853,14 +756,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
setSafe(index, holder.start, holder.buffer);
}
- public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- setScalar(index, holder.start, holder.buffer);
- }
-
- public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- setArrayItem(index, holder.start, holder.buffer);
- }
-
<#if minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse">
public void set(int index, BigDecimal value) {
DecimalUtility.getSparseFromBigDecimal(value, data, index * VALUE_WIDTH,
@@ -874,20 +769,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
set(index, value);
}
- public void setScalar(int index, BigDecimal value) throws VectorOverflowException {
- if (index >= MAX_SCALAR_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, value);
- }
-
- public void setArrayItem(int index, BigDecimal value) throws VectorOverflowException {
- if (index >= MAX_VALUE_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, value);
- }
-
</#if>
public void set(int index, int start, DrillBuf buffer){
data.setBytes(index * VALUE_WIDTH, buffer, start, VALUE_WIDTH);
@@ -912,42 +793,18 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
data.set${(minor.javaType!type.javaType)?cap_first}(index * VALUE_WIDTH, value);
}
- public void setSafe(int index, <#if (type.width >= 4)>${minor.javaType!type.javaType}<#else>int</#if> value) {
- while(index >= getValueCapacity()) {
- reAlloc();
- }
- set(index, value);
- }
-
/**
- * Set the value of a required or nullable vector. Enforces the value
- * and size limits.
+ * Set the value of a required or nullable vector. Grows the vector as needed.
+ * Does not enforce size limits; scalar fixed-width types can never overflow
+ * a vector.
 * @param index item to write
+   * @param value value to set
- * @param value value to set
- * @throws VectorOverflowException if the item was written, false if the index would
- * overfill the vector
*/
- public void setScalar(int index, <#if (type.width >= 4)>${minor.javaType!type.javaType}<#else>int</#if> value) throws VectorOverflowException {
- if (index >= MAX_SCALAR_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, value);
- }
-
- /**
- * Set the value of a repeated vector. Enforces only the size limit.
- * @param index item to write
- * @param value value to set
- * @throws VectorOverflowException if the item was written, false if the index would
- * overfill the vector
- */
-
- public void setArrayItem(int index, <#if (type.width >= 4)>${minor.javaType!type.javaType}<#else>int</#if> value) throws VectorOverflowException {
- if (index >= MAX_VALUE_COUNT) {
- throw new VectorOverflowException();
+ public void setSafe(int index, <#if (type.width >= 4)>${minor.javaType!type.javaType}<#else>int</#if> value) {
+ while(index >= getValueCapacity()) {
+ reAlloc();
}
- setSafe(index, value);
+ set(index, value);
}
protected void set(int index, ${minor.class}Holder holder) {
@@ -961,20 +818,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
set(index, holder);
}
- public void setScalar(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- if (index >= MAX_SCALAR_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, holder);
- }
-
- public void setArrayItem(int index, ${minor.class}Holder holder) throws VectorOverflowException {
- if (index >= MAX_VALUE_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, holder);
- }
-
protected void set(int index, Nullable${minor.class}Holder holder) {
data.set${(minor.javaType!type.javaType)?cap_first}(index * VALUE_WIDTH, holder.value);
}
@@ -986,20 +829,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
set(index, holder);
}
- public void setScalar(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- if (index >= MAX_SCALAR_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, holder);
- }
-
- public void setArrayItem(int index, Nullable${minor.class}Holder holder) throws VectorOverflowException {
- if (index >= MAX_VALUE_COUNT) {
- throw new VectorOverflowException();
- }
- setSafe(index, holder);
- }
-
@Override
public void generateTestData(int size) {
setValueCount(size);
@@ -1028,30 +857,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
}
</#if> <#-- type.width -->
- /**
- * Backfill missing offsets from the given last written position to the
- * given current write position. Used by the "new" size-safe column
- * writers to allow skipping values. The <tt>set()</tt> and <tt>setSafe()</tt>
- * <b>do not</b> fill empties. See DRILL-5529 and DRILL-5530.
- * @param lastWrite the position of the last valid write: the offset
- * to be copied forward
- * @param index the current write position filling occurs up to,
- * but not including, this position
- * @throws VectorOverflowException if the item was written, false if the index would
- * overfill the vector
- */
-
- public void fillEmptiesBounded(int lastWrite, int index)
- throws VectorOverflowException {
- <#if type.width <= 8>
- for (int i = lastWrite + 1; i <= index; i++) {
- setSafe(i, <#if (type.width >= 4)>(${minor.javaType!type.javaType})</#if> 0);
- }
- <#else>
- throw new UnsupportedOperationException("Cannot zero-fill ${minor.class} vectors.");
- </#if>
- }
-
@Override
public void setValueCount(int valueCount) {
final int currentValueCapacity = getValueCapacity();
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/codegen/templates/NullableValueVectors.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/codegen/templates/NullableValueVectors.java b/exec/vector/src/main/codegen/templates/NullableValueVectors.java
index 4f3eb17..fdb0200 100644
--- a/exec/vector/src/main/codegen/templates/NullableValueVectors.java
+++ b/exec/vector/src/main/codegen/templates/NullableValueVectors.java
@@ -47,7 +47,7 @@ package org.apache.drill.exec.vector;
*
* NB: this class is automatically generated from ${.template_name} and ValueVectorTypes.tdd using FreeMarker.
*/
-@SuppressWarnings("unused")
+
public final class ${className} extends BaseDataValueVector implements <#if type.major == "VarLen">VariableWidth<#else>FixedWidth</#if>Vector, NullableVector {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(${className}.class);
@@ -180,6 +180,11 @@ public final class ${className} extends BaseDataValueVector implements <#if type
}
@Override
+ public DrillBuf reallocRaw(int newAllocationSize) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void collectLedgers(Set<BufferLedger> ledgers) {
bits.collectLedgers(ledgers);
values.collectLedgers(ledgers);
@@ -304,7 +309,7 @@ public final class ${className} extends BaseDataValueVector implements <#if type
}
private class TransferImpl implements TransferPair {
- Nullable${minor.class}Vector to;
+ private final Nullable${minor.class}Vector to;
public TransferImpl(MaterializedField field, BufferAllocator allocator){
to = new Nullable${minor.class}Vector(field, allocator);
@@ -336,12 +341,12 @@ public final class ${className} extends BaseDataValueVector implements <#if type
}
@Override
- public Accessor getAccessor(){
+ public Accessor getAccessor() {
return accessor;
}
@Override
- public Mutator getMutator(){
+ public Mutator getMutator() {
return mutator;
}
@@ -548,16 +553,6 @@ public final class ${className} extends BaseDataValueVector implements <#if type
lastSet = index;
}
- public void setScalar(int index, byte[] value, int start, int length) throws VectorOverflowException {
- if (index > lastSet + 1) {
- fillEmpties(index); // Filling empties cannot overflow the vector
- }
- values.getMutator().setScalar(index, value, start, length);
- bits.getMutator().setSafe(index, 1);
- setCount++;
- lastSet = index;
- }
-
public void setSafe(int index, ByteBuffer value, int start, int length) {
if (index > lastSet + 1) {
fillEmpties(index);
@@ -569,17 +564,6 @@ public final class ${className} extends BaseDataValueVector implements <#if type
lastSet = index;
}
- public void setScalar(int index, DrillBuf value, int start, int length) throws VectorOverflowException {
- if (index > lastSet + 1) {
- fillEmpties(index); // Filling empties cannot overflow the vector
- }
-
- values.getMutator().setScalar(index, value, start, length);
- bits.getMutator().setSafe(index, 1);
- setCount++;
- lastSet = index;
- }
-
</#if>
public void setNull(int index) {
bits.getMutator().setSafe(index, 0);
@@ -593,10 +577,6 @@ public final class ${className} extends BaseDataValueVector implements <#if type
values.getMutator().set(index, holder);
}
- public void setNullBounded(int index) throws VectorOverflowException {
- bits.getMutator().setScalar(index, 0);
- }
-
public void set(int index, Nullable${minor.class}Holder holder) {
final ${valuesName}.Mutator valuesMutator = values.getMutator();
<#if type.major == "VarLen">
@@ -648,18 +628,6 @@ public final class ${className} extends BaseDataValueVector implements <#if type
values.getMutator().setSafe(index<#list fields as field><#if field.include!true >, ${field.name}Field</#if></#list>);
setCount++;
<#if type.major == "VarLen">lastSet = index;</#if>
- }
-
- public void setScalar(int index, int isSet<#list fields as field><#if field.include!true >, ${field.type} ${field.name}Field</#if></#list> ) throws VectorOverflowException {
- <#if type.major == "VarLen">
- if (index > lastSet + 1) {
- fillEmpties(index);
- }
- </#if>
- values.getMutator().setScalar(index<#list fields as field><#if field.include!true >, ${field.name}Field</#if></#list>);
- bits.getMutator().setSafe(index, isSet);
- setCount++;
- <#if type.major == "VarLen">lastSet = index;</#if>
}
public void setSafe(int index, Nullable${minor.class}Holder value) {
@@ -674,18 +642,6 @@ public final class ${className} extends BaseDataValueVector implements <#if type
<#if type.major == "VarLen">lastSet = index;</#if>
}
- public void setScalar(int index, Nullable${minor.class}Holder value) throws VectorOverflowException {
- <#if type.major == "VarLen">
- if (index > lastSet + 1) {
- fillEmpties(index);
- }
- </#if>
- values.getMutator().setScalar(index, value);
- bits.getMutator().setSafe(index, value.isSet);
- setCount++;
- <#if type.major == "VarLen">lastSet = index;</#if>
- }
-
public void setSafe(int index, ${minor.class}Holder value) {
<#if type.major == "VarLen">
if (index > lastSet + 1) {
@@ -698,18 +654,6 @@ public final class ${className} extends BaseDataValueVector implements <#if type
<#if type.major == "VarLen">lastSet = index;</#if>
}
- public void setScalar(int index, ${minor.class}Holder value) throws VectorOverflowException {
- <#if type.major == "VarLen">
- if (index > lastSet + 1) {
- fillEmpties(index);
- }
- </#if>
- values.getMutator().setScalar(index, value);
- bits.getMutator().setSafe(index, 1);
- setCount++;
- <#if type.major == "VarLen">lastSet = index;</#if>
- }
-
<#if !(type.major == "VarLen" || minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse" || minor.class == "Decimal28Dense" || minor.class == "Decimal38Dense" || minor.class == "Interval" || minor.class == "IntervalDay")>
public void setSafe(int index, ${minor.javaType!type.javaType} value) {
<#if type.major == "VarLen">
@@ -722,17 +666,6 @@ public final class ${className} extends BaseDataValueVector implements <#if type
setCount++;
}
- public void setScalar(int index, ${minor.javaType!type.javaType} value) throws VectorOverflowException {
- <#if type.major == "VarLen">
- if (index > lastSet + 1) {
- fillEmpties(index);
- }
- </#if>
- values.getMutator().setScalar(index, value);
- bits.getMutator().setSafe(index, 1);
- setCount++;
- }
-
</#if>
<#if minor.class == "Decimal28Sparse" || minor.class == "Decimal38Sparse">
public void set(int index, BigDecimal value) {
@@ -747,12 +680,6 @@ public final class ${className} extends BaseDataValueVector implements <#if type
setCount++;
}
- public void setScalar(int index, BigDecimal value) throws VectorOverflowException {
- values.getMutator().setScalar(index, value);
- bits.getMutator().setSafe(index, 1);
- setCount++;
- }
-
</#if>
@Override
public void setValueCount(int valueCount) {
[15/15] drill git commit: DRILL-5657: Size-aware vector writer structure
Posted by pr...@apache.org.
DRILL-5657: Size-aware vector writer structure
- Vector and accessor layer
- Row Set layer
- Tuple and column models
- Revised write-time metadata
- "Result set loader" layer
this closes #914
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/40de8ca4
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/40de8ca4
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/40de8ca4
Branch: refs/heads/master
Commit: 40de8ca4f47533fa6593d1266403868ae1a2119f
Parents: eb0c403
Author: Paul Rogers <pr...@maprtech.com>
Authored: Thu Aug 17 22:41:30 2017 -0700
Committer: Paul Rogers <pr...@maprtech.com>
Committed: Wed Dec 20 21:17:48 2017 -0800
----------------------------------------------------------------------
.../exec/physical/rowSet/ResultSetLoader.java | 204 +++
.../exec/physical/rowSet/ResultVectorCache.java | 33 +
.../exec/physical/rowSet/RowSetLoader.java | 153 +++
.../exec/physical/rowSet/impl/ColumnState.java | 358 +++++
.../physical/rowSet/impl/NullProjectionSet.java | 41 +
.../rowSet/impl/NullResultVectorCacheImpl.java | 41 +
.../physical/rowSet/impl/NullVectorState.java | 52 +
.../rowSet/impl/NullableVectorState.java | 108 ++
.../physical/rowSet/impl/OptionBuilder.java | 134 ++
.../rowSet/impl/PrimitiveColumnState.java | 105 ++
.../physical/rowSet/impl/ProjectionSet.java | 48 +
.../physical/rowSet/impl/ProjectionSetImpl.java | 136 ++
.../rowSet/impl/RepeatedVectorState.java | 168 +++
.../rowSet/impl/ResultSetLoaderImpl.java | 775 +++++++++++
.../rowSet/impl/ResultVectorCacheImpl.java | 186 +++
.../physical/rowSet/impl/RowSetLoaderImpl.java | 98 ++
.../physical/rowSet/impl/SingleVectorState.java | 274 ++++
.../exec/physical/rowSet/impl/TupleState.java | 388 ++++++
.../rowSet/impl/VectorContainerBuilder.java | 257 ++++
.../exec/physical/rowSet/impl/VectorState.java | 102 ++
.../physical/rowSet/impl/WriterIndexImpl.java | 100 ++
.../exec/physical/rowSet/impl/package-info.java | 304 +++++
.../physical/rowSet/model/BaseTupleModel.java | 117 ++
.../physical/rowSet/model/ContainerVisitor.java | 115 ++
.../physical/rowSet/model/MetadataProvider.java | 93 ++
.../exec/physical/rowSet/model/ReaderIndex.java | 53 +
.../physical/rowSet/model/SchemaInference.java | 61 +
.../exec/physical/rowSet/model/TupleModel.java | 117 ++
.../rowSet/model/hyper/BaseReaderBuilder.java | 149 +++
.../rowSet/model/hyper/package-info.java | 30 +
.../physical/rowSet/model/package-info.java | 68 +
.../rowSet/model/single/BaseReaderBuilder.java | 89 ++
.../rowSet/model/single/BaseWriterBuilder.java | 72 +
.../model/single/BuildVectorsFromMetadata.java | 97 ++
.../rowSet/model/single/VectorAllocator.java | 112 ++
.../rowSet/model/single/package-info.java | 28 +
.../exec/physical/rowSet/package-info.java | 193 +++
.../apache/drill/exec/record/BatchSchema.java | 42 +-
.../apache/drill/exec/record/RecordBatch.java | 3 +-
.../apache/drill/exec/record/TupleSchema.java | 534 ++++++++
.../exec/record/selection/SelectionVector2.java | 20 +-
.../exec/cache/TestBatchSerialization.java | 22 +-
.../exec/physical/impl/TopN/TopNBatchTest.java | 26 +-
.../impl/validate/TestBatchValidator.java | 64 +-
.../physical/impl/xsort/TestExternalSort.java | 12 +-
.../impl/xsort/managed/SortTestUtilities.java | 8 +-
.../physical/impl/xsort/managed/TestCopier.java | 146 +-
.../impl/xsort/managed/TestShortArrays.java | 8 +-
.../impl/xsort/managed/TestSortImpl.java | 46 +-
.../physical/impl/xsort/managed/TestSorter.java | 38 +-
.../rowSet/impl/TestResultSetLoaderLimits.java | 224 ++++
.../impl/TestResultSetLoaderMapArray.java | 481 +++++++
.../rowSet/impl/TestResultSetLoaderMaps.java | 810 +++++++++++
.../impl/TestResultSetLoaderOmittedValues.java | 379 ++++++
.../impl/TestResultSetLoaderOverflow.java | 680 ++++++++++
.../impl/TestResultSetLoaderProjection.java | 470 +++++++
.../impl/TestResultSetLoaderProtocol.java | 586 ++++++++
.../rowSet/impl/TestResultSetLoaderTorture.java | 453 +++++++
.../rowSet/impl/TestResultSetSchemaChange.java | 245 ++++
.../drill/exec/record/TestTupleSchema.java | 509 +++++++
.../drill/exec/record/TestVectorContainer.java | 127 --
.../exec/record/vector/TestValueVector.java | 12 +
.../apache/drill/exec/sql/TestInfoSchema.java | 2 +-
.../exec/store/easy/text/compliant/TestCsv.java | 6 +-
.../java/org/apache/drill/test/ExampleTest.java | 4 +-
.../org/apache/drill/test/OperatorFixture.java | 30 +-
.../org/apache/drill/test/QueryBuilder.java | 12 +-
.../apache/drill/test/QueryRowSetIterator.java | 2 +-
.../drill/test/rowSet/AbstractRowSet.java | 109 +-
.../drill/test/rowSet/AbstractSingleRowSet.java | 182 +--
.../apache/drill/test/rowSet/DirectRowSet.java | 171 +--
.../drill/test/rowSet/HyperRowSetImpl.java | 245 +---
.../drill/test/rowSet/IndirectRowSet.java | 38 +-
.../org/apache/drill/test/rowSet/RowSet.java | 81 +-
.../apache/drill/test/rowSet/RowSetBuilder.java | 32 +-
.../drill/test/rowSet/RowSetComparison.java | 124 +-
.../apache/drill/test/rowSet/RowSetPrinter.java | 30 +-
.../apache/drill/test/rowSet/RowSetReader.java | 54 +
.../drill/test/rowSet/RowSetReaderImpl.java | 76 ++
.../apache/drill/test/rowSet/RowSetSchema.java | 304 -----
.../drill/test/rowSet/RowSetUtilities.java | 101 +-
.../apache/drill/test/rowSet/RowSetWriter.java | 119 ++
.../drill/test/rowSet/RowSetWriterImpl.java | 155 +++
.../apache/drill/test/rowSet/SchemaBuilder.java | 87 +-
.../drill/test/rowSet/file/JsonFileBuilder.java | 35 +-
.../drill/test/rowSet/test/DummyWriterTest.java | 169 +++
.../drill/test/rowSet/test/PerformanceTool.java | 296 ++++
.../drill/test/rowSet/test/RowSetTest.java | 858 +++++++-----
.../drill/test/rowSet/test/TestFillEmpties.java | 241 ++++
.../test/rowSet/test/TestFixedWidthWriter.java | 444 ++++++
.../rowSet/test/TestOffsetVectorWriter.java | 425 ++++++
.../test/rowSet/test/TestScalarAccessors.java | 1266 ++++++++++++++++++
.../rowSet/test/TestVariableWidthWriter.java | 418 ++++++
.../drill/test/rowSet/test/VectorPrinter.java | 72 +
.../apache/drill/vector/TestFillEmpties.java | 55 +-
.../apache/drill/vector/TestVectorLimits.java | 487 -------
exec/jdbc-all/pom.xml | 2 +-
.../src/main/java/io/netty/buffer/DrillBuf.java | 70 +-
.../netty/buffer/PooledByteBufAllocatorL.java | 62 +-
.../drill/exec/memory/AllocationManager.java | 89 +-
.../main/codegen/templates/ColumnAccessors.java | 383 +++---
.../codegen/templates/FixedValueVectors.java | 293 +---
.../codegen/templates/NullableValueVectors.java | 91 +-
.../codegen/templates/RepeatedValueVectors.java | 71 +-
.../src/main/codegen/templates/UnionVector.java | 44 +-
.../templates/VariableLengthVectors.java | 216 +--
.../drill/exec/record/ColumnMetadata.java | 114 ++
.../drill/exec/record/MaterializedField.java | 41 +-
.../apache/drill/exec/record/TupleMetadata.java | 88 ++
.../drill/exec/record/TupleNameSpace.java | 89 ++
.../drill/exec/vector/AllocationHelper.java | 2 +-
.../drill/exec/vector/BaseDataValueVector.java | 16 +
.../org/apache/drill/exec/vector/BitVector.java | 52 +-
.../drill/exec/vector/FixedWidthVector.java | 7 +-
.../apache/drill/exec/vector/ObjectVector.java | 26 +-
.../drill/exec/vector/UntypedNullVector.java | 59 +-
.../apache/drill/exec/vector/ValueVector.java | 53 +-
.../drill/exec/vector/VariableWidthVector.java | 4 +-
.../apache/drill/exec/vector/VectorUtils.java | 63 -
.../apache/drill/exec/vector/ZeroVector.java | 6 +-
.../exec/vector/accessor/AccessorUtilities.java | 125 --
.../drill/exec/vector/accessor/ArrayReader.java | 108 +-
.../drill/exec/vector/accessor/ArrayWriter.java | 60 +-
.../exec/vector/accessor/ColumnAccessor.java | 40 -
.../exec/vector/accessor/ColumnReader.java | 64 -
.../exec/vector/accessor/ColumnReaderIndex.java | 28 +
.../exec/vector/accessor/ColumnWriter.java | 45 -
.../exec/vector/accessor/ColumnWriterIndex.java | 76 ++
.../exec/vector/accessor/ObjectReader.java | 60 +
.../drill/exec/vector/accessor/ObjectType.java | 28 +
.../exec/vector/accessor/ObjectWriter.java | 101 ++
.../vector/accessor/ScalarElementReader.java | 65 +
.../exec/vector/accessor/ScalarReader.java | 75 ++
.../exec/vector/accessor/ScalarWriter.java | 71 +-
.../exec/vector/accessor/TupleAccessor.java | 71 -
.../drill/exec/vector/accessor/TupleReader.java | 36 +-
.../drill/exec/vector/accessor/TupleWriter.java | 154 ++-
.../drill/exec/vector/accessor/ValueType.java | 31 +
.../accessor/impl/AbstractArrayReader.java | 128 --
.../accessor/impl/AbstractArrayWriter.java | 127 --
.../accessor/impl/AbstractColumnAccessor.java | 43 -
.../accessor/impl/AbstractColumnReader.java | 126 --
.../accessor/impl/AbstractColumnWriter.java | 87 --
.../accessor/impl/AbstractTupleAccessor.java | 38 -
.../vector/accessor/impl/AccessorUtilities.java | 53 +
.../accessor/impl/ColumnAccessorFactory.java | 122 --
.../accessor/impl/HierarchicalFormatter.java | 38 +
.../accessor/impl/HierarchicalPrinter.java | 238 ++++
.../vector/accessor/impl/TupleReaderImpl.java | 151 ---
.../vector/accessor/impl/TupleWriterImpl.java | 162 ---
.../exec/vector/accessor/package-info.java | 79 +-
.../accessor/reader/AbstractArrayReader.java | 188 +++
.../accessor/reader/AbstractObjectReader.java | 52 +
.../accessor/reader/AbstractTupleReader.java | 189 +++
.../accessor/reader/BaseElementReader.java | 187 +++
.../accessor/reader/BaseScalarReader.java | 189 +++
.../accessor/reader/ColumnReaderFactory.java | 109 ++
.../accessor/reader/ElementReaderIndex.java | 24 +
.../reader/FixedWidthElementReaderIndex.java | 38 +
.../exec/vector/accessor/reader/MapReader.java | 43 +
.../accessor/reader/ObjectArrayReader.java | 159 +++
.../accessor/reader/ScalarArrayReader.java | 102 ++
.../vector/accessor/reader/VectorAccessor.java | 26 +
.../vector/accessor/reader/package-info.java | 26 +
.../accessor/writer/AbstractArrayWriter.java | 348 +++++
.../writer/AbstractFixedWidthWriter.java | 258 ++++
.../accessor/writer/AbstractObjectWriter.java | 72 +
.../accessor/writer/AbstractScalarWriter.java | 126 ++
.../accessor/writer/AbstractTupleWriter.java | 450 +++++++
.../accessor/writer/BaseScalarWriter.java | 272 ++++
.../accessor/writer/BaseVarWidthWriter.java | 157 +++
.../accessor/writer/ColumnWriterFactory.java | 196 +++
.../exec/vector/accessor/writer/MapWriter.java | 155 +++
.../accessor/writer/NullableScalarWriter.java | 190 +++
.../accessor/writer/ObjectArrayWriter.java | 143 ++
.../accessor/writer/OffsetVectorWriter.java | 283 ++++
.../accessor/writer/ScalarArrayWriter.java | 229 ++++
.../vector/accessor/writer/WriterEvents.java | 127 ++
.../accessor/writer/dummy/DummyArrayWriter.java | 96 ++
.../writer/dummy/DummyScalarWriter.java | 89 ++
.../accessor/writer/dummy/package-info.java | 54 +
.../vector/accessor/writer/package-info.java | 151 +++
.../exec/vector/complex/AbstractMapVector.java | 13 +-
.../vector/complex/BaseRepeatedValueVector.java | 13 +-
.../drill/exec/vector/complex/ListVector.java | 4 +-
.../drill/exec/vector/complex/MapVector.java | 24 +-
.../exec/vector/complex/RepeatedListVector.java | 20 +-
.../exec/vector/complex/RepeatedMapVector.java | 21 +-
188 files changed, 22717 insertions(+), 4811 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultSetLoader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultSetLoader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultSetLoader.java
new file mode 100644
index 0000000..a4b260b
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultSetLoader.java
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet;
+
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.BaseValueVector;
+import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
+
+/**
+ * Builds a result set (series of zero or more row sets) based on a defined
+ * schema which may
+ * evolve (expand) over time. Automatically rolls "overflow" rows over
+ * when a batch fills.
+ * <p>
+ * Many of the methods in this interface verify that the loader is
+ * in the proper state. For example, an exception is thrown if the caller
+ * attempts to save a row before starting a batch. However, the per-column
+ * write methods are checked only through assertions that should enabled
+ * during testing, but will be disabled during production.
+ *
+ * @see VectorContainerWriter, the class which this interface
+ * replaces
+ */
+
+public interface ResultSetLoader {
+
+ public static final int DEFAULT_ROW_COUNT = BaseValueVector.INITIAL_VALUE_ALLOCATION;
+
+ /**
+ * Current schema version. The version increments by one each time
+ * a column is added.
+ * @return the current schema version
+ */
+
+ int schemaVersion();
+
+ /**
+ * Adjust the number of rows to produce in the next batch. Takes
+ * effect after the next call to {@link #startBatch()}.
+ *
+ * @param count target batch row count
+ */
+
+ void setTargetRowCount(int count);
+
+ /**
+ * The target number of rows per batch for this loader (as configured in the loader
+ * options.)
+ *
+ * @return the target row count for batches that this loader produces
+ */
+
+ int targetRowCount();
+
+ /**
+ * The largest vector size produced by this loader (as specified by
+ * the value vector limit.)
+ *
+ * @return the largest vector size. Attempting to extend a vector beyond
+ * this limit causes automatic vector overflow and terminates the
+ * in-flight batch, even if the batch has not yet reached the target
+ * row count
+ */
+
+ int targetVectorSize();
+
+ /**
+ * Total number of batches created. Includes the current batch if
+ * the row count in this batch is non-zero.
+ * @return the number of batches produced including the current
+ * one
+ */
+
+ int batchCount();
+
+ /**
+ * Total number of rows loaded for all previous batches and the
+ * current batch.
+ * @return total row count
+ */
+
+ int totalRowCount();
+
+ /**
+ * Start a new row batch. Valid only when first started, or after the
+ * previous batch has been harvested.
+ */
+
+ void startBatch();
+
+ /**
+ * Writer for the top-level tuple (the entire row). Valid only when
+ * the mutator is actively writing a batch (after <tt>startBatch()</tt>
+ * but before <tt>harvest()</tt>.)
+ *
+ * @return writer for the top-level columns
+ */
+
+ RowSetLoader writer();
+ boolean writeable();
+
+ /**
+ * Load a row using column values passed as variable-length arguments. Expects
+ * map values to be represented as an <tt>Object</tt> array.
+ * A schema of (a:int, b:map(c:varchar)) would be
+ * set as <br><tt>setRow(10, new Object[] {"foo"});</tt><br>
+ * Values of arrays can be expressed as a Java
+ * array. A schema of (a:int, b:int[]) can be set as<br>
+ * <tt>setRow(10, new int[] {100, 200});</tt>.
+ * Intended primarily for testing; too slow for production code.
+ * <p>
+ * If the row consists of a single map or list, then the one value will be an
+ * <tt>Object</tt> array, creating an ambiguity. Use <tt>writer().set(0, value);</tt>
+ * in this case.
+ *
+ * @param values column values in column index order
+ * @return this loader
+ */
+
+ ResultSetLoader setRow(Object...values);
+
+ /**
+ * Return the output container, primarily to obtain the schema
+ * and set of vectors. Depending on when this is called, the
+ * data may or may not be populated: call
+ * {@link #harvest()} to obtain the container for a batch.
+ * <p>
+ * This method is useful when the schema is known and fixed.
+ * After declaring the schema, call this method to get the container
+ * that holds the vectors for use in planning projection, etc.
+ * <p>
+ * If the result set schema changes, then a call to this method will
+ * return the latest schema. But, if the schema changes during the
+ * overflow row, then this method will not see those changes until
+ * after harvesting the current batch. (This avoids the appearance
+ * of phantom columns in the output since the new column won't
+ * appear until the next batch.)
+ * <p>
+ * Never count on the data in the container; it may be empty, half
+ * written, or inconsistent. Always call
+ * {@link #harvest()} to obtain the container for a batch.
+ *
+ * @return the output container including schema and value
+ * vectors
+ */
+
+ VectorContainer outputContainer();
+
+ /**
+ * Harvest the current row batch, and reset the mutator
+ * to the start of the next row batch (which may already contain
+ * an overflow row.)
+ * <p>
+ * The schema of the returned container is defined as:
+ * <ul>
+ * <li>The schema as passed in via the loader options, plus</li>
+ * <li>Columns added dynamically during write, minus</li>
+ * <li>Any columns not included in the project list, minus</li>
+ * <li>Any columns added in the overflow row.</li>
+ * </ul>
+ * That is, column order is as defined by the initial schema and column
+ * additions. In particular, the schema order is <b>not</b> defined by
+ * the projection list. (Another mechanism is required to reorder columns
+ * for the actual projection.)
+ *
+ * @return the row batch to send downstream
+ */
+
+ VectorContainer harvest();
+
+ /**
+ * The schema of the harvested batch. Valid until the start of the
+ * next batch.
+ *
+ * @return the extended schema of the harvested batch which includes
+ * any allocation hints used when creating the batch
+ */
+
+ TupleMetadata harvestSchema();
+
+ /**
+ * Called after all rows are returned, whether because no more data is
+ * available, or the caller wishes to cancel the current row batch
+ * and complete.
+ */
+
+ void close();
+}
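For orientation, a minimal sketch of the batch lifecycle this interface
implies (hasMoreData() and nextValue() are placeholders for reader logic;
the allocator and options come from the caller):

    ResultSetLoader rsLoader = new ResultSetLoaderImpl(allocator, options);
    RowSetLoader writer = rsLoader.writer();
    while (hasMoreData()) {
      rsLoader.startBatch();
      while (!writer.isFull() && hasMoreData()) {
        writer.start();                       // Begin a row
        writer.scalar(0).setInt(nextValue()); // Write column values
        writer.save();                        // Commit the row
      }
      VectorContainer batch = rsLoader.harvest();
      // ... send the batch downstream ...
    }
    rsLoader.close();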
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultVectorCache.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultVectorCache.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultVectorCache.java
new file mode 100644
index 0000000..6e32b5d
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/ResultVectorCache.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet;
+
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Interface for a cache that implements "vector persistence" across
+ * multiple result set loaders. Allows a single scan operator to offer
+ * the same set of vectors even when data is read by a set of readers.
+ */
+
+public interface ResultVectorCache {
+ BufferAllocator allocator();
+ ValueVector addOrGet(MaterializedField colSchema);
+}
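A sketch of the contract (the column schema here is built only for
illustration): with a persistent cache, repeated requests for the same
column return the same vector, which is what gives downstream operators
vector persistence across readers.

    MaterializedField colSchema = MaterializedField.create("a",
        Types.required(MinorType.INT));
    ValueVector v1 = cache.addOrGet(colSchema);
    ValueVector v2 = cache.addOrGet(colSchema);
    // With a true cache, v1 == v2. A "null" cache (shown later)
    // instead creates a new vector on each request.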
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/RowSetLoader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/RowSetLoader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/RowSetLoader.java
new file mode 100644
index 0000000..070e9a9
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/RowSetLoader.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet;
+
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+
+/**
+ * Interface for writing values to a row set. Only available for newly-created
+ * single row sets.
+ * <p>
+ * Typical usage:
+ *
+ * <pre><code>
+ * void writeABatch() {
+ * RowSetLoader writer = ...
+ * while (! writer.isFull()) {
+ * writer.start();
+ * writer.scalar(0).setInt(10);
+ * writer.scalar(1).setString("foo");
+ * ...
+ * writer.save();
+ * }
+ * }</code></pre>
+ * Alternative usage:
+ *
+ * <pre><code>
+ * void writeABatch() {
+ * RowSetLoader writer = ...
+ * while (writer.start()) {
+ * writer.scalar(0).setInt(10);
+ * writer.scalar(1).setString("foo");
+ * ...
+ * writer.save();
+ * }
+ * }</code></pre>
+ *
+ * The above writes until the batch is full, based on the row count limit or
+ * on vector overflow.
+ * That is, the details of vector overflow are hidden from the code that calls
+ * the writer.
+ */
+
+public interface RowSetLoader extends TupleWriter {
+
+ ResultSetLoader loader();
+
+ /**
+ * Write a row of values, given by Java objects. Object type must match
+ * expected column type. Stops writing, and returns false, if any value causes
+ * vector overflow. Value format:
+ * <ul>
+ * <li>For scalars, the value as a suitable Java type (int or Integer, say,
+ * for <tt>INTEGER</tt> values.)</li>
+ * <li>For scalar arrays, an array of a suitable Java primitive type for
+ * scalars. For example, <tt>int[]</tt> for an <tt>INTEGER</tt> column.</li>
+ * <li>For a Map, an <tt>Object</tt> array with values encoded as above.
+ * (In fact, the list here is the same as the map format.)</li>
+ * <li>For a list (repeated map, list of list), an <tt>Object</tt> array with
+ * values encoded as above. (So, for a repeated map, an outer <tt>Object</tt>
+ * array encodes the array, an inner one encodes the map members.)</li>
+ * </ul>
+ *
+ * @param values
+ * variable-length argument list of column values
+ */
+
+ RowSetLoader addRow(Object... values);
+
+ /**
+ * Indicates that no more rows fit into the current row batch and that the row
+ * batch should be harvested and sent downstream. Any overflow row is
+ * automatically saved for the next cycle. The value is undefined when a batch
+ * is not active.
+ * <p>
+ * Will be false on the first row, and all subsequent rows, until either the
+ * maximum number of rows are written or a vector overflows. After that, will
+ * return true. The method returns true as soon as any column writer
+ * overflows, even in the middle of a row write; the overflow row itself is
+ * rolled over to the next batch automatically, as described above.
+ *
+ * @return true if the batch is full (no more rows can be written),
+ * false if not
+ */
+
+ boolean isFull();
+
+ /**
+ * The number of rows in the current row set. Does not count any overflow row
+ * saved for the next batch.
+ *
+ * @return number of rows to be sent downstream
+ */
+
+ int rowCount();
+
+ /**
+ * The index of the current row. Same as the row count except in an overflow
+ * row in which case the row index will revert to zero as soon as any vector
+ * overflows. Note: this means that the index can change between columns in a
+ * single row. Applications usually don't use this index directly; they
+ * rely on the writers to write to the proper location.
+ *
+ * @return the current write index
+ */
+
+ int rowIndex();
+
+ /**
+ * Prepare a new row for writing. Call this before each row.
+ * <p>
+ * Handles a very special case: that of discarding the last row written.
+ * A reader can read a row into vectors, then "sniff" the row to check,
+ * for example, against a filter. If the row is not wanted, simply omit
+ * the call to <tt>save()</tt> and the next call to <tt>start()</tt> will
+ * discard the unsaved row.
+ * <p>
+ * Note that the vectors still contain values in the
+ * discarded position; just the various pointers are unset. If
+ * the batch ends before the discarded values are overwritten, the
+ * discarded values just exist at the end of the vector. Since vectors
+ * start with garbage contents, the discarded values are simply a different
+ * kind of garbage. But, if the client writes a new row, then the new
+ * row overwrites the discarded row. This works because we only change
+ * the tail part of a vector; never the internals.
+ *
+ * @return true if another row can be added, false if the batch is full
+ */
+
+ boolean start();
+
+ /**
+ * Saves the current row and moves to the next row. Failing to call this
+ * method effectively abandons the in-flight row; something that may be useful
+ * to recover from partially-written rows that turn out to contain errors.
+ * Done automatically if using <tt>setRow()</tt>.
+ */
+
+ void save();
+}
\ No newline at end of file
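A short sketch of the conventions above, assuming a schema of
(a: INT, b: VARCHAR[], m: MAP(c: INT, d: VARCHAR)); passesFilter() and
value are placeholders for application logic:

    // addRow(): scalars as Java values, scalar arrays as Java arrays,
    // maps as Object arrays.
    writer.addRow(10, new String[] {"fred", "barney"},
        new Object[] {20, "wilma"});

    // Row "sniffing": omit save() to discard an unwanted row; the
    // next start() reuses the same write position.
    writer.start();
    writer.scalar("a").setInt(value);
    if (passesFilter(value)) {
      writer.save();
    }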
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ColumnState.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ColumnState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ColumnState.java
new file mode 100644
index 0000000..f3626d9
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ColumnState.java
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import java.util.ArrayList;
+
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.exec.physical.rowSet.impl.SingleVectorState.OffsetVectorState;
+import org.apache.drill.exec.physical.rowSet.impl.TupleState.MapState;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.ColumnWriterFactory;
+import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector;
+
+/**
+ * Represents the write-time state for a column including the writer and the (optional)
+ * backing vector. Implements per-column operations such as vector overflow. If a column
+ * is a (possibly repeated) map, then the column state will hold a tuple state.
+ * <p>
+ * If a column is not projected, then the writer exists (to make life easier for the
+ * reader), but there will be no vector backing the writer.
+ * <p>
+ * Different columns need different kinds of vectors: a data vector, possibly an offset
+ * vector, or even a non-existent vector. The {@link VectorState} class abstracts out
+ * these differences.
+ */
+
+public abstract class ColumnState {
+
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ColumnState.class);
+
+ public static abstract class BaseMapColumnState extends ColumnState {
+ protected final MapState mapState;
+
+ public BaseMapColumnState(ResultSetLoaderImpl resultSetLoader,
+ AbstractObjectWriter writer, VectorState vectorState,
+ ProjectionSet projectionSet) {
+ super(resultSetLoader, writer, vectorState);
+ mapState = new MapState(resultSetLoader, this, projectionSet);
+ }
+
+ @Override
+ public void rollover() {
+ super.rollover();
+ mapState.rollover();
+ }
+
+ @Override
+ public void startBatch() {
+ super.startBatch();
+ mapState.startBatch();
+ }
+
+ @Override
+ public void harvestWithLookAhead() {
+ super.harvestWithLookAhead();
+ mapState.harvestWithLookAhead();
+ }
+
+ @Override
+ public void close() {
+ super.close();
+ mapState.close();
+ }
+
+ public MapState mapState() { return mapState; }
+ }
+
+ public static class MapColumnState extends BaseMapColumnState {
+
+ public MapColumnState(ResultSetLoaderImpl resultSetLoader,
+ ColumnMetadata columnSchema,
+ ProjectionSet projectionSet) {
+ super(resultSetLoader,
+ ColumnWriterFactory.buildMap(columnSchema, null,
+ new ArrayList<AbstractObjectWriter>()),
+ new NullVectorState(),
+ projectionSet);
+ }
+
+ @Override
+ public void updateCardinality(int cardinality) {
+ super.updateCardinality(cardinality);
+ mapState.updateCardinality(cardinality);
+ }
+ }
+
+ public static class MapArrayColumnState extends BaseMapColumnState {
+
+ public MapArrayColumnState(ResultSetLoaderImpl resultSetLoader,
+ AbstractObjectWriter writer,
+ VectorState vectorState,
+ ProjectionSet projectionSet) {
+ super(resultSetLoader, writer,
+ vectorState,
+ projectionSet);
+ }
+
+ @SuppressWarnings("resource")
+ public static MapArrayColumnState build(ResultSetLoaderImpl resultSetLoader,
+ ColumnMetadata columnSchema,
+ ProjectionSet projectionSet) {
+
+ // Create the map's offset vector.
+
+ UInt4Vector offsetVector = new UInt4Vector(
+ BaseRepeatedValueVector.OFFSETS_FIELD,
+ resultSetLoader.allocator());
+
+ // Create the writer using the offset vector
+
+ AbstractObjectWriter writer = ColumnWriterFactory.buildMapArray(
+ columnSchema, offsetVector,
+ new ArrayList<AbstractObjectWriter>());
+
+ // Wrap the offset vector in a vector state
+
+ VectorState vectorState = new OffsetVectorState(
+ ((AbstractArrayWriter) writer.array()).offsetWriter(),
+ offsetVector,
+ (AbstractObjectWriter) writer.array().entry());
+
+ // Assemble it all into the column state.
+
+ return new MapArrayColumnState(resultSetLoader,
+ writer, vectorState, projectionSet);
+ }
+
+ @Override
+ public void updateCardinality(int cardinality) {
+ super.updateCardinality(cardinality);
+ int childCardinality = cardinality * schema().expectedElementCount();
+ mapState.updateCardinality(childCardinality);
+ }
+ }
+
+ /**
+ * Columns move through various lifecycle states as identified by this
+ * enum. (Yes, sorry that the term "state" is used in two different ways
+ * here: the variables for a column and the point within the column
+ * lifecycle.)
+ */
+
+ protected enum State {
+
+ /**
+ * Column is in the normal state of writing with no overflow
+ * in effect.
+ */
+
+ NORMAL,
+
+ /**
+ * Like NORMAL, but means that the data has overflowed and the
+ * column's data for the current row appears in the new,
+ * overflow batch. For a client that omits some columns, written
+ * columns will be in OVERFLOW state, unwritten columns in
+ * NORMAL state.
+ */
+
+ OVERFLOW,
+
+ /**
+ * Indicates that the column has data saved
+ * in the overflow batch.
+ */
+
+ LOOK_AHEAD,
+
+ /**
+ * Like LOOK_AHEAD, but indicates the special case that the column
+ * was added after overflow, so there is no vector for the column
+ * in the harvested batch.
+ */
+
+ NEW_LOOK_AHEAD
+ }
+
+ protected final ResultSetLoaderImpl resultSetLoader;
+ protected final int addVersion;
+ protected final VectorState vectorState;
+ protected State state;
+ protected AbstractObjectWriter writer;
+
+ /**
+ * Cardinality of the value itself. If this is an array,
+ * then this is the number of arrays. A separate number,
+ * the inner cardinality, is computed as the outer cardinality
+ * times the expected array count (from metadata.) The inner
+ * cardinality is the total number of array items in the
+ * vector.
+ */
+
+ protected int outerCardinality;
+
+ public ColumnState(ResultSetLoaderImpl resultSetLoader,
+ AbstractObjectWriter writer, VectorState vectorState) {
+ this.resultSetLoader = resultSetLoader;
+ this.vectorState = vectorState;
+ this.addVersion = resultSetLoader.bumpVersion();
+ state = resultSetLoader.hasOverflow() ?
+ State.NEW_LOOK_AHEAD : State.NORMAL;
+ this.writer = writer;
+ }
+
+ public AbstractObjectWriter writer() { return writer; }
+ public ColumnMetadata schema() { return writer.schema(); }
+
+ public ValueVector vector() { return vectorState.vector(); }
+
+ public void allocateVectors() {
+ assert outerCardinality != 0;
+ resultSetLoader.tallyAllocations(
+ vectorState.allocate(outerCardinality));
+ }
+
+ /**
+ * Prepare the column for a new row batch. If overflow occurred on the
+ * previous batch, restore the look-ahead buffer to the
+ * active vector so we start writing where we left off.
+ */
+
+ public void startBatch() {
+ switch (state) {
+ case NORMAL:
+ resultSetLoader.tallyAllocations(vectorState.allocate(outerCardinality));
+ break;
+
+ case NEW_LOOK_AHEAD:
+
+ // Column is new, was not exchanged with backup vector
+
+ break;
+
+ case LOOK_AHEAD:
+
+ // Restore the look-ahead values to the main vector.
+
+ vectorState.startBatchWithLookAhead();
+ break;
+
+ default:
+ throw new IllegalStateException("Unexpected state: " + state);
+ }
+
+ // In all cases, we are back to normal writing.
+
+ state = State.NORMAL;
+ }
+
+ /**
+ * A column within the row batch overflowed. Prepare to absorb the rest of the
+ * in-flight row by rolling values over to a new vector, saving the complete
+ * vector for later. This column could have a value for the overflow row, or
+ * for some previous row, depending on exactly when and where the overflow
+ * occurs.
+ */
+
+ public void rollover() {
+ assert state == State.NORMAL;
+
+ // If the source index is 0, then we could not fit this one
+ // value in the original vector. Nothing will be accomplished by
+ // trying again with an overflow vector. Just fail.
+ //
+ // Note that this is a judgment call. It is possible to allow the
+ // vector to double beyond the limit, but that will require a bit
+ // of thought to get right -- and, of course, completely defeats
+ // the purpose of limiting vector size to avoid memory fragmentation...
+
+ if (resultSetLoader.writerIndex().vectorIndex() == 0) {
+ throw UserException
+ .memoryError("A single column value is larger than the maximum allowed size of 16 MB")
+ .build(logger);
+ }
+
+ // Otherwise, do the roll-over to a look-ahead vector.
+
+ vectorState.rollover(outerCardinality);
+
+ // Remember that we did this overflow processing.
+
+ state = State.OVERFLOW;
+ }
+
+ /**
+ * Writing of a row batch is complete. Prepare the vector for harvesting
+ * to send downstream. If this batch encountered overflow, set aside the
+ * look-ahead vector and put the full vector buffer back into the active
+ * vector.
+ */
+
+ public void harvestWithLookAhead() {
+ switch (state) {
+ case NEW_LOOK_AHEAD:
+
+ // If added after overflow, no data to save from the complete
+ // batch: the vector does not appear in the completed batch.
+
+ break;
+
+ case OVERFLOW:
+
+ // Otherwise, restore the original, full buffer and
+ // last write position.
+
+ vectorState.harvestWithLookAhead();
+
+ // Remember that we have look-ahead values stashed away in the
+ // backup vector.
+
+ state = State.LOOK_AHEAD;
+ break;
+
+ default:
+ throw new IllegalStateException("Unexpected state: " + state);
+ }
+ }
+
+ public void close() {
+ vectorState.reset();
+ }
+
+ public void updateCardinality(int cardinality) {
+ outerCardinality = cardinality;
+ }
+
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("addVersion", addVersion)
+ .attribute("state", state)
+ .attributeIdentity("writer", writer)
+ .attribute("vectorState")
+ ;
+ vectorState.dump(format);
+ format.endObject();
+ }
+}
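A sketch of the call sequence that the loader drives against each column
state, annotated with the state transitions described above (not a literal
excerpt from the loader):

    colState.updateCardinality(targetRowCount);
    colState.startBatch();            // NORMAL: allocate vectors
    // ... client writes rows; some vector hits its size limit ...
    colState.rollover();              // NORMAL -> OVERFLOW
    // ... the in-flight row completes in the look-ahead vectors ...
    colState.harvestWithLookAhead();  // OVERFLOW -> LOOK_AHEAD
    // ... the harvested batch is sent downstream ...
    colState.startBatch();            // LOOK_AHEAD -> NORMAL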
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullProjectionSet.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullProjectionSet.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullProjectionSet.java
new file mode 100644
index 0000000..2fcc813
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullProjectionSet.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+/**
+ * Represents an implicit projection: project all or project none.
+ * For the root tuple, the "all" case represents a wildcard: SELECT *.
+ * When used with maps, the "all" case means selection of all map columns,
+ * either implicitly, or because the map itself is selected; the "none"
+ * case means the map itself is unprojected.
+ */
+
+public class NullProjectionSet implements ProjectionSet {
+
+ private boolean allProjected;
+
+ public NullProjectionSet(boolean allProjected) {
+ this.allProjected = allProjected;
+ }
+
+ @Override
+ public boolean isProjected(String colName) { return allProjected; }
+
+ @Override
+ public ProjectionSet mapProjection(String colName) {
+ return new NullProjectionSet(allProjected);
+ }
+}
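A sketch of the two behaviors:

    ProjectionSet all = new NullProjectionSet(true);
    all.isProjected("anyCol");                // true
    all.mapProjection("m").isProjected("x");  // true: members inherit

    ProjectionSet none = new NullProjectionSet(false);
    none.isProjected("anyCol");               // false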
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullResultVectorCacheImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullResultVectorCacheImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullResultVectorCacheImpl.java
new file mode 100644
index 0000000..930dc30
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullResultVectorCacheImpl.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.physical.rowSet.ResultVectorCache;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.ValueVector;
+
+public class NullResultVectorCacheImpl implements ResultVectorCache {
+
+ private final BufferAllocator allocator;
+
+ public NullResultVectorCacheImpl(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ @Override
+ public BufferAllocator allocator() { return allocator; }
+
+ @Override
+ public ValueVector addOrGet(MaterializedField colSchema) {
+ return TypeHelper.getNewVector(colSchema, allocator, null);
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullVectorState.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullVectorState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullVectorState.java
new file mode 100644
index 0000000..8372758
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullVectorState.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+
+/**
+ * Do-nothing vector state for a map column which has no actual vector
+ * associated with it.
+ */
+
+public class NullVectorState implements VectorState {
+
+ @Override public int allocate(int cardinality) { return 0; }
+ @Override public void rollover(int cardinality) { }
+ @Override public void harvestWithLookAhead() { }
+ @Override public void startBatchWithLookAhead() { }
+ @Override public void reset() { }
+ @Override public ValueVector vector() { return null; }
+
+ public static class UnmanagedVectorState extends NullVectorState {
+ ValueVector vector;
+
+ public UnmanagedVectorState(ValueVector vector) {
+ this.vector = vector;
+ }
+
+ @Override
+ public ValueVector vector() { return vector; }
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format.startObject(this).endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullableVectorState.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullableVectorState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullableVectorState.java
new file mode 100644
index 0000000..bf91032
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/NullableVectorState.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import org.apache.drill.exec.physical.rowSet.impl.SingleVectorState.ValuesVectorState;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.FixedWidthVector;
+import org.apache.drill.exec.vector.NullableVector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter;
+import org.apache.drill.exec.vector.accessor.writer.NullableScalarWriter;
+
+public class NullableVectorState implements VectorState {
+
+ public static class BitsVectorState extends ValuesVectorState {
+
+ public BitsVectorState(ColumnMetadata schema, AbstractScalarWriter writer, ValueVector mainVector) {
+ super(schema, writer, mainVector);
+ }
+
+ @Override
+ public int allocateVector(ValueVector vector, int cardinality) {
+ ((FixedWidthVector) vector).allocateNew(cardinality);
+ return vector.getBufferSize();
+ }
+ }
+
+ private final ColumnMetadata schema;
+ private final NullableScalarWriter writer;
+ private final NullableVector vector;
+ private final ValuesVectorState bitsState;
+ private final ValuesVectorState valuesState;
+
+ public NullableVectorState(AbstractObjectWriter writer, NullableVector vector) {
+ this.schema = writer.schema();
+ this.vector = vector;
+
+ this.writer = (NullableScalarWriter) writer.scalar();
+ bitsState = new BitsVectorState(schema, this.writer.bitsWriter(), vector.getBitsVector());
+ valuesState = new ValuesVectorState(schema, this.writer.baseWriter(), vector.getValuesVector());
+ }
+
+ @Override
+ public int allocate(int cardinality) {
+ return bitsState.allocate(cardinality) +
+ valuesState.allocate(cardinality);
+ }
+
+ @Override
+ public void rollover(int cardinality) {
+ bitsState.rollover(cardinality);
+ valuesState.rollover(cardinality);
+ }
+
+ @Override
+ public void harvestWithLookAhead() {
+ bitsState.harvestWithLookAhead();
+ valuesState.harvestWithLookAhead();
+ }
+
+ @Override
+ public void startBatchWithLookAhead() {
+ bitsState.startBatchWithLookAhead();
+ valuesState.startBatchWithLookAhead();
+ }
+
+ @Override
+ public void reset() {
+ bitsState.reset();
+ valuesState.reset();
+ }
+
+ @Override
+ public ValueVector vector() { return vector; }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("schema", schema)
+ .attributeIdentity("writer", writer)
+ .attributeIdentity("vector", vector)
+ .attribute("bitsState");
+ bitsState.dump(format);
+ format
+ .attribute("valuesState");
+ valuesState.dump(format);
+ format
+ .endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/OptionBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/OptionBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/OptionBuilder.java
new file mode 100644
index 0000000..a743052
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/OptionBuilder.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import java.util.Collection;
+
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.physical.rowSet.ResultVectorCache;
+import org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.BaseValueVector;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Builder for the options for the row set loader. Reasonable defaults
+ * are provided for all options; use these options for test code or
+ * for clients that don't need special settings.
+ */
+
+public class OptionBuilder {
+ protected int vectorSizeLimit;
+ protected int rowCountLimit;
+ protected Collection<SchemaPath> projection;
+ protected ResultVectorCache vectorCache;
+ protected TupleMetadata schema;
+ protected long maxBatchSize;
+
+ public OptionBuilder() {
+ ResultSetOptions options = new ResultSetOptions();
+ vectorSizeLimit = options.vectorSizeLimit;
+ rowCountLimit = options.rowCountLimit;
+ maxBatchSize = options.maxBatchSize;
+ }
+
+ /**
+ * Specify the maximum number of rows per batch. Defaults to
+ * {@link BaseValueVector#INITIAL_VALUE_ALLOCATION}. Batches end either
+ * when this limit is reached, or when a vector overflows, whichever
+ * occurs first. The limit is capped at
+ * {@link ValueVector#MAX_ROW_COUNT}.
+ *
+ * @param limit the row count limit
+ * @return this builder
+ */
+
+ public OptionBuilder setRowCountLimit(int limit) {
+ rowCountLimit = Math.max(1,
+ Math.min(limit, ValueVector.MAX_ROW_COUNT));
+ return this;
+ }
+
+ public OptionBuilder setBatchSizeLimit(int bytes) {
+ maxBatchSize = bytes;
+ return this;
+ }
+
+ /**
+ * Record (batch) readers often read a subset of available table columns,
+ * but want to use a writer schema that includes all columns for ease of
+ * writing. (For example, a CSV reader must read all columns, even if the user
+ * wants a subset. The unwanted columns are simply discarded.)
+ * <p>
+ * This option provides a projection list, in the form of column names, for
+ * those columns which are to be projected. Only those columns will be
+ * backed by value vectors; non-projected columns will be backed by "null"
+ * writers that discard all values.
+ *
+ * @param projection the list of projected columns
+ * @return this builder
+ */
+
+ // TODO: Use SchemaPath in place of strings.
+
+ public OptionBuilder setProjection(Collection<SchemaPath> projection) {
+ this.projection = projection;
+ return this;
+ }
+
+ /**
+ * Downstream operators require "vector persistence": the same vector
+ * must represent the same column in every batch. For the scan operator,
+ * which creates multiple readers, this can be a challenge. The vector
+ * cache provides a transparent mechanism to enable vector persistence
+ * by returning the same vector for a set of independent readers. By
+ * default, the code uses a "null" cache which creates a new vector on
+ * each request. If a true cache is needed, the caller must provide one
+ * here.
+ */
+
+ public OptionBuilder setVectorCache(ResultVectorCache vectorCache) {
+ this.vectorCache = vectorCache;
+ return this;
+ }
+
+ /**
+ * Clients can use the row set builder in several ways:
+ * <ul>
+ * <li>Provide the schema up front, when known, by using this method to
+ * provide the schema.</li>
+ * <li>Discover the schema on the fly, adding columns during the write
+ * operation. Leave this method unset to start with an empty schema.</li>
+ * <li>A combination of the above.</li>
+ * </ul>
+ * @param schema the initial schema for the loader
+ * @return this builder
+ */
+
+ public OptionBuilder setSchema(TupleMetadata schema) {
+ this.schema = schema;
+ return this;
+ }
+
+ // TODO: No setter for vector length yet: is hard-coded
+ // at present in the value vector.
+
+ public ResultSetOptions build() {
+ return new ResultSetOptions(this);
+ }
+}
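A typical builder usage, along the lines of the test code later in this
patch (the allocator comes from the operator context or a test fixture):

    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();
    ResultSetOptions options = new OptionBuilder()
        .setRowCountLimit(10_000)
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(allocator, options);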
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/PrimitiveColumnState.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/PrimitiveColumnState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/PrimitiveColumnState.java
new file mode 100644
index 0000000..c97ec18
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/PrimitiveColumnState.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import org.apache.drill.exec.physical.rowSet.impl.SingleVectorState.ValuesVectorState;
+import org.apache.drill.exec.vector.NullableVector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter;
+import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+
+/**
+ * Primitive (non-map) column state. Handles all three cardinalities.
+ * Column metadata is hosted on the writer.
+ */
+
+public class PrimitiveColumnState extends ColumnState implements ColumnWriterListener {
+
+ public PrimitiveColumnState(ResultSetLoaderImpl resultSetLoader,
+ AbstractObjectWriter colWriter,
+ VectorState vectorState) {
+ super(resultSetLoader, colWriter, vectorState);
+ writer.bindListener(this);
+ }
+
+ public static PrimitiveColumnState newPrimitive(
+ ResultSetLoaderImpl resultSetLoader,
+ ValueVector vector,
+ AbstractObjectWriter writer) {
+ VectorState vectorState;
+ if (vector == null) {
+ vectorState = new NullVectorState();
+ } else {
+ vectorState = new ValuesVectorState(
+ writer.schema(),
+ (AbstractScalarWriter) writer.scalar(),
+ vector);
+ }
+ return new PrimitiveColumnState(resultSetLoader, writer,
+ vectorState);
+ }
+
+ public static PrimitiveColumnState newNullablePrimitive(
+ ResultSetLoaderImpl resultSetLoader,
+ ValueVector vector,
+ AbstractObjectWriter writer) {
+ VectorState vectorState;
+ if (vector == null) {
+ vectorState = new NullVectorState();
+ } else {
+ vectorState = new NullableVectorState(
+ writer,
+ (NullableVector) vector);
+ }
+ return new PrimitiveColumnState(resultSetLoader, writer,
+ vectorState);
+ }
+
+ public static PrimitiveColumnState newPrimitiveArray(
+ ResultSetLoaderImpl resultSetLoader,
+ ValueVector vector,
+ AbstractObjectWriter writer) {
+ VectorState vectorState;
+ if (vector == null) {
+ vectorState = new NullVectorState();
+ } else {
+ vectorState = new RepeatedVectorState(writer, (RepeatedValueVector) vector);
+ }
+ return new PrimitiveColumnState(resultSetLoader, writer,
+ vectorState);
+ }
+
+ @Override
+ public void overflowed(ScalarWriter writer) {
+ resultSetLoader.overflowed();
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ // TODO Auto-generated method stub
+ }
+
+ @Override
+ public boolean canExpand(ScalarWriter writer, int delta) {
+ return resultSetLoader.canExpand(delta);
+ }
+}
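The factory methods above choose the vector state based on the vector
provided; passing a null vector is how an unprojected column receives a
do-nothing state. A sketch (rsLoader, vector and the writers are assumed
to exist):

    // Projected column: writes reach real vector memory.
    ColumnState projected =
        PrimitiveColumnState.newPrimitive(rsLoader, vector, colWriter);

    // Unprojected column: the null vector selects NullVectorState,
    // so the (dummy) writer discards all values.
    ColumnState unprojected =
        PrimitiveColumnState.newPrimitive(rsLoader, null, dummyWriter);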
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ProjectionSet.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ProjectionSet.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ProjectionSet.java
new file mode 100644
index 0000000..9ea118f
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ProjectionSet.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+/**
+ * Represents the set of columns projected for a tuple (row or map.)
+ * The projected columns might themselves be tuples (maps), so the
+ * interface returns a nested projection set for such columns.
+ * <p>
+ * Three implementations exist:
+ * <ul>
+ * <li>Project all ({@link NullProjectionSet}): used for a tuple when
+ * all columns are projected. Example: the root tuple (the row) in
+ * a <tt>SELECT *</tt> query.</li>
+ * <li>Project none (also {@link NullProjectionSet}): used when no
+ * columns are projected from a tuple, such as when a map itself is
+ * not projected, so none of its member columns are projected.</li>
+ * <li>Project some ({@link ProjectionSetImpl}): used in the
+ * <tt>SELECT a, c, e</tt> case in which the query identifies which
+ * columns to project (implicitly leaving out others, such as b and
+ * d in our example.)</li>
+ * </ul>
+ * <p>
+ * The result is that each tuple (row and map) has an associated
+ * projection set which the code can query to determine if a newly
+ * added column is wanted (and so should have a backing vector) or
+ * is unwanted (and can just receive a dummy writer.)
+ */
+
+interface ProjectionSet {
+ boolean isProjected(String colName);
+ ProjectionSet mapProjection(String colName);
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ProjectionSetImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ProjectionSetImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ProjectionSetImpl.java
new file mode 100644
index 0000000..e17f486
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ProjectionSetImpl.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.drill.common.expression.PathSegment;
+import org.apache.drill.common.expression.PathSegment.NameSegment;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.map.CaseInsensitiveMap;
+
+/**
+ * Represents an explicit projection at some tuple level.
+ * <p>
+ * A column is projected if it is explicitly listed in the selection list.
+ * <p>
+ * If a column is a map, then the projection for the map's columns is based on
+ * two rules:
+ * <ol>
+ * <li>If the projection list includes at least one explicit mention of a map
+ * member, then include only those columns explicitly listed.</li>
+ * <li>If the projection at the parent level lists only the map column itself
+ * (which the projection can't know is a map), then assume this implies all
+ * columns, as if the entry were "map.*".</li>
+ * </ol>
+ * <p>
+ * Examples:<br>
+ * <code>m</code><br>
+ * If m turns out to be a map, project all members of m.<br>
+ * <code>m.a</code><br>
+ * Column m must be a map. Project only column a.<br>
+ * <code>m, m.a</code><br>
+ * Tricky case. We interpret this as projecting only the "a" element of map m.
+ * <p>
+ * The projection set is built from a list of columns, represented as
+ * {@link SchemaPath} objects, provided by the physical plan. The structure of
+ * <tt>SchemaPath</tt> is a bit awkward:
+ * <p>
+ * <ul>
+ * <li><tt>SchemaPath</tt> is a wrapper for a column which directly holds the
+ * <tt>NameSegment</tt> for the top-level column.</li>
+ * <li><tt>NameSegment</tt> holds a name. This can be a top name such as
+ * `a`, or parts of a compound name such as `a`.`b`. Each <tt>NameSegment</tt>
+ * has a "child" that points to the optional following parts of the name.</li>
+ * <li><tt>PathSegment</tt> is the base class for the parts of a name.</li>
+ * <li><tt>ArraySegment</tt> is the other kind of name part and represents
+ * an array index such as the "[1]" in `columns`[1].</li>
+ * </ul>
+ * The parser here consumes only names, this mechanism does not consider
+ * array indexes. As a result, there may be multiple projected columns that
+ * map to the same projection here: `columns`[1] and `columns`[2] both map to
+ * the name `columns`, for example.
+ */
+
+public class ProjectionSetImpl implements ProjectionSet {
+
+ Set<String> projection = new HashSet<>();
+ Map<String, ProjectionSetImpl> mapProjections = CaseInsensitiveMap
+ .newHashMap();
+
+ @Override
+ public boolean isProjected(String colName) {
+ return projection.contains(colName.toLowerCase());
+ }
+
+ @Override
+ public ProjectionSet mapProjection(String colName) {
+ ProjectionSet mapProj = mapProjections.get(colName.toLowerCase());
+ if (mapProj != null) {
+ return mapProj;
+ }
+
+ // No explicit information for the map. Members inherit the
+ // same projection as the map itself.
+
+ return new NullProjectionSet(isProjected(colName));
+ }
+
+ /**
+ * Parse a projection list. The list should consist of a list of column
+ * names; any wildcards should have been processed by the caller. An
+ * empty or null list means everything is projected (that is, an
+ * empty list here is equivalent to a wildcard in the SELECT
+ * statement.)
+ *
+ * @param projList the columns in the projection list, or null or empty
+ * to project all columns
+ * @return the projection set for the given list
+ */
+ public static ProjectionSet parse(Collection<SchemaPath> projList) {
+ if (projList == null || projList.isEmpty()) {
+ return new NullProjectionSet(true);
+ }
+ ProjectionSetImpl projSet = new ProjectionSetImpl();
+ for (SchemaPath col : projList) {
+ projSet.addSegment(col.getRootSegment());
+ }
+ return projSet;
+ }
+
+ private void addSegment(NameSegment rootSegment) {
+ String rootKey = rootSegment.getPath().toLowerCase();
+ projection.add(rootKey);
+ PathSegment child = rootSegment.getChild();
+ if (child == null) {
+ return;
+ }
+ if (child.isArray()) {
+ // Ignore the [x] array suffix.
+ return;
+ }
+ ProjectionSetImpl map = mapProjections.get(rootKey);
+ if (map == null) {
+ map = new ProjectionSetImpl();
+ mapProjections.put(rootKey, map);
+ }
+ map.addSegment((NameSegment) child);
+ }
+}
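A sketch of the parser and lookups for the examples above:

    List<SchemaPath> projList = Arrays.asList(
        SchemaPath.getSimplePath("a"),
        SchemaPath.getCompoundPath("m", "x"));
    ProjectionSet proj = ProjectionSetImpl.parse(projList);
    proj.isProjected("a");                     // true
    proj.isProjected("b");                     // false
    proj.mapProjection("m").isProjected("x");  // true
    proj.mapProjection("m").isProjected("y");  // false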
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RepeatedVectorState.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RepeatedVectorState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RepeatedVectorState.java
new file mode 100644
index 0000000..98b6beb
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RepeatedVectorState.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import org.apache.drill.exec.physical.rowSet.impl.SingleVectorState.OffsetVectorState;
+import org.apache.drill.exec.physical.rowSet.impl.SingleVectorState.ValuesVectorState;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter;
+import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+
+/**
+ * Vector state for a scalar array (repeated scalar) vector. Manages both the
+ * offsets vector and data vector during overflow and other operations.
+ */
+
+public class RepeatedVectorState implements VectorState {
+ private final ColumnMetadata schema;
+ private final AbstractArrayWriter arrayWriter;
+ private final RepeatedValueVector vector;
+ private final OffsetVectorState offsetsState;
+ private final ValuesVectorState valuesState;
+
+ public RepeatedVectorState(AbstractObjectWriter writer, RepeatedValueVector vector) {
+ this.schema = writer.schema();
+
+ // Get the repeated vector
+
+ this.vector = vector;
+
+ // Create the values state using the value (data) portion of the repeated
+ // vector, and the scalar (value) portion of the array writer.
+
+ arrayWriter = (AbstractArrayWriter) writer.array();
+ AbstractScalarWriter colWriter = (AbstractScalarWriter) arrayWriter.scalar();
+ valuesState = new ValuesVectorState(schema, colWriter, vector.getDataVector());
+
+ // Create the offsets state with the offset vector portion of the repeated
+ // vector, and the offset writer portion of the array writer.
+
+ offsetsState = new OffsetVectorState(arrayWriter.offsetWriter(),
+ vector.getOffsetVector(),
+ (AbstractObjectWriter) arrayWriter.entry());
+ }
+
+ @Override
+ public ValueVector vector() { return vector; }
+
+ @Override
+ public int allocate(int cardinality) {
+ return offsetsState.allocate(cardinality) +
+ valuesState.allocate(childCardinality(cardinality));
+ }
+
+ private int childCardinality(int cardinality) {
+ return cardinality * schema.expectedElementCount();
+ }
+
+ /**
+ * The column is an array of scalars. We need to roll over both the column
+ * values and the offsets that point to those values. The index provided is
+ * the index into the offset vector. We use that to obtain the index of the
+ * values to roll-over.
+ * <p>
+ * Data structure:
+ * <p><pre><code>
+ * RepeatedVectorState (this class)
+ * +- OffsetVectorState
+ * . +- OffsetVectorWriter (A)
+ * . +- Offset vector (B)
+ * . +- Backup (e.g. look-ahead) offset vector
+ * +- ValuesVectorState
+ * . +- Scalar (element) writer (C)
+ * . +- Data (elements) vector (D)
+ * . +- Backup elements vector
+ * +- Array Writer
+ * . +- ColumnWriterIndex (for array as a whole)
+ * . +- OffsetVectorWriter (A)
+ * . . +- Offset vector (B)
+ * . +- ArrayElementWriterIndex
+ * . +- ScalarWriter (C)
+ * . . +- ArrayElementWriterIndex
+ * . . +- Scalar vector (D)
+ * </code></pre>
+ * <p>
+ * The top group of objects point into the writer objects in the second
+ * group. Letters in parens show the connections.
+ * <p>
+ * To perform the roll-over, we must:
+ * <ul>
+ * <li>Copy values from the current vectors to a set of new, look-ahead
+ * vectors.</li>
+ * <li>Swap buffers between the main and "backup" vectors, effectively
+ * moving the "full" batch to the sidelines, putting the look-ahead vectors
+ * into play in order to finish writing the current row.</li>
+ * <li>Update the writers to point to the look-ahead buffers, including
+ * the initial set of data copied into those vectors.</li>
+ * <li>Update the vector indexes to point to the next write positions
+ * after the values copied during roll-over.</li>
+ * </ul>
+ *
+ * @param cardinality the number of outer elements to create in the look-ahead
+ * vector
+ */
+
+ @Override
+ public void rollover(int cardinality) {
+
+ // Swap out the two vectors. The index presented to the caller
+ // is that of the data vector: the next position in the data
+ // vector to be set into the data vector writer index.
+
+ valuesState.rollover(childCardinality(cardinality));
+ offsetsState.rollover(cardinality);
+ }
+
+ @Override
+ public void harvestWithLookAhead() {
+ offsetsState.harvestWithLookAhead();
+ valuesState.harvestWithLookAhead();
+ }
+
+ @Override
+ public void startBatchWithLookAhead() {
+ offsetsState.startBatchWithLookAhead();
+ valuesState.startBatchWithLookAhead();
+ }
+
+ @Override
+ public void reset() {
+ offsetsState.reset();
+ valuesState.reset();
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("schema", schema)
+ .attributeIdentity("writer", arrayWriter)
+ .attributeIdentity("vector", vector)
+ .attribute("offsetsState");
+ offsetsState.dump(format);
+ format
+ .attribute("valuesState");
+ valuesState.dump(format);
+ format
+ .endObject();
+ }
+}
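The cardinality arithmetic in childCardinality() above is the key sizing
rule: offsets are sized by the outer row count, data by rows times the
expected element count from the column metadata. For example:

    int cardinality = 1024;        // target rows in the batch
    int elementsPerArray = 4;      // schema.expectedElementCount()
    int childCardinality = cardinality * elementsPerArray;  // 4096 values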
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderMaps.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderMaps.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderMaps.java
new file mode 100644
index 0000000..b23eb0d
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderMaps.java
@@ -0,0 +1,810 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.Arrays;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.TupleReader;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+/**
+ * Test (non-array) map support in the result set loader and related classes.
+ */
+
+public class TestResultSetLoaderMaps extends SubOperatorTest {
+
+ @Test
+ public void testBasics() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .add("c", MinorType.INT)
+ .add("d", MinorType.VARCHAR)
+ .buildMap()
+ .add("e", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Verify structure and schema
+
+ assertEquals(5, rsLoader.schemaVersion());
+ TupleMetadata actualSchema = rootWriter.schema();
+ assertEquals(3, actualSchema.size());
+ assertTrue(actualSchema.metadata(1).isMap());
+ assertEquals(2, actualSchema.metadata("m").mapSchema().size());
+ assertEquals(2, actualSchema.column("m").getChildren().size());
+
+ rsLoader.startBatch();
+
+ // Write a row the way that client code will.
+
+ ScalarWriter aWriter = rootWriter.scalar("a");
+ TupleWriter mWriter = rootWriter.tuple("m");
+ ScalarWriter cWriter = mWriter.scalar("c");
+ ScalarWriter dWriter = mWriter.scalar("d");
+ ScalarWriter eWriter = rootWriter.scalar("e");
+
+ rootWriter.start();
+ aWriter.setInt(10);
+ cWriter.setInt(110);
+ dWriter.setString("fred");
+ eWriter.setString("pebbles");
+ rootWriter.save();
+
+ // Try adding a duplicate column.
+
+ try {
+ mWriter.addColumn(SchemaBuilder.columnSchema("c", MinorType.INT, DataMode.OPTIONAL));
+ fail();
+ } catch (IllegalArgumentException e) {
+ // Expected
+ }
+
+ // Write another using the test-time conveniences
+
+ rootWriter.addRow(20, new Object[] {210, "barney"}, "bam-bam");
+
+ // Harvest the batch
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+ assertEquals(5, rsLoader.schemaVersion());
+ assertEquals(2, actual.rowCount());
+
+ // Validate data
+
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, new Object[] {110, "fred"}, "pebbles")
+ .addRow(20, new Object[] {210, "barney"}, "bam-bam")
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+ rsLoader.close();
+ }
+
+ /**
+ * Create schema with a map, then add columns to the map
+ * after delivering the first batch. The new columns should appear
+ * in the second-batch output.
+ */
+
+ @Test
+ public void testMapEvolution() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .add("b", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ assertEquals(3, rsLoader.schemaVersion());
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ rootWriter
+ .addRow(10, new Object[] {"fred"})
+ .addRow(20, new Object[] {"barney"});
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+ assertEquals(3, rsLoader.schemaVersion());
+ assertEquals(2, actual.rowCount());
+
+ // Validate first batch
+
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, new Object[] {"fred"})
+ .addRow(20, new Object[] {"barney"})
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ // Add three columns in the second batch. One before
+ // the batch starts, one before the first row, and one after
+ // the first row.
+
+ TupleWriter mapWriter = rootWriter.tuple("m");
+ mapWriter.addColumn(SchemaBuilder.columnSchema("c", MinorType.INT, DataMode.REQUIRED));
+
+ rsLoader.startBatch();
+ mapWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.BIGINT, DataMode.REQUIRED));
+
+ rootWriter.addRow(30, new Object[] {"wilma", 130, 130_000L});
+
+ mapWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.REQUIRED));
+ rootWriter.addRow(40, new Object[] {"betty", 140, 140_000L, "bam-bam"});
+
+ actual = fixture.wrap(rsLoader.harvest());
+ assertEquals(6, rsLoader.schemaVersion());
+ assertEquals(2, actual.rowCount());
+
+ // Validate second batch
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .add("b", MinorType.VARCHAR)
+ .add("c", MinorType.INT)
+ .add("d", MinorType.BIGINT)
+ .add("e", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(30, new Object[] {"wilma", 130, 130_000L, ""})
+ .addRow(40, new Object[] {"betty", 140, 140_000L, "bam-bam"})
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ rsLoader.close();
+ }
+
+ /**
+ * Test adding a map to a loader after writing the first row.
+ */
+
+ @Test
+ public void testMapAddition() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ assertEquals(1, rsLoader.schemaVersion());
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Start without the map. Add a map after the first row.
+
+ rsLoader.startBatch();
+ rootWriter.addRow(10);
+
+ int mapIndex = rootWriter.addColumn(SchemaBuilder.columnSchema("m", MinorType.MAP, DataMode.REQUIRED));
+ TupleWriter mapWriter = rootWriter.tuple(mapIndex);
+
+ // Add a column to the map with the same name as the top-level column.
+ // Verifies that the name spaces are independent.
+
+ mapWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
+
+ rootWriter
+ .addRow(20, new Object[]{"fred"})
+ .addRow(30, new Object[]{"barney"});
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+ assertEquals(3, rsLoader.schemaVersion());
+ assertEquals(3, actual.rowCount());
+
+ // Validate first batch
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .add("a", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(10, new Object[] {""})
+ .addRow(20, new Object[] {"fred"})
+ .addRow(30, new Object[] {"barney"})
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ rsLoader.close();
+ }
+
+ /**
+ * Test adding an empty map to a loader after writing the first row.
+ * Then add columns in another batch. Yes, this is a bizarre condition,
+ * but we must check it anyway for robustness.
+ */
+
+ @Test
+ public void testEmptyMapAddition() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ assertEquals(1, rsLoader.schemaVersion());
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Start without the map. Add a map after the first row.
+
+ rsLoader.startBatch();
+ rootWriter.addRow(10);
+
+ int mapIndex = rootWriter.addColumn(SchemaBuilder.columnSchema("m", MinorType.MAP, DataMode.REQUIRED));
+ TupleWriter mapWriter = rootWriter.tuple(mapIndex);
+
+ rootWriter
+ .addRow(20, new Object[]{})
+ .addRow(30, new Object[]{});
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+ assertEquals(2, rsLoader.schemaVersion());
+ assertEquals(3, actual.rowCount());
+
+ // Validate first batch
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .buildMap()
+ .buildSchema();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(10, new Object[] {})
+ .addRow(20, new Object[] {})
+ .addRow(30, new Object[] {})
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ // Now add another column to the map
+
+ rsLoader.startBatch();
+ mapWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
+
+ rootWriter
+ .addRow(40, new Object[]{"fred"})
+ .addRow(50, new Object[]{"barney"});
+
+ actual = fixture.wrap(rsLoader.harvest());
+ assertEquals(3, rsLoader.schemaVersion());
+ assertEquals(2, actual.rowCount());
+
+ // Validate second batch
+
+ expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .add("a", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(40, new Object[] {"fred"})
+ .addRow(50, new Object[] {"barney"})
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ rsLoader.close();
+ }
+
+ /**
+ * Create nested maps. Then, add columns to each map
+ * on the fly. Use required, variable-width columns since
+ * those require the most processing and are most likely to
+ * fail if anything is out of place.
+ */
+
+ @Test
+ public void testNestedMapsRequired() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m1")
+ .add("b", MinorType.VARCHAR)
+ .addMap("m2")
+ .add("c", MinorType.VARCHAR)
+ .buildMap()
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ assertEquals(5, rsLoader.schemaVersion());
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ rootWriter.addRow(10, new Object[] {"b1", new Object[] {"c1"}});
+
+ // Validate first batch
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+ assertEquals(5, rsLoader.schemaVersion());
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, new Object[] {"b1", new Object[] {"c1"}})
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ // Now add columns in the second batch.
+
+ rsLoader.startBatch();
+ rootWriter.addRow(20, new Object[] {"b2", new Object[] {"c2"}});
+
+ TupleWriter m1Writer = rootWriter.tuple("m1");
+ m1Writer.addColumn(SchemaBuilder.columnSchema("d", MinorType.VARCHAR, DataMode.REQUIRED));
+ TupleWriter m2Writer = m1Writer.tuple("m2");
+ m2Writer.addColumn(SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.REQUIRED));
+
+ rootWriter.addRow(30, new Object[] {"b3", new Object[] {"c3", "e3"}, "d3"});
+
+ // And another set while the write proceeds.
+
+ m1Writer.addColumn(SchemaBuilder.columnSchema("f", MinorType.VARCHAR, DataMode.REQUIRED));
+ m2Writer.addColumn(SchemaBuilder.columnSchema("g", MinorType.VARCHAR, DataMode.REQUIRED));
+
+ rootWriter.addRow(40, new Object[] {"b4", new Object[] {"c4", "e4", "g4"}, "d4", "e4"});
+
+ // Validate second batch
+
+ actual = fixture.wrap(rsLoader.harvest());
+ assertEquals(9, rsLoader.schemaVersion());
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m1")
+ .add("b", MinorType.VARCHAR)
+ .addMap("m2")
+ .add("c", MinorType.VARCHAR)
+ .add("e", MinorType.VARCHAR)
+ .add("g", MinorType.VARCHAR)
+ .buildMap()
+ .add("d", MinorType.VARCHAR)
+ .add("f", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(20, new Object[] {"b2", new Object[] {"c2", "", "" }, "", "" })
+ .addRow(30, new Object[] {"b3", new Object[] {"c3", "e3", "" }, "d3", "" })
+ .addRow(40, new Object[] {"b4", new Object[] {"c4", "e4", "g4"}, "d4", "e4"})
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ rsLoader.close();
+ }
+
+ /**
+ * Create nested maps. Then, add columns to each map
+ * on the fly. This time, with nullable types.
+ */
+
+ @Test
+ public void testNestedMapsNullable() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m1")
+ .addNullable("b", MinorType.VARCHAR)
+ .addMap("m2")
+ .addNullable("c", MinorType.VARCHAR)
+ .buildMap()
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ rootWriter.addRow(10, new Object[] {"b1", new Object[] {"c1"}});
+
+ // Validate first batch
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, new Object[] {"b1", new Object[] {"c1"}})
+ .build();
+// actual.print();
+// expected.print();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ // Now add columns in the second batch.
+
+ rsLoader.startBatch();
+ rootWriter.addRow(20, new Object[] {"b2", new Object[] {"c2"}});
+
+ TupleWriter m1Writer = rootWriter.tuple("m1");
+ m1Writer.addColumn(SchemaBuilder.columnSchema("d", MinorType.VARCHAR, DataMode.OPTIONAL));
+ TupleWriter m2Writer = m1Writer.tuple("m2");
+ m2Writer.addColumn(SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.OPTIONAL));
+
+ rootWriter.addRow(30, new Object[] {"b3", new Object[] {"c3", "e3"}, "d3"});
+
+ // And another set while the write proceeds.
+
+ m1Writer.addColumn(SchemaBuilder.columnSchema("f", MinorType.VARCHAR, DataMode.OPTIONAL));
+ m2Writer.addColumn(SchemaBuilder.columnSchema("g", MinorType.VARCHAR, DataMode.OPTIONAL));
+
+ rootWriter.addRow(40, new Object[] {"b4", new Object[] {"c4", "e4", "g4"}, "d4", "e4"});
+
+ // Validate second batch
+
+ actual = fixture.wrap(rsLoader.harvest());
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m1")
+ .addNullable("b", MinorType.VARCHAR)
+ .addMap("m2")
+ .addNullable("c", MinorType.VARCHAR)
+ .addNullable("e", MinorType.VARCHAR)
+ .addNullable("g", MinorType.VARCHAR)
+ .buildMap()
+ .addNullable("d", MinorType.VARCHAR)
+ .addNullable("f", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(20, new Object[] {"b2", new Object[] {"c2", null, null}, null, null})
+ .addRow(30, new Object[] {"b3", new Object[] {"c3", "e3", null}, "d3", null})
+ .addRow(40, new Object[] {"b4", new Object[] {"c4", "e4", "g4"}, "d4", "e4"})
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ rsLoader.close();
+ }
+
+ /**
+ * Test a map that contains a scalar array. No reason to suspect that this
+ * will have problems, as the array writer is fully tested in the accessor
+ * subsystem. Still, we need to test the cardinality methods of the loader
+ * layer.
+ */
+
+ @Test
+ public void testMapWithArray() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .addArray("c", MinorType.INT)
+ .addArray("d", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Write some rows
+
+ rsLoader.startBatch();
+ rootWriter
+ .addRow(10, new Object[] {new int[] {110, 120, 130},
+ new String[] {"d1.1", "d1.2", "d1.3", "d1.4"}})
+ .addRow(20, new Object[] {new int[] {210}, new String[] {}})
+ .addRow(30, new Object[] {new int[] {}, new String[] {"d3.1"}})
+ ;
+
+ // Validate first batch
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, new Object[] {new int[] {110, 120, 130},
+ new String[] {"d1.1", "d1.2", "d1.3", "d1.4"}})
+ .addRow(20, new Object[] {new int[] {210}, new String[] {}})
+ .addRow(30, new Object[] {new int[] {}, new String[] {"d3.1"}})
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ // Add another array after the first row in the second batch.
+
+ rsLoader.startBatch();
+ rootWriter
+ .addRow(40, new Object[] {new int[] {410, 420}, new String[] {"d4.1", "d4.2"}})
+ .addRow(50, new Object[] {new int[] {510}, new String[] {"d5.1"}})
+ ;
+
+ TupleWriter mapWriter = rootWriter.tuple("m");
+ mapWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.REPEATED));
+ rootWriter
+ .addRow(60, new Object[] {new int[] {610, 620}, new String[] {"d6.1", "d6.2"}, new String[] {"e6.1", "e6.2"}})
+ .addRow(70, new Object[] {new int[] {710}, new String[] {}, new String[] {"e7.1", "e7.2"}})
+ ;
+
+ // Validate second batch. The new array should have been back-filled with
+ // empty offsets for the missing rows.
+
+ actual = fixture.wrap(rsLoader.harvest());
+// System.out.println(actual.schema().toString());
+ expected = fixture.rowSetBuilder(actual.schema())
+ .addRow(40, new Object[] {new int[] {410, 420}, new String[] {"d4.1", "d4.2"}, new String[] {}})
+ .addRow(50, new Object[] {new int[] {510}, new String[] {"d5.1"}, new String[] {}})
+ .addRow(60, new Object[] {new int[] {610, 620}, new String[] {"d6.1", "d6.2"}, new String[] {"e6.1", "e6.2"}})
+ .addRow(70, new Object[] {new int[] {710}, new String[] {}, new String[] {"e7.1", "e7.2"}})
+ .build();
+// expected.print();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ rsLoader.close();
+ }
+
+ /**
+ * Create a schema with a map, then trigger an overflow on one of the columns
+ * in the map. Proper overflow handling should occur regardless of nesting
+ * depth.
+ */
+
+ @Test
+ public void testMapWithOverflow() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m1")
+ .add("b", MinorType.INT)
+ .addMap("m2")
+ .add("c", MinorType.INT) // Before overflow, written
+ .add("d", MinorType.VARCHAR)
+ .add("e", MinorType.INT) // After overflow, not yet written
+ .buildMap()
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ rsLoader.startBatch();
+ while (! rootWriter.isFull()) {
+ rootWriter.addRow(count, new Object[] {count * 10, new Object[] {count * 100, value, count * 1000}});
+ count++;
+ }
+
+ // Our row count should include the overflow row
+
+ int expectedCount = ValueVector.MAX_BUFFER_SIZE / value.length;
+ assertEquals(expectedCount + 1, count);
+
+ // Loader's row count should include only "visible" rows
+
+ assertEquals(expectedCount, rootWriter.rowCount());
+
+ // Total count should include invisible and look-ahead rows.
+
+ assertEquals(expectedCount + 1, rsLoader.totalRowCount());
+
+ // Result should exclude the overflow row
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(expectedCount, result.rowCount());
+ result.clear();
+
+ // Next batch should start with the overflow row
+
+ rsLoader.startBatch();
+ assertEquals(1, rootWriter.rowCount());
+ assertEquals(expectedCount + 1, rsLoader.totalRowCount());
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(1, result.rowCount());
+ result.clear();
+
+ rsLoader.close();
+ }
+
+ /**
+ * Test the case in which a new column is added during the overflow row. Unlike
+ * the top-level schema case, internally we must create a copy of the map and
+ * move vectors across only when the result is to include the schema version
+ * of the target column. For overflow, the new column is added after the
+ * first batch is harvested; it therefore first appears in the second batch,
+ * which contains the overflow row during which the column was added.
+ */
+
+ @Test
+ public void testMapOverflowWithNewColumn() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .add("b", MinorType.INT)
+ .add("c", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ assertEquals(4, rsLoader.schemaVersion());
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Can't use the shortcut to populate rows when doing a schema
+ // change.
+
+ ScalarWriter aWriter = rootWriter.scalar("a");
+ TupleWriter mWriter = rootWriter.tuple("m");
+ ScalarWriter bWriter = mWriter.scalar("b");
+ ScalarWriter cWriter = mWriter.scalar("c");
+
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ rsLoader.startBatch();
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ aWriter.setInt(count);
+ bWriter.setInt(count * 10);
+ cWriter.setBytes(value, value.length);
+ if (rootWriter.isFull()) {
+
+ // Overflow just occurred. Add another column.
+
+ mWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.INT, DataMode.OPTIONAL));
+ mWriter.scalar("d").setInt(count * 100);
+ }
+ rootWriter.save();
+ count++;
+ }
+
+ // Result set should include the original columns, but not d.
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+
+ assertEquals(4, rsLoader.schemaVersion());
+ assertTrue(schema.isEquivalent(result.schema()));
+ BatchSchema expectedSchema = new BatchSchema(SelectionVectorMode.NONE, schema.toFieldList());
+ assertTrue(expectedSchema.isEquivalent(result.batchSchema()));
+
+ // Use a reader to validate row-by-row. Too large to create an expected
+ // result set.
+
+ RowSetReader reader = result.reader();
+ TupleReader mapReader = reader.tuple("m");
+ int rowId = 0;
+ while (reader.next()) {
+ assertEquals(rowId, reader.scalar("a").getInt());
+ assertEquals(rowId * 10, mapReader.scalar("b").getInt());
+ assertTrue(Arrays.equals(value, mapReader.scalar("c").getBytes()));
+ rowId++;
+ }
+ result.clear();
+
+ // Next batch should start with the overflow row
+
+ rsLoader.startBatch();
+ assertEquals(1, rootWriter.rowCount());
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(1, result.rowCount());
+
+ reader = result.reader();
+ mapReader = reader.tuple("m");
+ while (reader.next()) {
+ assertEquals(rowId, reader.scalar("a").getInt());
+ assertEquals(rowId * 10, mapReader.scalar("b").getInt());
+ assertTrue(Arrays.equals(value, mapReader.scalar("c").getBytes()));
+ assertEquals(rowId * 100, mapReader.scalar("d").getInt());
+ }
+ result.clear();
+
+ rsLoader.close();
+ }
+
+ /**
+ * Version of the {@link TestResultSetLoaderProtocol#testOverwriteRow()} test
+ * that uses nested columns.
+ */
+
+ @Test
+ public void testOverwriteRow() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .add("b", MinorType.INT)
+ .add("c", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Can't use the shortcut to populate rows when doing overwrites.
+
+ ScalarWriter aWriter = rootWriter.scalar("a");
+ TupleWriter mWriter = rootWriter.tuple("m");
+ ScalarWriter bWriter = mWriter.scalar("b");
+ ScalarWriter cWriter = mWriter.scalar("c");
+
+ // Write 100,000 rows, overwriting 99% of them. This will cause vector
+ // overflow and data corruption if overwrite does not work, but will happily
+ // produce the correct result if everything works as it should.
+
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ rsLoader.startBatch();
+ while (count < 100_000) {
+ rootWriter.start();
+ count++;
+ aWriter.setInt(count);
+ bWriter.setInt(count * 10);
+ cWriter.setBytes(value, value.length);
+ if (count % 100 == 0) {
+ rootWriter.save();
+ }
+ }
+
+ // Verify using a reader.
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(count / 100, result.rowCount());
+ RowSetReader reader = result.reader();
+ TupleReader mReader = reader.tuple("m");
+ int rowId = 1;
+ while (reader.next()) {
+ assertEquals(rowId * 100, reader.scalar("a").getInt());
+ assertEquals(rowId * 1000, mReader.scalar("b").getInt());
+ assertTrue(Arrays.equals(value, mReader.scalar("c").getBytes()));
+ rowId++;
+ }
+
+ result.clear();
+ rsLoader.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderOmittedValues.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderOmittedValues.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderOmittedValues.java
new file mode 100644
index 0000000..2c4c87b
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderOmittedValues.java
@@ -0,0 +1,379 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+public class TestResultSetLoaderOmittedValues extends SubOperatorTest {
+
+ /**
+ * Test "holes" in the middle of a batch, and unset columns at
+ * the end. Ending the batch should fill in missing values. The
+ * fill-in rules are summarized after the test.
+ */
+
+ @Test
+ public void testOmittedValuesAtEnd() {
+
+ // Create columns up front
+
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .addNullable("c", MinorType.VARCHAR)
+ .add("d", MinorType.INT)
+ .addNullable("e", MinorType.INT)
+ .addArray("f", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ int rowCount = 0;
+ ScalarWriter arrayWriter;
+ for (int i = 0; i < 2; i++) { // Row 0, 1
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setInt(rowCount);
+ rootWriter.scalar(1).setString("b_" + rowCount);
+ rootWriter.scalar(2).setString("c_" + rowCount);
+ rootWriter.scalar(3).setInt(rowCount * 10);
+ rootWriter.scalar(4).setInt(rowCount * 100);
+ arrayWriter = rootWriter.column(5).array().scalar();
+ arrayWriter.setString("f_" + rowCount + "-1");
+ arrayWriter.setString("f_" + rowCount + "-2");
+ rootWriter.save();
+ }
+
+ // Holes in half the columns
+
+ for (int i = 0; i < 2; i++) { // Rows 2, 3
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setInt(rowCount);
+ rootWriter.scalar(1).setString("b_" + rowCount);
+ rootWriter.scalar(3).setInt(rowCount * 10);
+ arrayWriter = rootWriter.column(5).array().scalar();
+ arrayWriter.setString("f_" + rowCount + "-1");
+ arrayWriter.setString("f_" + rowCount + "-2");
+ rootWriter.save();
+ }
+
+ // Holes in the other half
+
+ for (int i = 0; i < 2; i++) { // Rows 4, 5
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setInt(rowCount);
+ rootWriter.scalar(2).setString("c_" + rowCount);
+ rootWriter.scalar(4).setInt(rowCount * 100);
+ rootWriter.save();
+ }
+
+ // All columns again.
+
+ for (int i = 0; i < 2; i++) { // Rows 6, 7
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setInt(rowCount);
+ rootWriter.scalar(1).setString("b_" + rowCount);
+ rootWriter.scalar(2).setString("c_" + rowCount);
+ rootWriter.scalar(3).setInt(rowCount * 10);
+ rootWriter.scalar(4).setInt(rowCount * 100);
+ arrayWriter = rootWriter.column(5).array().scalar();
+ arrayWriter.setString("f_" + rowCount + "-1");
+ arrayWriter.setString("f_" + rowCount + "-2");
+ rootWriter.save();
+ }
+
+ // Omit all but the key column at the end
+
+ for (int i = 0; i < 2; i++) { // Rows 8, 9
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setInt(rowCount);
+ rootWriter.save();
+ }
+
+ // Harvest the row and verify.
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+// actual.print();
+
+ BatchSchema expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .addNullable("c", MinorType.VARCHAR)
+ .add("3", MinorType.INT)
+ .addNullable("e", MinorType.INT)
+ .addArray("f", MinorType.VARCHAR)
+ .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow( 1, "b_1", "c_1", 10, 100, new String[] {"f_1-1", "f_1-2"})
+ .addRow( 2, "b_2", "c_2", 20, 200, new String[] {"f_2-1", "f_2-2"})
+ .addRow( 3, "b_3", null, 30, null, new String[] {"f_3-1", "f_3-2"})
+ .addRow( 4, "b_4", null, 40, null, new String[] {"f_4-1", "f_4-2"})
+ .addRow( 5, "", "c_5", 0, 500, new String[] {})
+ .addRow( 6, "", "c_6", 0, 600, new String[] {})
+ .addRow( 7, "b_7", "c_7", 70, 700, new String[] {"f_7-1", "f_7-2"})
+ .addRow( 8, "b_8", "c_8", 80, 800, new String[] {"f_8-1", "f_8-2"})
+ .addRow( 9, "", null, 0, null, new String[] {})
+ .addRow( 10, "", null, 0, null, new String[] {})
+ .build();
+
+ new RowSetComparison(expected)
+ .verifyAndClearAll(actual);
+ rsLoader.close();
+ }
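+
+ // Summary of the back-fill behavior verified above: required VARCHAR
+ // columns fill with "", required INTs with 0, nullable columns with
+ // null, and arrays with an empty array.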
+
+ /**
+ * Test "holes" at the end of a batch when batch overflows. Completed
+ * batch must be finalized correctly, new batch initialized correct,
+ * for the missing values.
+ */
+
+ @Test
+ public void testOmittedValuesAtEndWithOverflow() {
+ TupleMetadata schema = new SchemaBuilder()
+ // Row index
+ .add("a", MinorType.INT)
+ // Column that forces overflow
+ .add("b", MinorType.VARCHAR)
+ // Column with all holes
+ .addNullable("c", MinorType.VARCHAR)
+ // Column with some holes
+ .addNullable("d", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Fill the batch. Column d has some values. Column c is worst case: no values.
+
+ rsLoader.startBatch();
+ byte value[] = new byte[533];
+ Arrays.fill(value, (byte) 'X');
+ int rowNumber = 0;
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rowNumber++;
+ rootWriter.scalar(0).setInt(rowNumber);
+ rootWriter.scalar(1).setBytes(value, value.length);
+ if (rowNumber < 10_000) {
+ rootWriter.scalar(3).setString("d-" + rowNumber);
+ }
+ rootWriter.save();
+ assertEquals(rowNumber, rsLoader.totalRowCount());
+ }
+
+ // Harvest and verify
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(rowNumber - 1, result.rowCount());
+ RowSetReader reader = result.reader();
+ int rowIndex = 0;
+ while (reader.next()) {
+ int expectedRowNumber = 1 + rowIndex;
+ assertEquals(expectedRowNumber, reader.scalar(0).getInt());
+ assertTrue(reader.scalar(2).isNull());
+ if (expectedRowNumber < 10_000) {
+ assertEquals("d-" + expectedRowNumber, reader.scalar(3).getString());
+ } else {
+ assertTrue(reader.scalar(3).isNull());
+ }
+ rowIndex++;
+ }
+
+ // The next batch starts with the overflow row, whose row number
+ // is the current count.
+
+ int startRowNumber = rowNumber;
+
+ // Write a few more rows to the next batch
+
+ rsLoader.startBatch();
+ for (int i = 0; i < 10; i++) {
+ rootWriter.start();
+ rowNumber++;
+ rootWriter.scalar(0).setInt(rowNumber);
+ rootWriter.scalar(1).setBytes(value, value.length);
+ if (i > 5) {
+ rootWriter.scalar(3).setString("d-" + rowNumber);
+ }
+ rootWriter.save();
+ assertEquals(rowNumber, rsLoader.totalRowCount());
+ }
+
+ // Verify that holes were preserved.
+
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(rowNumber, rsLoader.totalRowCount());
+ assertEquals(rowNumber - startRowNumber + 1, result.rowCount());
+// result.print();
+ reader = result.reader();
+ rowIndex = 0;
+ while (reader.next()) {
+ int expectedRowNumber = startRowNumber + rowIndex;
+ assertEquals(expectedRowNumber, reader.scalar(0).getInt());
+ assertTrue(reader.scalar(2).isNull());
+ if (rowIndex > 6) {
+ assertEquals("d-" + expectedRowNumber, reader.scalar(3).getString());
+ } else {
+ assertTrue("Row " + rowIndex + " col d should be null", reader.scalar(3).isNull());
+ }
+ rowIndex++;
+ }
+ assertEquals(11, rowIndex);
+
+ rsLoader.close();
+ }
+
+ /**
+ * Test that omitting the call to save() effectively discards
+ * the row. Note that the vectors still contain values in the
+ * discarded position; just the various pointers are unset. If
+ * the batch ends before the discarded values are overwritten, the
+ * discarded values just exist at the end of the vector. Since vectors
+ * start with garbage contents, the discarded values are simply a different
+ * kind of garbage. But, if the client writes a new row, then the new
+ * row overwrites the discarded row. This works because we only change
+ * the tail part of a vector, never the internals. A short sketch of
+ * this behavior follows the test below.
+ */
+
+ @Test
+ public void testSkipRows() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addNullable("b", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ int rowNumber = 0;
+ for (int i = 0; i < 14; i++) {
+ rootWriter.start();
+ rowNumber++;
+ rootWriter.scalar(0).setInt(rowNumber);
+ if (i % 3 == 0) {
+ rootWriter.scalar(1).setNull();
+ } else {
+ rootWriter.scalar(1).setString("b-" + rowNumber);
+ }
+ if (i % 2 == 0) {
+ rootWriter.save();
+ }
+ }
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+// result.print();
+ SingleRowSet expected = fixture.rowSetBuilder(result.batchSchema())
+ .addRow( 1, null)
+ .addRow( 3, "b-3")
+ .addRow( 5, "b-5")
+ .addRow( 7, null)
+ .addRow( 9, "b-9")
+ .addRow(11, "b-11")
+ .addRow(13, null)
+ .build();
+// expected.print();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(result);
+
+ rsLoader.close();
+ }
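+
+ // A sketch of the discard mechanism exercised above (illustrative only;
+ // the positions are hypothetical):
+ //
+ //   rootWriter.start();
+ //   rootWriter.scalar(0).setInt(999); // written at write position n
+ //   // No save(): the row is discarded; the write index stays at n.
+ //   rootWriter.start();
+ //   rootWriter.scalar(0).setInt(42);  // overwrites position n
+ //   rootWriter.save();                // row count advances past n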
+
+ /**
+ * Test that discarding a row works even if that row happens to be an
+ * overflow row.
+ */
+
+ @Test
+ public void testSkipOverflowRow() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addNullable("b", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(count);
+ rootWriter.scalar(1).setBytes(value, value.length);
+
+ // Relies on the fact that isFull() becomes true right after
+ // a vector overflows; we don't have to wait for save().
+ // Keep all rows, but discard the overflow row.
+
+ if (! rootWriter.isFull()) {
+ rootWriter.save();
+ }
+ count++;
+ }
+
+ // Discard the results.
+
+ rsLoader.harvest().zeroVectors();
+
+ // Harvest the next batch. Will be empty (because overflow row
+ // was discarded.)
+
+ rsLoader.startBatch();
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(0, result.rowCount());
+ result.clear();
+
+ rsLoader.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderOverflow.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderOverflow.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderOverflow.java
new file mode 100644
index 0000000..0146cfe
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderOverflow.java
@@ -0,0 +1,680 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.Arrays;
+
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+
+/**
+ * Exercise the vector overflow functionality for the result set loader.
+ */
+
+public class TestResultSetLoaderOverflow extends SubOperatorTest {
+
+ /**
+ * Test that the writer detects a vector overflow. The offending column
+ * value should be moved to the next batch.
+ */
+
+ @Test
+ public void testVectorSizeLimit() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("s", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rootWriter.scalar(0).setBytes(value, value.length);
+ rootWriter.save();
+ count++;
+ }
+
+ // Number of rows should be driven by vector size.
+ // Our row count should include the overflow row.
+ // (A worked example of this math follows the test.)
+
+ int expectedCount = ValueVector.MAX_BUFFER_SIZE / value.length;
+ assertEquals(expectedCount + 1, count);
+
+ // Loader's row count should include only "visible" rows
+
+ assertEquals(expectedCount, rootWriter.rowCount());
+
+ // Total count should include invisible and look-ahead rows.
+
+ assertEquals(expectedCount + 1, rsLoader.totalRowCount());
+
+ // Result should exclude the overflow row
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(expectedCount, result.rowCount());
+ result.clear();
+
+ // Next batch should start with the overflow row
+
+ rsLoader.startBatch();
+ assertEquals(1, rootWriter.rowCount());
+ assertEquals(expectedCount + 1, rsLoader.totalRowCount());
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(1, result.rowCount());
+ result.clear();
+
+ rsLoader.close();
+ }
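+
+ // Worked example of the counts asserted above, using an assumed
+ // constant (the test derives everything from the real
+ // ValueVector.MAX_BUFFER_SIZE; 16 MiB is used here purely for
+ // illustration):
+ //
+ //   int maxBufferSize = 16 * 1024 * 1024;            // assumed
+ //   int valueLength = 512;
+ //   int expectedCount = maxBufferSize / valueLength; // 32768 visible rows
+ //
+ // The loop writes one more row, which overflows into the look-ahead
+ // batch, so count == expectedCount + 1 when isFull() turns true.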
+
+ /**
+ * Test that the writer enforces the configured batch size limit, treating
+ * it like a vector overflow. The offending column value should be moved
+ * to the next batch.
+ */
+
+ @Test
+ public void testBatchSizeLimit() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("s", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .setBatchSizeLimit(
+ 8 * 1024 * 1024 + // Data
+ 2 * ValueVector.MAX_ROW_COUNT * 4) // Offsets, doubled because the extra
+ // (rowCount + 1) entry rounds the allocation up to the next power of 2
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rootWriter.scalar(0).setBytes(value, value.length);
+ rootWriter.save();
+ count++;
+ }
+
+ // Our row count should include the overflow row
+
+ int expectedCount = 8 * 1024 * 1024 / value.length;
+ assertEquals(expectedCount + 1, count);
+
+ // Loader's row count should include only "visible" rows
+
+ assertEquals(expectedCount, rootWriter.rowCount());
+
+ // Total count should include invisible and look-ahead rows.
+
+ assertEquals(expectedCount + 1, rsLoader.totalRowCount());
+
+ // Result should exclude the overflow row
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(expectedCount, result.rowCount());
+ result.clear();
+
+ // Next batch should start with the overflow row
+
+ rsLoader.startBatch();
+ assertEquals(1, rootWriter.rowCount());
+ assertEquals(expectedCount + 1, rsLoader.totalRowCount());
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(1, result.rowCount());
+ result.clear();
+
+ rsLoader.close();
+ }
+
+ /**
+ * Load a batch to overflow. Then, close the loader with the overflow
+ * batch unharvested. The loader should release the memory allocated
+ * to the unused overflow vectors.
+ */
+
+ @Test
+ public void testCloseWithOverflow() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("s", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rootWriter.scalar(0).setBytes(value, value.length);
+ rootWriter.save();
+ count++;
+ }
+
+ assertTrue(count < ValueVector.MAX_ROW_COUNT);
+
+ // Harvest the full batch
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ result.clear();
+
+ // Close without harvesting the overflow batch.
+
+ rsLoader.close();
+ }
+
+ /**
+ * Case where a single array fills up the vector to the maximum size
+ * limit. Overflow won't work here; the attempt will fail with a user
+ * exception.
+ */
+
+ @Test
+ public void testOversizeArray() {
+ TupleMetadata schema = new SchemaBuilder()
+ .addArray("s", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Create a single array as the column value in the first row. When
+ // this overflows the vector, an exception is thrown: a single column
+ // value cannot be split across batches, so overflow is not possible.
+
+ rsLoader.startBatch();
+ byte value[] = new byte[473];
+ Arrays.fill(value, (byte) 'X');
+ rootWriter.start();
+ ScalarWriter array = rootWriter.array(0).scalar();
+ try {
+ for (int i = 0; i < ValueVector.MAX_ROW_COUNT; i++) {
+ array.setBytes(value, value.length);
+ }
+ fail();
+ } catch (UserException e) {
+ assertTrue(e.getMessage().contains("column value is larger than the maximum"));
+ }
+ rsLoader.close();
+ }
+
+ /**
+ * Test a row with a single array column which overflows. Verifies
+ * that all the fiddly bits about offset vectors and so on works
+ * correctly. Run this test (the simplest case) if you change anything
+ * about the array handling code.
+ */
+
+ @Test
+ public void testSizeLimitOnArray() {
+ TupleMetadata schema = new SchemaBuilder()
+ .addArray("s", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Fill the batch with rows that each hold a single array of values. Tack
+ // on a suffix to each value so we can be sure the proper data is written
+ // and moved to the overflow batch.
+
+ rsLoader.startBatch();
+ byte value[] = new byte[473];
+ Arrays.fill(value, (byte) 'X');
+ String strValue = new String(value, Charsets.UTF_8);
+ int count = 0;
+ int rowSize = 0;
+ int totalSize = 0;
+ int valuesPerArray = 13;
+ while (rootWriter.start()) {
+ totalSize += rowSize;
+ rowSize = 0;
+ ScalarWriter array = rootWriter.array(0).scalar();
+ for (int i = 0; i < valuesPerArray; i++) {
+ String cellValue = strValue + (count + 1) + "." + i;
+ array.setString(cellValue);
+ rowSize += cellValue.length();
+ }
+ rootWriter.save();
+ count++;
+ }
+
+ // The count accumulated above includes the overflow row; the
+ // harvested batch excludes it.
+
+ int expectedCount = count - 1;
+
+ // Size without overflow row should fit in the vector, size
+ // with overflow should not.
+
+ assertTrue(totalSize <= ValueVector.MAX_BUFFER_SIZE);
+ assertTrue(totalSize + rowSize > ValueVector.MAX_BUFFER_SIZE);
+
+ // Result should exclude the overflow row. Last row
+ // should hold the last full array.
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(expectedCount, result.rowCount());
+ RowSetReader reader = result.reader();
+ reader.set(expectedCount - 1);
+ ScalarElementReader arrayReader = reader.column(0).elements();
+ assertEquals(valuesPerArray, arrayReader.size());
+ for (int i = 0; i < valuesPerArray; i++) {
+ String cellValue = strValue + (count - 1) + "." + i;
+ assertEquals(cellValue, arrayReader.getString(i));
+ }
+ result.clear();
+
+ // Next batch should start with the overflow row.
+ // The only row in this next batch should be the whole
+ // array being written at the time of overflow.
+
+ rsLoader.startBatch();
+// VectorPrinter.printStrings((VarCharVector) ((VarCharColumnWriter) rootWriter.array(0).scalar()).vector(), 0, 5);
+// ((ResultSetLoaderImpl) rsLoader).dump(new HierarchicalPrinter());
+ assertEquals(1, rootWriter.rowCount());
+ assertEquals(expectedCount + 1, rsLoader.totalRowCount());
+ result = fixture.wrap(rsLoader.harvest());
+// VectorPrinter.printStrings((VarCharVector) ((VarCharColumnWriter) rootWriter.array(0).scalar()).vector(), 0, 5);
+ assertEquals(1, result.rowCount());
+ reader = result.reader();
+ reader.next();
+ arrayReader = reader.column(0).elements();
+ assertEquals(valuesPerArray, arrayReader.size());
+ for (int i = 0; i < valuesPerArray; i++) {
+ String cellValue = strValue + (count) + "." + i;
+ assertEquals(cellValue, arrayReader.getString(i));
+ }
+ result.clear();
+
+ rsLoader.close();
+ }
+
+ /**
+ * Test the complete set of array overflow cases:
+ * <ul>
+ * <li>Array a is written before the column that has overflow,
+ * and must be copied, in its entirety, to the overflow row.</li>
+ * <li>Column b causes the overflow.</li>
+ * <li>Column c is written after the overflow, and should go
+ * to the look-ahead row.</li>
+ * <li>Column d is written for a while, then has empties before
+ * the overflow row, but is written in the overflow row.</li>
+ * <li>Column e is like d, but is not written in the overflow
+ * row.</li>
+ */
+
+ @Test
+ public void testArrayOverflowWithOtherArrays() {
+ TupleMetadata schema = new SchemaBuilder()
+ .addArray("a", MinorType.INT)
+ .addArray("b", MinorType.VARCHAR)
+ .addArray("c", MinorType.INT)
+ .addArray("d", MinorType.INT)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Fill the batch with rows of several arrays. Tack on a suffix to each
+ // VARCHAR value so we can be sure the proper data is written and moved
+ // to the overflow batch.
+
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ String strValue = new String(value, Charsets.UTF_8);
+
+ int aCount = 3;
+ int bCount = 11;
+ int cCount = 5;
+ int dCount = 7;
+
+ int cCutoff = ValueVector.MAX_BUFFER_SIZE / value.length / bCount / 2;
+
+ ScalarWriter aWriter = rootWriter.array("a").scalar();
+ ScalarWriter bWriter = rootWriter.array("b").scalar();
+ ScalarWriter cWriter = rootWriter.array("c").scalar();
+ ScalarWriter dWriter = rootWriter.array("d").scalar();
+
+ int count = 0;
+ rsLoader.startBatch();
+ while (rootWriter.start()) {
+ for (int i = 0; i < aCount; i++) {
+ aWriter.setInt(count * aCount + i);
+ }
+ for (int i = 0; i < bCount; i++) {
+ String cellValue = strValue + (count * bCount + i);
+ bWriter.setString(cellValue);
+ }
+ if (count < cCutoff) {
+ for (int i = 0; i < cCount; i++) {
+ cWriter.setInt(count * cCount + i);
+ }
+ }
+
+ // Relies on the fact that isFull() becomes true right after
+ // a vector overflows; we don't have to wait for save().
+
+ if (count < cCutoff || rootWriter.isFull()) {
+ for (int i = 0; i < dCount; i++) {
+ dWriter.setInt(count * dCount + i);
+ }
+ }
+ rootWriter.save();
+ count++;
+ }
+
+ // Verify
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(count - 1, result.rowCount());
+
+ RowSetReader reader = result.reader();
+ ScalarElementReader aReader = reader.array("a").elements();
+ ScalarElementReader bReader = reader.array("b").elements();
+ ScalarElementReader cReader = reader.array("c").elements();
+ ScalarElementReader dReader = reader.array("d").elements();
+
+ while (reader.next()) {
+ int rowId = reader.rowIndex();
+ assertEquals(aCount, aReader.size());
+ for (int i = 0; i < aCount; i++) {
+ assertEquals(rowId * aCount + i, aReader.getInt(i));
+ }
+ assertEquals(bCount, bReader.size());
+ for (int i = 0; i < bCount; i++) {
+ String cellValue = strValue + (rowId * bCount + i);
+ assertEquals(cellValue, bReader.getString(i));
+ }
+ if (rowId < cCutoff) {
+ assertEquals(cCount, cReader.size());
+ for (int i = 0; i < cCount; i++) {
+ assertEquals(rowId * cCount + i, cReader.getInt(i));
+ }
+ assertEquals(dCount, dReader.size());
+ for (int i = 0; i < dCount; i++) {
+ assertEquals(rowId * dCount + i, dReader.getInt(i));
+ }
+ } else {
+ assertEquals(0, cReader.size());
+ assertEquals(0, dReader.size());
+ }
+ }
+ result.clear();
+ int firstCount = count - 1;
+
+ // One row is in the batch. Write more, skipping over the
+ // initial few values for columns c and d. Column d has a
+ // roll-over value, c has an empty roll-over.
+
+ rsLoader.startBatch();
+ for (int j = 0; j < 5; j++) {
+ rootWriter.start();
+ for (int i = 0; i < aCount; i++) {
+ aWriter.setInt(count * aCount + i);
+ }
+ for (int i = 0; i < bCount; i++) {
+ String cellValue = strValue + (count * bCount + i);
+ bWriter.setString(cellValue);
+ }
+ if (j > 3) {
+ for (int i = 0; i < cCount; i++) {
+ cWriter.setInt(count * cCount + i);
+ }
+ for (int i = 0; i < dCount; i++) {
+ dWriter.setInt(count * dCount + i);
+ }
+ }
+ rootWriter.save();
+ count++;
+ }
+
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(6, result.rowCount());
+
+ reader = result.reader();
+ aReader = reader.array("a").elements();
+ bReader = reader.array("b").elements();
+ cReader = reader.array("c").elements();
+ dReader = reader.array("d").elements();
+
+ int j = 0;
+ while (reader.next()) {
+ int rowId = firstCount + reader.rowIndex();
+ assertEquals(aCount, aReader.size());
+ for (int i = 0; i < aCount; i++) {
+ assertEquals("Index " + i, rowId * aCount + i, aReader.getInt(i));
+ }
+ assertEquals(bCount, bReader.size());
+ for (int i = 0; i < bCount; i++) {
+ String cellValue = strValue + (rowId * bCount + i);
+ assertEquals(cellValue, bReader.getString(i));
+ }
+ if (j > 4) {
+ assertEquals(cCount, cReader.size());
+ for (int i = 0; i < cCount; i++) {
+ assertEquals(rowId * cCount + i, cReader.getInt(i));
+ }
+ } else {
+ assertEquals(0, cReader.size());
+ }
+ if (j == 0 || j > 4) {
+ assertEquals(dCount, dReader.size());
+ for (int i = 0; i < dCount; i++) {
+ assertEquals(rowId * dCount + i, dReader.getInt(i));
+ }
+ } else {
+ assertEquals(0, dReader.size());
+ }
+ j++;
+ }
+ result.clear();
+
+ rsLoader.close();
+ }
+
+ /**
+ * Create an array that contains more than 64K values. Drill has no numeric
+ * limit on array lengths. (Well, it does, but the limit is about 2 billion,
+ * which, even for bytes, is too large to fit into a vector...) An
+ * arithmetic sketch of that ceiling follows the test.
+ */
+
+ @Test
+ public void testLargeArray() {
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
+ RowSetLoader rootWriter = rsLoader.writer();
+ MaterializedField field = SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REPEATED);
+ rootWriter.addColumn(field);
+
+ // Create a single array as the column value in the first row. When
+ // the array exceeds the vector size limit, an exception is thrown:
+ // a single row cannot be split across batches, so overflow handling
+ // is not possible here.
+
+ rsLoader.startBatch();
+ rootWriter.start();
+ ScalarWriter array = rootWriter.array(0).scalar();
+ try {
+ for (int i = 0; i < Integer.MAX_VALUE; i++) {
+ array.setInt(i+1);
+ }
+ fail();
+ } catch (UserException e) {
+ // Expected
+ }
+ rsLoader.close();
+ }
+
+ /**
+ * Test the case in which an array has "missing values" before the overflow.
+ */
+
+ @Test
+ public void testMissingArrayValues() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .addArray("c", MinorType.INT)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+
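+ // Stop writing array values two-thirds of the way to the point
+ // at which the 512-byte values in column "b" trigger overflow.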
+ int blankAfter = ValueVector.MAX_BUFFER_SIZE / 512 * 2 / 3;
+ ScalarWriter cWriter = rootWriter.array("c").scalar();
+
+ rsLoader.startBatch();
+ int rowId = 0;
+ while (rootWriter.start()) {
+ rootWriter.scalar("a").setInt(rowId);
+ rootWriter.scalar("b").setBytes(value, value.length);
+ if (rowId < blankAfter) {
+ for (int i = 0; i < 3; i++) {
+ cWriter.setInt(rowId * 3 + i);
+ }
+ }
+ rootWriter.save();
+ rowId++;
+ }
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(rowId - 1, result.rowCount());
+ RowSetReader reader = result.reader();
+ ScalarElementReader cReader = reader.array("c").elements();
+ while (reader.next()) {
+ assertEquals(reader.rowIndex(), reader.scalar("a").getInt());
+ assertTrue(Arrays.equals(value, reader.scalar("b").getBytes()));
+ if (reader.rowIndex() < blankAfter) {
+ assertEquals(3, cReader.size());
+ for (int i = 0; i < 3; i++) {
+ assertEquals(reader.rowIndex() * 3 + i, cReader.getInt(i));
+ }
+ } else {
+ assertEquals(0, cReader.size());
+ }
+ }
+ result.clear();
+ rsLoader.close();
+ }
+
+ @Test
+ public void testOverflowWithNullables() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("n", MinorType.INT)
+ .addNullable("a", MinorType.VARCHAR)
+ .addNullable("b", MinorType.VARCHAR)
+ .addNullable("c", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(count);
+ rootWriter.scalar(1).setNull();
+ rootWriter.scalar(2).setBytes(value, value.length);
+ rootWriter.scalar(3).setNull();
+ rootWriter.save();
+ count++;
+ }
+
+ // Result should exclude the overflow row
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(count - 1, result.rowCount());
+
+ RowSetReader reader = result.reader();
+ while (reader.next()) {
+ assertEquals(reader.rowIndex(), reader.scalar(0).getInt());
+ assertTrue(reader.scalar(1).isNull());
+ assertTrue(Arrays.equals(value, reader.scalar(2).getBytes()));
+ assertTrue(reader.scalar(3).isNull());
+ }
+ result.clear();
+
+ // Next batch should start with the overflow row
+
+ rsLoader.startBatch();
+ result = fixture.wrap(rsLoader.harvest());
+ reader = result.reader();
+ assertEquals(1, result.rowCount());
+ assertTrue(reader.next());
+ assertEquals(count - 1, reader.scalar(0).getInt());
+ assertTrue(reader.scalar(1).isNull());
+ assertTrue(Arrays.equals(value, reader.scalar(2).getBytes()));
+ assertTrue(reader.scalar(3).isNull());
+ result.clear();
+
+ rsLoader.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProjection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProjection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProjection.java
new file mode 100644
index 0000000..5c6ff7b
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProjection.java
@@ -0,0 +1,470 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Test of the basics of the projection mechanism.
+ */
+
+public class TestResultSetLoaderProjection extends SubOperatorTest {
+
+ @Test
+ public void testProjectionMap() {
+
+ // Null map means everything is projected
+
+ {
+ ProjectionSet projSet = ProjectionSetImpl.parse(null);
+ assertTrue(projSet instanceof NullProjectionSet);
+ assertTrue(projSet.isProjected("foo"));
+ }
+
+ // Empty list means everything is projected
+
+ {
+ ProjectionSet projSet = ProjectionSetImpl.parse(new ArrayList<SchemaPath>());
+ assertTrue(projSet instanceof NullProjectionSet);
+ assertTrue(projSet.isProjected("foo"));
+ }
+
+ // Simple non-map columns
+
+ {
+ List<SchemaPath> projCols = new ArrayList<>();
+ projCols.add(SchemaPath.getSimplePath("foo"));
+ projCols.add(SchemaPath.getSimplePath("bar"));
+ ProjectionSet projSet = ProjectionSetImpl.parse(projCols);
+ assertTrue(projSet instanceof ProjectionSetImpl);
+ assertTrue(projSet.isProjected("foo"));
+ assertTrue(projSet.isProjected("bar"));
+ assertFalse(projSet.isProjected("mumble"));
+ }
+
+ // Whole-map projection (note, fully projected maps are
+ // identical to projected simple columns at this level of
+ // abstraction.)
+
+ {
+ List<SchemaPath> projCols = new ArrayList<>();
+ projCols.add(SchemaPath.getSimplePath("map"));
+ ProjectionSet projSet = ProjectionSetImpl.parse(projCols);
+ assertTrue(projSet instanceof ProjectionSetImpl);
+ assertTrue(projSet.isProjected("map"));
+ assertFalse(projSet.isProjected("another"));
+ ProjectionSet mapProj = projSet.mapProjection("map");
+ assertNotNull(mapProj);
+ assertTrue(mapProj instanceof NullProjectionSet);
+ assertTrue(mapProj.isProjected("foo"));
+ assertNotNull(projSet.mapProjection("another"));
+ assertFalse(projSet.mapProjection("another").isProjected("anyCol"));
+ }
+
+ // Selected map projection, multiple levels, full projection
+ // at leaf level.
+
+ {
+ List<SchemaPath> projCols = new ArrayList<>();
+ projCols.add(SchemaPath.getCompoundPath("map", "a"));
+ projCols.add(SchemaPath.getCompoundPath("map", "b"));
+ projCols.add(SchemaPath.getCompoundPath("map", "map2", "x"));
+ ProjectionSet projSet = ProjectionSetImpl.parse(projCols);
+ assertTrue(projSet instanceof ProjectionSetImpl);
+ assertTrue(projSet.isProjected("map"));
+
+ // Map: an explicit map at top level
+
+ ProjectionSet mapProj = projSet.mapProjection("map");
+ assertTrue(mapProj instanceof ProjectionSetImpl);
+ assertTrue(mapProj.isProjected("a"));
+ assertTrue(mapProj.isProjected("b"));
+ assertTrue(mapProj.isProjected("map2"));
+ assertFalse(projSet.isProjected("bogus"));
+
+ // Map b: an implied nested map
+
+ ProjectionSet bMapProj = mapProj.mapProjection("b");
+ assertNotNull(bMapProj);
+ assertTrue(bMapProj instanceof NullProjectionSet);
+ assertTrue(bMapProj.isProjected("foo"));
+
+ // Map2, a nested map, has an explicit projection
+
+ ProjectionSet map2Proj = mapProj.mapProjection("map2");
+ assertNotNull(map2Proj);
+ assertTrue(map2Proj instanceof ProjectionSetImpl);
+ assertTrue(map2Proj.isProjected("x"));
+ assertFalse(map2Proj.isProjected("bogus"));
+ }
+ }
+
+ /**
+ * Test imposing a selection mask between the client and the underlying
+ * vector container.
+ */
+
+ @Test
+ public void testProjectionStatic() {
+ List<SchemaPath> selection = Lists.newArrayList(
+ SchemaPath.getSimplePath("c"),
+ SchemaPath.getSimplePath("b"),
+ SchemaPath.getSimplePath("e"));
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.INT)
+ .add("c", MinorType.INT)
+ .add("d", MinorType.INT)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setProjection(selection)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+
+ doProjectionTest(rsLoader);
+ }
+
+ @Test
+ public void testProjectionDynamic() {
+ List<SchemaPath> selection = Lists.newArrayList(
+ SchemaPath.getSimplePath("c"),
+ SchemaPath.getSimplePath("b"),
+ SchemaPath.getSimplePath("e"));
+ ResultSetOptions options = new OptionBuilder()
+ .setProjection(selection)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+ rootWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED));
+ rootWriter.addColumn(SchemaBuilder.columnSchema("b", MinorType.INT, DataMode.REQUIRED));
+ rootWriter.addColumn(SchemaBuilder.columnSchema("c", MinorType.INT, DataMode.REQUIRED));
+ rootWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.INT, DataMode.REQUIRED));
+
+ doProjectionTest(rsLoader);
+ }
+
+ private void doProjectionTest(ResultSetLoader rsLoader) {
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // All columns appear, including non-projected ones.
+
+ TupleMetadata actualSchema = rootWriter.schema();
+ assertEquals(4, actualSchema.size());
+ assertEquals("a", actualSchema.column(0).getName());
+ assertEquals("b", actualSchema.column(1).getName());
+ assertEquals("c", actualSchema.column(2).getName());
+ assertEquals("d", actualSchema.column(3).getName());
+ assertEquals(0, actualSchema.index("A"));
+ assertEquals(3, actualSchema.index("d"));
+ assertEquals(-1, actualSchema.index("e"));
+
+ // Non-projected columns identify themselves via metadata
+
+ assertFalse(actualSchema.metadata("a").isProjected());
+ assertTrue(actualSchema.metadata("b").isProjected());
+ assertTrue(actualSchema.metadata("c").isProjected());
+ assertFalse(actualSchema.metadata("d").isProjected());
+
+ // Write some data. Doesn't need much.
+
+ rsLoader.startBatch();
+ for (int i = 1; i < 3; i++) {
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(i * 5);
+ rootWriter.scalar(1).setInt(i);
+ rootWriter.scalar(2).setInt(i * 10);
+ rootWriter.scalar(3).setInt(i * 20);
+ rootWriter.save();
+ }
+
+ // Verify. Result should only have the projected
+ // columns, only if defined by the loader, in the order
+ // of definition.
+
+ BatchSchema expectedSchema = new SchemaBuilder()
+ .add("b", MinorType.INT)
+ .add("c", MinorType.INT)
+ .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(1, 10)
+ .addRow(2, 20)
+ .build();
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+// actual.print();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(actual);
+ rsLoader.close();
+ }
+
+ @Test
+ public void testMapProjection() {
+ List<SchemaPath> selection = Lists.newArrayList(
+ SchemaPath.getSimplePath("m1"),
+ SchemaPath.getCompoundPath("m2", "d"));
+ TupleMetadata schema = new SchemaBuilder()
+ .addMap("m1")
+ .add("a", MinorType.INT)
+ .add("b", MinorType.INT)
+ .buildMap()
+ .addMap("m2")
+ .add("c", MinorType.INT)
+ .add("d", MinorType.INT)
+ .buildMap()
+ .addMap("m3")
+ .add("e", MinorType.INT)
+ .add("f", MinorType.INT)
+ .buildMap()
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setProjection(selection)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Verify the projected columns
+
+ TupleMetadata actualSchema = rootWriter.schema();
+ ColumnMetadata m1Md = actualSchema.metadata("m1");
+ assertTrue(m1Md.isMap());
+ assertTrue(m1Md.isProjected());
+ assertEquals(2, m1Md.mapSchema().size());
+ assertTrue(m1Md.mapSchema().metadata("a").isProjected());
+ assertTrue(m1Md.mapSchema().metadata("b").isProjected());
+
+ ColumnMetadata m2Md = actualSchema.metadata("m2");
+ assertTrue(m2Md.isMap());
+ assertTrue(m2Md.isProjected());
+ assertEquals(2, m2Md.mapSchema().size());
+ assertFalse(m2Md.mapSchema().metadata("c").isProjected());
+ assertTrue(m2Md.mapSchema().metadata("d").isProjected());
+
+ ColumnMetadata m3Md = actualSchema.metadata("m3");
+ assertTrue(m3Md.isMap());
+ assertFalse(m3Md.isProjected());
+ assertEquals(2, m3Md.mapSchema().size());
+ assertFalse(m3Md.mapSchema().metadata("e").isProjected());
+ assertFalse(m3Md.mapSchema().metadata("f").isProjected());
+
+ // Write a couple of rows.
+
+ rsLoader.startBatch();
+ rootWriter.start();
+ rootWriter.tuple("m1").scalar("a").setInt(1);
+ rootWriter.tuple("m1").scalar("b").setInt(2);
+ rootWriter.tuple("m2").scalar("c").setInt(3);
+ rootWriter.tuple("m2").scalar("d").setInt(4);
+ rootWriter.tuple("m3").scalar("e").setInt(5);
+ rootWriter.tuple("m3").scalar("f").setInt(6);
+ rootWriter.save();
+
+ rootWriter.start();
+ rootWriter.tuple("m1").scalar("a").setInt(11);
+ rootWriter.tuple("m1").scalar("b").setInt(12);
+ rootWriter.tuple("m2").scalar("c").setInt(13);
+ rootWriter.tuple("m2").scalar("d").setInt(14);
+ rootWriter.tuple("m3").scalar("e").setInt(15);
+ rootWriter.tuple("m3").scalar("f").setInt(16);
+ rootWriter.save();
+
+ // Verify. Only the projected columns appear in the result set.
+
+ BatchSchema expectedSchema = new SchemaBuilder()
+ .addMap("m1")
+ .add("a", MinorType.INT)
+ .add("b", MinorType.INT)
+ .buildMap()
+ .addMap("m2")
+ .add("d", MinorType.INT)
+ .buildMap()
+ .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(new Object[] {1, 2}, new Object[] {4})
+ .addRow(new Object[] {11, 12}, new Object[] {14})
+ .build();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(fixture.wrap(rsLoader.harvest()));
+ rsLoader.close();
+ }
+
+ /**
+ * Test a map array. Use the convenience methods to set values.
+ * Only the projected array members should appear in the harvested
+ * results.
+ */
+
+ @Test
+ public void testMapArrayProjection() {
+ List<SchemaPath> selection = Lists.newArrayList(
+ SchemaPath.getSimplePath("m1"),
+ SchemaPath.getCompoundPath("m2", "d"));
+ TupleMetadata schema = new SchemaBuilder()
+ .addMapArray("m1")
+ .add("a", MinorType.INT)
+ .add("b", MinorType.INT)
+ .buildMap()
+ .addMapArray("m2")
+ .add("c", MinorType.INT)
+ .add("d", MinorType.INT)
+ .buildMap()
+ .addMapArray("m3")
+ .add("e", MinorType.INT)
+ .add("f", MinorType.INT)
+ .buildMap()
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setProjection(selection)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Write a couple of rows.
+
+ rsLoader.startBatch();
+ rootWriter.addRow(
+ new Object[] { new Object[] {10, 20}, new Object[] {11, 21}},
+ new Object[] { new Object[] {30, 40}, new Object[] {31, 42}},
+ new Object[] { new Object[] {50, 60}, new Object[] {51, 62}});
+ rootWriter.addRow(
+ new Object[] { new Object[] {110, 120}, new Object[] {111, 121}},
+ new Object[] { new Object[] {130, 140}, new Object[] {131, 142}},
+ new Object[] { new Object[] {150, 160}, new Object[] {151, 162}});
+
+ // Verify. Only the projected columns appear in the result set.
+
+ BatchSchema expectedSchema = new SchemaBuilder()
+ .addMapArray("m1")
+ .add("a", MinorType.INT)
+ .add("b", MinorType.INT)
+ .buildMap()
+ .addMapArray("m2")
+ .add("d", MinorType.INT)
+ .buildMap()
+ .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(
+ new Object[] { new Object[] {10, 20}, new Object[] {11, 21}},
+ new Object[] { new Object[] {40}, new Object[] {42}})
+ .addRow(
+ new Object[] { new Object[] {110, 120}, new Object[] {111, 121}},
+ new Object[] { new Object[] {140}, new Object[] {142}})
+ .build();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(fixture.wrap(rsLoader.harvest()));
+ rsLoader.close();
+ }
+
+ /**
+ * Verify that the projection code plays nice with vector overflow. Overflow
+ * is the most complex operation in this subsystem with many specialized
+ * methods that must work together flawlessly. This test ensures that
+ * non-projected columns stay in the background and don't interfere
+ * with overflow logic.
+ */
+
+ @Test
+ public void testProjectWithOverflow() {
+ List<SchemaPath> selection = Lists.newArrayList(
+ SchemaPath.getSimplePath("small"),
+ SchemaPath.getSimplePath("dummy"));
+ TupleMetadata schema = new SchemaBuilder()
+ .add("big", MinorType.VARCHAR)
+ .add("small", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setProjection(selection)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ byte big[] = new byte[600];
+ Arrays.fill(big, (byte) 'X');
+ byte small[] = new byte[512];
+ Arrays.fill(small, (byte) 'X');
+
+ rsLoader.startBatch();
+ int count = 0;
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rootWriter.scalar(0).setBytes(big, big.length);
+ rootWriter.scalar(1).setBytes(small, small.length);
+ rootWriter.save();
+ count++;
+ }
+
+ // Number of rows should be driven by size of the
+ // projected vector ("small"), not by the larger, unprojected
+ // "big" vector.
+ // Our row count should include the overflow row
+
+ int expectedCount = ValueVector.MAX_BUFFER_SIZE / small.length;
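+ // (Assuming the 16 MB MAX_BUFFER_SIZE, that is 32,768 rows of
+ // 512-byte values before overflow.)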
+ assertEquals(expectedCount + 1, count);
+
+ // Loader's row count should include only "visible" rows
+
+ assertEquals(expectedCount, rootWriter.rowCount());
+
+ // Total count should include invisible and look-ahead rows.
+
+ assertEquals(expectedCount + 1, rsLoader.totalRowCount());
+
+ // Result should exclude the overflow row
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(expectedCount, result.rowCount());
+ result.clear();
+
+ // Next batch should start with the overflow row
+
+ rsLoader.startBatch();
+ assertEquals(1, rootWriter.rowCount());
+ assertEquals(expectedCount + 1, rsLoader.totalRowCount());
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(1, result.rowCount());
+ result.clear();
+
+ rsLoader.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java
new file mode 100644
index 0000000..b875e7e
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java
@@ -0,0 +1,775 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import java.util.Collection;
+
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.ResultVectorCache;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.physical.rowSet.impl.TupleState.RowState;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+
+/**
+ * Implementation of the result set loader.
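+ * <p>
+ * A typical write cycle, mirroring the unit tests (an illustrative
+ * sketch, not the only valid call sequence):
+ * <pre><code>
+ * ResultSetLoader rsLoader = new ResultSetLoaderImpl(allocator, options);
+ * RowSetLoader rootWriter = rsLoader.writer();
+ * rsLoader.startBatch();
+ * int i = 0;
+ * while (! rootWriter.isFull()) {
+ *   rootWriter.start();
+ *   rootWriter.scalar(0).setInt(i++);
+ *   rootWriter.save();
+ * }
+ * VectorContainer batch = rsLoader.harvest();
+ * // Send the batch downstream, then start the next batch or close().
+ * </code></pre>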
+ * @see ResultSetLoader
+ */
+
+public class ResultSetLoaderImpl implements ResultSetLoader {
+
+ /**
+ * Read-only set of options for the result set loader.
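+ * <p>
+ * Instances are created via {@link OptionBuilder}; an illustrative
+ * sketch, following the usage in the unit tests:
+ * <pre><code>
+ * ResultSetOptions options = new OptionBuilder()
+ *     .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ *     .setSchema(schema)
+ *     .build();
+ * </code></pre>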
+ */
+
+ public static class ResultSetOptions {
+ public final int vectorSizeLimit;
+ public final int rowCountLimit;
+ public final ResultVectorCache vectorCache;
+ public final Collection<SchemaPath> projection;
+ public final TupleMetadata schema;
+ public final long maxBatchSize;
+
+ public ResultSetOptions() {
+ vectorSizeLimit = ValueVector.MAX_BUFFER_SIZE;
+ rowCountLimit = DEFAULT_ROW_COUNT;
+ projection = null;
+ vectorCache = null;
+ schema = null;
+ maxBatchSize = -1;
+ }
+
+ public ResultSetOptions(OptionBuilder builder) {
+ this.vectorSizeLimit = builder.vectorSizeLimit;
+ this.rowCountLimit = builder.rowCountLimit;
+ this.projection = builder.projection;
+ this.vectorCache = builder.vectorCache;
+ this.schema = builder.schema;
+ this.maxBatchSize = builder.maxBatchSize;
+ }
+
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("vectorSizeLimit", vectorSizeLimit)
+ .attribute("rowCountLimit", rowCountLimit)
+ .attribute("projection", projection)
+ .endObject();
+ }
+ }
+
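+ /**
+ * Writer lifecycle state. An illustrative summary of the transitions
+ * implemented below: START, HARVESTED, or LOOK_AHEAD move to ACTIVE on
+ * <tt>startBatch()</tt>; ACTIVE moves to FULL_BATCH when the row limit
+ * is reached, or to OVERFLOW (via the transient IN_OVERFLOW) when a
+ * vector overflows; <tt>harvest()</tt> moves ACTIVE and FULL_BATCH to
+ * HARVESTED, and OVERFLOW to LOOK_AHEAD; <tt>close()</tt> moves any
+ * state to CLOSED.
+ */
+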
+ private enum State {
+ /**
+ * Before the first batch.
+ */
+
+ START,
+
+ /**
+ * Writing to a batch normally.
+ */
+
+ ACTIVE,
+
+ /**
+ * Batch overflowed a vector while writing. Can continue
+ * to write to a temporary "overflow" batch until the
+ * end of the current row.
+ */
+
+ OVERFLOW,
+
+ /**
+ * Temporary state to avoid batch-size related overflow while
+ * an overflow is in progress.
+ */
+
+ IN_OVERFLOW,
+
+ /**
+ * Batch is full due to reaching the row count limit
+ * when saving a row.
+ * No more writes allowed until harvesting the current batch.
+ */
+
+ FULL_BATCH,
+
+ /**
+ * Current batch was harvested: data is gone. No lookahead
+ * batch exists.
+ */
+
+ HARVESTED,
+
+ /**
+ * Current batch was harvested and its data is gone. However,
+ * overflow occurred during that batch and the data exists
+ * in the overflow vectors.
+ * <p>
+ * This state needs special consideration. The column writer
+ * structure maintains its state (offsets, etc.) from the OVERFLOW
+ * state, but the buffers currently in the vectors are from the
+ * complete batch. <b>No writes can be done in this state!</b>
+ * The writer state does not match the data in the buffers.
+ * The code here does what it can to catch this state. But, if
+ * some client tries to write to a column writer in this state,
+ * bad things will happen. Doing so is invalid (the write is outside
+ * of a batch), so this is not a terrible restriction.
+ * <p>
+ * Said another way, the current writer state is invalid with respect
+ * to the active buffers, but only if the writers try to act on the
+ * buffers. Since the writers won't do so, this temporary state is
+ * fine. The correct buffers are restored once a new batch is started
+ * and the state moves to ACTIVE.
+ */
+
+ LOOK_AHEAD,
+
+ /**
+ * Mutator is closed: no more operations are allowed.
+ */
+
+ CLOSED
+ }
+
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ResultSetLoaderImpl.class);
+
+ /**
+ * Options provided to this loader.
+ */
+
+ private final ResultSetOptions options;
+
+ /**
+ * Allocator for vectors created by this loader.
+ */
+
+ final BufferAllocator allocator;
+
+ /**
+ * Internal structure used to work with the vectors (real or dummy) used
+ * by this loader.
+ */
+
+ final RowState rootState;
+
+ /**
+ * Top-level writer index that steps through the rows as they are written.
+ * When an overflow batch is in effect, indexes into that batch instead.
+ * Since a batch is really a tree of tuples, in which some branches of
+ * the tree are arrays, the root index here feeds into array indexes
+ * within the writer structure that point to the current position within
+ * each array column.
+ */
+
+ private final WriterIndexImpl writerIndex;
+
+ /**
+ * The row-level writer for stepping through rows as they are written,
+ * and for accessing top-level columns.
+ */
+
+ private final RowSetLoaderImpl rootWriter;
+
+ /**
+ * Vector cache for this loader.
+ * @see OptionBuilder#setVectorCache()
+ */
+
+ private final ResultVectorCache vectorCache;
+
+ /**
+ * Tracks the state of the row set loader. Handling vector overflow requires
+ * careful stepping through a variety of states as the write proceeds.
+ */
+
+ private State state = State.START;
+
+ /**
+ * Track the current schema as seen by the writer. Each addition of a column
+ * anywhere in the schema causes the active schema version to increase by one.
+ * This allows very easy checks for schema changes: save the prior version number
+ * and compare it against the current version number.
+ */
+
+ private int activeSchemaVersion;
+
+ /**
+ * Track the current schema as seen by the consumer of the batches that this
+ * loader produces. The harvest schema version can be behind the active schema
+ * version in the case in which new columns are added to the overflow row.
+ * Since the overflow row won't be visible to the harvested batch, that batch
+ * sees the schema as it existed at a prior version: the harvest schema
+ * version.
+ */
+
+ private int harvestSchemaVersion;
+
+ /**
+ * Builds the harvest vector container that includes only the columns that
+ * are included in the harvest schema version. That is, it excludes columns
+ * added while writing the overflow row.
+ */
+
+ private VectorContainerBuilder containerBuilder;
+
+ /**
+ * Counts the batches harvested (sent downstream) from this loader. Does
+ * not include the current, in-flight batch.
+ */
+
+ private int harvestBatchCount;
+
+ /**
+ * Counts the rows included in previously-harvested batches. Does not
+ * include the number of rows in the current batch.
+ */
+
+ private int previousRowCount;
+
+ /**
+ * Number of rows in the harvest batch. If an overflow batch is in effect,
+ * then this is the number of rows in the "main" batch before the overflow;
+ * that is the number of rows in the batch that will be harvested. If no
+ * overflow row is in effect, then this number is undefined (and should be
+ * zero.)
+ */
+
+ private int pendingRowCount;
+
+ /**
+ * The number of rows per batch. Starts with the configured amount. Can be
+ * adjusted between batches, perhaps based on the actual observed size of
+ * input data.
+ */
+
+ private int targetRowCount;
+
+ /**
+ * Total bytes allocated to the current batch.
+ */
+
+ protected int accumulatedBatchSize;
+
+ protected final ProjectionSet projectionSet;
+
+ public ResultSetLoaderImpl(BufferAllocator allocator, ResultSetOptions options) {
+ this.allocator = allocator;
+ this.options = options;
+ targetRowCount = options.rowCountLimit;
+ writerIndex = new WriterIndexImpl(this);
+
+ if (options.vectorCache == null) {
+ vectorCache = new NullResultVectorCacheImpl(allocator);
+ } else {
+ vectorCache = options.vectorCache;
+ }
+
+ // If projection, build the projection map.
+
+ projectionSet = ProjectionSetImpl.parse(options.projection);
+
+ // Build the row set model depending on whether a schema is provided.
+
+ rootState = new RowState(this);
+ rootWriter = rootState.rootWriter();
+
+ // If no schema, columns will be added incrementally as they
+ // are discovered. Start with an empty model.
+
+ if (options.schema != null) {
+
+ // Schema provided. Populate a model (and create vectors) for the
+ // provided schema. The schema can be extended later, but normally
+ // won't be if known up front.
+
+ logger.debug("Schema: " + options.schema.toString());
+ rootState.buildSchema(options.schema);
+ }
+ }
+
+ private void updateCardinality() {
+ rootState.updateCardinality(targetRowCount());
+ }
+
+ public ResultSetLoaderImpl(BufferAllocator allocator) {
+ this(allocator, new ResultSetOptions());
+ }
+
+ public BufferAllocator allocator() { return allocator; }
+
+ protected int bumpVersion() {
+
+ // Update the active schema version. We cannot update the published
+ // schema version at this point because a column later in this same
+ // row might cause overflow, and any new columns in this row will
+ // be hidden until a later batch. But, if we are between batches,
+ // then it is fine to add the column to the schema.
+
+ activeSchemaVersion++;
+ switch (state) {
+ case HARVESTED:
+ case START:
+ case LOOK_AHEAD:
+ harvestSchemaVersion = activeSchemaVersion;
+ break;
+ default:
+ break;
+
+ }
+ return activeSchemaVersion;
+ }
+
+ @Override
+ public int schemaVersion() { return harvestSchemaVersion; }
+
+ @Override
+ public void startBatch() {
+ switch (state) {
+ case HARVESTED:
+ case START:
+ logger.trace("Start batch");
+ accumulatedBatchSize = 0;
+ updateCardinality();
+ rootState.startBatch();
+ checkInitialAllocation();
+
+ // The previous batch ended without overflow, so start
+ // a new batch, and reset the write index to 0.
+
+ writerIndex.reset();
+ rootWriter.startWrite();
+ break;
+
+ case LOOK_AHEAD:
+
+ // A row overflowed so keep the writer index at its current value
+ // as it points to the second row in the overflow batch. However,
+ // the last write position of each writer must be restored on
+ // a column-by-column basis, which is done by the visitor.
+
+ logger.trace("Start batch after overflow");
+ rootState.startBatch();
+
+ // Note: no need to do anything with the writers; they were left
+ // pointing to the correct positions in the look-ahead batch.
+ // The above simply puts the look-ahead vectors back "under"
+ // the writers.
+
+ break;
+
+ default:
+ throw new IllegalStateException("Unexpected state: " + state);
+ }
+
+ // Update the visible schema with any pending overflow batch
+ // updates.
+
+ harvestSchemaVersion = activeSchemaVersion;
+ pendingRowCount = 0;
+ state = State.ACTIVE;
+ }
+
+ @Override
+ public RowSetLoader writer() {
+ if (state == State.CLOSED) {
+ throw new IllegalStateException("Unexpected state: " + state);
+ }
+ return rootWriter;
+ }
+
+ @Override
+ public ResultSetLoader setRow(Object... values) {
+ startRow();
+ writer().setTuple(values);
+ saveRow();
+ return this;
+ }
+
+ /**
+ * Called before writing a new row. Implementation of
+ * {@link RowSetLoader#start()}.
+ */
+
+ protected void startRow() {
+ switch (state) {
+ case ACTIVE:
+
+ // Update the visible schema with any pending overflow batch
+ // updates.
+
+ harvestSchemaVersion = activeSchemaVersion;
+ rootWriter.startRow();
+ break;
+ default:
+ throw new IllegalStateException("Unexpected state: " + state);
+ }
+ }
+
+ /**
+ * Finalize the current row. Implementation of
+ * {@link RowSetLoader#save()}.
+ */
+
+ protected void saveRow() {
+ switch (state) {
+ case ACTIVE:
+ rootWriter.endArrayValue();
+ rootWriter.saveRow();
+ if (! writerIndex.next()) {
+ state = State.FULL_BATCH;
+ }
+
+ // No overflow row. Advertise the schema version to the client.
+
+ harvestSchemaVersion = activeSchemaVersion;
+ break;
+
+ case OVERFLOW:
+
+ // End the value of the look-ahead row in the look-ahead vectors.
+
+ rootWriter.endArrayValue();
+ rootWriter.saveRow();
+
+ // Advance the writer index relative to the look-ahead batch.
+
+ writerIndex.next();
+
+ // Stay in the overflow state. Doing so will cause the writer
+ // to report that it is full.
+ //
+ // Also, do not change the harvest schema version. We will
+ // expose to the downstream operators the schema in effect
+ // at the start of the row. Columns added within the row won't
+ // appear until the next batch.
+
+ break;
+
+ default:
+ throw new IllegalStateException("Unexpected state: " + state);
+ }
+ }
+
+ /**
+ * Implementation of {@link RowSetLoader#isFull()}
+ * @return true if the batch is full (reached vector capacity or the
+ * row count limit), false if more rows can be added
+ */
+
+ protected boolean isFull() {
+ switch (state) {
+ case ACTIVE:
+ return ! writerIndex.valid();
+ case OVERFLOW:
+ case FULL_BATCH:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ @Override
+ public boolean writeable() {
+ return state == State.ACTIVE || state == State.OVERFLOW;
+ }
+
+ private boolean isBatchActive() {
+ return state == State.ACTIVE || state == State.OVERFLOW ||
+ state == State.FULL_BATCH ;
+ }
+
+ /**
+ * Implementation of {@link RowSetLoader#rowCount()}.
+ *
+ * @return the number of rows to be sent downstream for this
+ * batch. Does not include the overflow row.
+ */
+
+ protected int rowCount() {
+ switch (state) {
+ case ACTIVE:
+ case FULL_BATCH:
+ return writerIndex.size();
+ case OVERFLOW:
+ return pendingRowCount;
+ default:
+ return 0;
+ }
+ }
+
+ protected WriterIndexImpl writerIndex() { return writerIndex; }
+
+ @Override
+ public void setTargetRowCount(int rowCount) {
+ targetRowCount = Math.max(1, rowCount);
+ }
+
+ @Override
+ public int targetRowCount() { return targetRowCount; }
+
+ @Override
+ public int targetVectorSize() { return options.vectorSizeLimit; }
+
+ protected void overflowed() {
+ logger.trace("Vector overflow");
+
+ // If we see overflow when we are already handling overflow, it means
+ // that a single value is too large to fit into an entire vector.
+ // Fail the query.
+ //
+ // Note that this is a judgment call. It is possible to allow the
+ // vector to double beyond the limit, but that will require a bit
+ // of thought to get right -- and, of course, completely defeats
+ // the purpose of limiting vector size to avoid memory fragmentation...
+ //
+ // Individual columns handle the case in which overflow occurs on the
+ // first row of the main batch. This check handles the pathological case
+ // in which we successfully overflowed, but then another column
+ // overflowed during the overflow row -- which indicates that the
+ // column's value can't fit even in an empty vector. That is, this
+ // check is for a second-order overflow.
+
+ if (state == State.OVERFLOW) {
+ throw UserException
+ .memoryError("A single column value is larger than the maximum allowed size of 16 MB")
+ .build(logger);
+ }
+ if (state != State.ACTIVE) {
+ throw new IllegalStateException("Unexpected state: " + state);
+ }
+ state = State.IN_OVERFLOW;
+
+ // Preserve the number of rows in the now-complete batch.
+
+ pendingRowCount = writerIndex.vectorIndex();
+
+ // Roll-over will allocate new vectors. Update with the latest
+ // array cardinality.
+
+ updateCardinality();
+
+// rootWriter.dump(new HierarchicalPrinter());
+
+ // Wrap up the completed rows into a batch. Sets
+ // vector value counts. The rollover data still exists so
+ // it can be moved, but it is now past the recorded
+ // end of the vectors (though, obviously, not past the
+ // physical end.)
+
+ rootWriter.preRollover();
+
+ // Roll over vector values.
+
+ accumulatedBatchSize = 0;
+ rootState.rollover();
+
+ // Adjust writer state to match the new vector values. This is
+ // surprisingly easy if we note that the current row is shifted to
+ // the 0 position in the new vector, so we just shift all offsets
+ // downward by the current row position at each repeat level.
+
+ rootWriter.postRollover();
+
+ // The writer index is reset back to 0. Because of the above roll-over
+ // processing, some vectors may now already have values in the 0 slot.
+ // However, the vector that triggered overflow has not yet written to
+ // the current record, and so will now write to position 0. After the
+ // completion of the row, all 0-position values should be written (or
+ // at least those provided by the client.)
+ //
+ // For arrays, the writer might have written a set of values
+ // (v1, v2, v3), and v4 might have triggered the overflow. In this case,
+ // the array values have been moved, offset vectors adjusted, the
+ // element writer adjusted, so that v4 will be written to index 3
+ // to produce (v1, v2, v3, v4, v5, ...) in the look-ahead vector.
+
+ writerIndex.rollover();
+ checkInitialAllocation();
+
+ // Remember that overflow is in effect.
+
+ state = State.OVERFLOW;
+ }
+
+ protected boolean hasOverflow() { return state == State.OVERFLOW; }
+
+ @Override
+ public VectorContainer harvest() {
+ int rowCount;
+ switch (state) {
+ case ACTIVE:
+ case FULL_BATCH:
+ rowCount = harvestNormalBatch();
+ logger.trace("Harvesting {} rows", rowCount);
+ break;
+ case OVERFLOW:
+ rowCount = harvestOverflowBatch();
+ logger.trace("Harvesting {} rows after overflow", rowCount);
+ break;
+ default:
+ throw new IllegalStateException("Unexpected state: " + state);
+ }
+
+ // Build the output container
+
+ VectorContainer container = outputContainer();
+ container.setRecordCount(rowCount);
+
+ // Finalize: update counts, set state.
+
+ harvestBatchCount++;
+ previousRowCount += rowCount;
+ return container;
+ }
+
+ private int harvestNormalBatch() {
+
+ // Wrap up the vectors: final fill-in, set value count, etc.
+
+ rootWriter.endBatch();
+ harvestSchemaVersion = activeSchemaVersion;
+ state = State.HARVESTED;
+ return writerIndex.size();
+ }
+
+ private int harvestOverflowBatch() {
+ rootState.harvestWithLookAhead();
+ state = State.LOOK_AHEAD;
+ return pendingRowCount;
+ }
+
+ @Override
+ public VectorContainer outputContainer() {
+ // Build the output container.
+
+ if (containerBuilder == null) {
+ containerBuilder = new VectorContainerBuilder(this);
+ }
+ containerBuilder.update(harvestSchemaVersion);
+ return containerBuilder.container();
+ }
+
+ @Override
+ public TupleMetadata harvestSchema() {
+ return containerBuilder.schema();
+ }
+
+ @Override
+ public void close() {
+ if (state == State.CLOSED) {
+ return;
+ }
+ rootState.close();
+
+ // Do not close the vector cache; the caller owns that and
+ // will, presumably, reuse those vectors for another writer.
+
+ state = State.CLOSED;
+ }
+
+ @Override
+ public int batchCount() {
+ return harvestBatchCount + (rowCount() == 0 ? 0 : 1);
+ }
+
+ @Override
+ public int totalRowCount() {
+ int total = previousRowCount;
+ if (isBatchActive()) {
+ total += pendingRowCount + writerIndex.size();
+ }
+ return total;
+ }
+
+ public ResultVectorCache vectorCache() { return vectorCache; }
+ public RowState rootState() { return rootState; }
+
+ /**
+ * Return whether a vector within the current batch can expand by the
+ * given amount. The delta is added to the accumulated batch size. Limits
+ * are enforced only if a maximum batch size was provided in the options.
+ *
+ * @param delta increase in vector size
+ * @return true if the vector can expand, false if an overflow
+ * event should occur
+ */
+
+ public boolean canExpand(int delta) {
+ accumulatedBatchSize += delta;
+ return state == State.IN_OVERFLOW ||
+ options.maxBatchSize <= 0 ||
+ accumulatedBatchSize <= options.maxBatchSize;
+ }
+
+ /**
+ * Accumulate the initial vector allocation sizes.
+ *
+ * @param allocationBytes number of bytes allocated to a vector
+ * in the batch setup step
+ */
+
+ public void tallyAllocations(int allocationBytes) {
+ accumulatedBatchSize += allocationBytes;
+ }
+
+ /**
+ * Log and check the initial vector allocation. If a batch size
+ * limit is set, warn if the initial allocation exceeds the limit.
+ * This will occur if the target row count is incorrect for the
+ * data size.
+ */
+
+ private void checkInitialAllocation() {
+ if (options.maxBatchSize < 0) {
+ logger.debug("Initial vector allocation: {}, no batch limit specified",
+ accumulatedBatchSize);
+ }
+ else if (accumulatedBatchSize > options.maxBatchSize) {
+ logger.warn("Initial vector allocation: {}, but batch size limit is: {}",
+ accumulatedBatchSize, options.maxBatchSize);
+ } else {
+ logger.debug("Initial vector allocation: {}, batch size limit: {}",
+ accumulatedBatchSize, options.maxBatchSize);
+ }
+ }
+
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("options");
+ options.dump(format);
+ format
+ .attribute("index", writerIndex.vectorIndex())
+ .attribute("state", state)
+ .attribute("activeSchemaVersion", activeSchemaVersion)
+ .attribute("harvestSchemaVersion", harvestSchemaVersion)
+ .attribute("pendingRowCount", pendingRowCount)
+ .attribute("targetRowCount", targetRowCount)
+ ;
+ format.attribute("root");
+ rootState.dump(format);
+ format.attribute("rootWriter");
+ rootWriter.dump(format);
+ format.endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultVectorCacheImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultVectorCacheImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultVectorCacheImpl.java
new file mode 100644
index 0000000..c7288b2
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultVectorCacheImpl.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.physical.rowSet.ResultVectorCache;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.ValueVector;
+
+/**
+ * Manages an inventory of value vectors used across row batch readers.
+ * Drill semantics for batches is complex. Each operator logically returns
+ * a batch of records on each call of the Drill Volcano iterator protocol
+ * <tt>next()</tt> operation. However, the batches "returned" are not
+ * separate objects. Instead, Drill enforces the following semantics:
+ * <ul>
+ * <li>If a <tt>next()</tt> call returns <tt>OK</tt> then the set of vectors
+ * in the "returned" batch must be identical to those in the prior batch. Not
+ * just the same type; they must be the same <tt>ValueVector</tt> objects.
+ * (The buffers within the vectors will be different.)</li>
+ * <li>If the set of vectors changes in any way (add a vector, remove a
+ * vector, change the type of a vector), then the <tt>next()</tt> call
+ * <b>must</b> return <tt>OK_NEW_SCHEMA</tt>.</li>
+ * </ul>
+ * These rules create interesting constraints for the scan operator.
+ * Conceptually, each batch is distinct. But, it must share vectors. The
+ * {@link ResultSetLoader} class handles this by managing the set of vectors
+ * used by a single reader.
+ * <p>
+ * Readers are independent: each may read a distinct schema (as in JSON.)
+ * Yet, the Drill protocol requires minimizing spurious <tt>OK_NEW_SCHEMA</tt>
+ * events. As a result, two readers run by the same scan operator must
+ * share the same set of vectors, despite the fact that they may have
+ * different schemas and thus different <tt>ResultSetLoader</tt>s.
+ * <p>
+ * The purpose of this inventory is to persist vectors across readers, even
+ * when, say, reader B does not use a vector that reader A created.
+ * <p>
+ * The semantics supported by this class include:
+ * <ul>
+ * <li>Ability to "pre-declare" columns based on columns that appear in
+ * an explicit select list. This ensures that the columns are known (but
+ * not their types).</li>
+ * <li>Ability to reuse a vector across readers if the column retains the same
+ * name and type (minor type and mode.)</li>
+ * <li>Ability to flush vectors that go unused when a reader's schema
+ * changes.</li>
+ * <li>Support schema "hysteresis"; that is, a "sticky" schema that
+ * minimizes spurious changes. Once a vector is declared, it can be included
+ * in all subsequent batches (provided the column is nullable or an array.)</li>
+ * </ul>
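+ * <p>
+ * An illustrative sketch of the reuse behavior (the field variables
+ * here are hypothetical):
+ * <pre><code>
+ * ResultVectorCacheImpl cache = new ResultVectorCacheImpl(allocator);
+ * ValueVector v1 = cache.addOrGet(intField);     // creates the vector
+ * ValueVector v2 = cache.addOrGet(intField);     // returns the same object
+ * ValueVector v3 = cache.addOrGet(varcharField); // same column name, new
+ *                                                // type: closes the old
+ *                                                // vector, creates a new one
+ * </code></pre>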
+ */
+public class ResultVectorCacheImpl implements ResultVectorCache {
+
+ /**
+ * State of a projected vector. At first all we have is a name.
+ * Later, we'll discover the type.
+ */
+
+ private static class VectorState {
+ protected final String name;
+ protected ValueVector vector;
+ protected boolean touched;
+
+ public VectorState(String name) {
+ this.name = name;
+ }
+
+ public boolean satisfies(MaterializedField colSchema) {
+ if (vector == null) {
+ return false;
+ }
+ MaterializedField vectorSchema = vector.getField();
+ return vectorSchema.getType().equals(colSchema.getType());
+ }
+ }
+
+ private final BufferAllocator allocator;
+ private final Map<String, VectorState> vectors = new HashMap<>();
+
+ public ResultVectorCacheImpl(BufferAllocator allocator) {
+ this.allocator = allocator;
+ }
+
+ @Override
+ public BufferAllocator allocator() { return allocator; }
+
+ public void predefine(List<String> selected) {
+ for (String colName : selected) {
+ addVector(colName);
+ }
+ }
+
+ private VectorState addVector(String colName) {
+ VectorState vs = new VectorState(colName);
+ vectors.put(vs.name, vs);
+ return vs;
+ }
+
+ public void newBatch() {
+ for (VectorState vs : vectors.values()) {
+ vs.touched = false;
+ }
+ }
+
+ public void trimUnused() {
+ List<VectorState> unused = new ArrayList<>();
+ for (VectorState vs : vectors.values()) {
+ if (! vs.touched) {
+ unused.add(vs);
+ }
+ }
+ if (unused.isEmpty()) {
+ return;
+ }
+ for (VectorState vs : unused) {
+ vectors.remove(vs.name);
+ }
+ }
+
+ @Override
+ public ValueVector addOrGet(MaterializedField colSchema) {
+ VectorState vs = vectors.get(colSchema.getName());
+
+ // If the vector is found, and is of the right type, reuse it.
+
+ if (vs != null && vs.satisfies(colSchema)) {
+ return vs.vector;
+ }
+
+ // If no vector, this is a late schema. Create the vector.
+
+ if (vs == null) {
+ vs = addVector(colSchema.getName());
+
+ // Else, if the vector changed type, close the old one.
+
+ } else if (vs.vector != null) {
+ vs.vector.close();
+ vs.vector = null;
+ }
+
+ // Create the new vector.
+
+ vs.touched = true;
+ vs.vector = TypeHelper.getNewVector(colSchema, allocator, null);
+ return vs.vector;
+ }
+
+ public MajorType getType(String name) {
+ VectorState vs = vectors.get(name);
+ if (vs == null || vs.vector == null) {
+ return null;
+ }
+ return vs.vector.getField().getType();
+ }
+
+ public void close() {
+ for (VectorState vs : vectors.values()) {
+ vs.vector.close();
+ }
+ vectors.clear();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RowSetLoaderImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RowSetLoaderImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RowSetLoaderImpl.java
new file mode 100644
index 0000000..ec61ae7
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/RowSetLoaderImpl.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import java.util.ArrayList;
+
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter;
+
+/**
+ * Implementation of the row set loader. Provides row-level operations, leaving the
+ * result set loader to provide batch-level operations. However, all control
+ * operations are actually delegated to the result set loader, which handles
+ * the details of working with overflow rows.
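+ * <p>
+ * For example (an illustrative sketch, assuming a two-column
+ * (INT, VARCHAR) schema):
+ * <pre><code>
+ * rootWriter.addRow(10, "fred")
+ *           .addRow(20, "wilma");
+ * </code></pre>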
+ */
+
+public class RowSetLoaderImpl extends AbstractTupleWriter implements RowSetLoader {
+
+ private final ResultSetLoaderImpl rsLoader;
+
+ protected RowSetLoaderImpl(ResultSetLoaderImpl rsLoader, TupleMetadata schema) {
+ super(schema, new ArrayList<AbstractObjectWriter>());
+ this.rsLoader = rsLoader;
+ bindIndex(rsLoader.writerIndex());
+ }
+
+ @Override
+ public ResultSetLoader loader() { return rsLoader; }
+
+ @Override
+ public RowSetLoader addRow(Object...values) {
+ if (! start()) {
+ throw new IllegalStateException("Batch is full.");
+ }
+ setObject(values);
+ save();
+ return this;
+ }
+
+ @Override
+ public int rowIndex() { return rsLoader.writerIndex().vectorIndex(); }
+
+ @Override
+ public void save() { rsLoader.saveRow(); }
+
+ @Override
+ public boolean start() {
+ if (rsLoader.isFull()) {
+
+ // Full batch? Return false.
+
+ return false;
+ } else if (state == State.IN_ROW) {
+
+ // Already in a row? Rewind to the start of the row.
+
+ restartRow();
+ } else {
+
+ // Otherwise, advance to the next row.
+
+ rsLoader.startRow();
+ }
+ return true;
+ }
+
+ public void endBatch() {
+ if (state == State.IN_ROW) {
+ restartRow();
+ state = State.IN_WRITE;
+ }
+ endWrite();
+ }
+
+ @Override
+ public boolean isFull( ) { return rsLoader.isFull(); }
+
+ @Override
+ public int rowCount() { return rsLoader.rowCount(); }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/SingleVectorState.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/SingleVectorState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/SingleVectorState.java
new file mode 100644
index 0000000..f6bc5f3
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/SingleVectorState.java
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.FixedWidthVector;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VariableWidthVector;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter;
+import org.apache.drill.exec.vector.accessor.writer.OffsetVectorWriter;
+
+/**
+ * Base class for a single vector. Handles the bulk of work for that vector.
+ * Subclasses are specialized for offset vectors or values vectors.
+ * (The "single vector" name contrasts with classes that manage compound
+ * vectors, such as a data and offsets vector.)
+ */
+
+public abstract class SingleVectorState implements VectorState {
+
+ /**
+ * State for a scalar value vector. The vector might be for a simple (non-array)
+ * vector, or might be the payload part of a scalar array (repeated scalar)
+ * vector.
+ */
+
+ public static class ValuesVectorState extends SingleVectorState {
+
+ private final ColumnMetadata schema;
+
+ public ValuesVectorState(ColumnMetadata schema, AbstractScalarWriter writer, ValueVector mainVector) {
+ super(writer, mainVector);
+ this.schema = schema;
+ }
+
+ @Override
+ public int allocateVector(ValueVector vector, int cardinality) {
+ if (schema.isVariableWidth()) {
+
+ // Cap the allocated size to the maximum.
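+ // For example (hypothetical numbers): a cardinality of 4096 rows
+ // with an expected width of 50 bytes requests 204,800 bytes, well
+ // under the 16 MB MAX_BUFFER_SIZE cap.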
+
+ int size = (int) Math.min(ValueVector.MAX_BUFFER_SIZE, (long) cardinality * schema.expectedWidth());
+ ((VariableWidthVector) vector).allocateNew(size, cardinality);
+ } else {
+ ((FixedWidthVector) vector).allocateNew(cardinality);
+ }
+ return vector.getBufferSize();
+ }
+
+ @Override
+ protected void copyOverflow(int sourceStartIndex, int sourceEndIndex) {
+ int newIndex = 0;
+ ResultSetLoaderImpl.logger.trace("Vector {} of type {}: copy {} values from {} to {}",
+ mainVector.getField().toString(),
+ mainVector.getClass().getSimpleName(),
+ Math.max(0, sourceEndIndex - sourceStartIndex + 1),
+ sourceStartIndex, newIndex);
+
+ // Copy overflow values from the full vector to the new
+ // look-ahead vector. Uses vector-level operations for convenience.
+ // These aren't very efficient, but overflow does not happen very
+ // often.
+
+ for (int src = sourceStartIndex; src <= sourceEndIndex; src++, newIndex++) {
+ mainVector.copyEntry(newIndex, backupVector, src);
+ }
+ }
+ }
+
+ /**
+ * Special case for an offset vector. Offset vectors are managed like any other
+ * vector with respect to overflow and allocation. This means that the loader
+ * classes avoid the use of the RepeatedVector class methods, instead working
+ * with the offsets vector (here) or the values vector to allow the needed
+ * fine control over overflow operations.
+ */
+
+ public static class OffsetVectorState extends SingleVectorState {
+
+ private final AbstractObjectWriter childWriter;
+
+ public OffsetVectorState(AbstractScalarWriter writer, ValueVector mainVector,
+ AbstractObjectWriter childWriter) {
+ super(writer, mainVector);
+ this.childWriter = childWriter;
+ }
+
+ @Override
+ public int allocateVector(ValueVector toAlloc, int cardinality) {
+ ((UInt4Vector) toAlloc).allocateNew(cardinality);
+ return toAlloc.getBufferSize();
+ }
+
+ public int rowStartOffset() {
+ return ((OffsetVectorWriter) writer).rowStartOffset();
+ }
+
+ @Override
+ protected void copyOverflow(int sourceStartIndex, int sourceEndIndex) {
+
+ if (sourceStartIndex > sourceEndIndex) {
+ return;
+ }
+
+      // This is an offset vector. The positions to copy start one
+      // past the row index.
+
+ sourceStartIndex++;
+ sourceEndIndex++;
+
+ // Copy overflow values from the full vector to the new
+ // look-ahead vector. Since this is an offset vector, values must
+ // be adjusted as they move across.
+ //
+ // Indexing can be confusing. Offset vectors have values offset
+ // from their row by one position. The offset vector position for
+ // row i has the start value for row i. The offset vector position for
+ // i+1 has the start of the next value. The difference between the
+ // two is the element length. As a result, the offset vector always has
+ // one more value than the number of rows, and position 0 is always 0.
+ //
+ // The index passed in here is that of the row that overflowed. That
+ // offset vector position contains the offset of the start of the data
+ // for the current row. We must subtract that offset from each copied
+ // value to adjust the offset for the destination.
+
+ UInt4Vector.Accessor sourceAccessor = ((UInt4Vector) backupVector).getAccessor();
+ UInt4Vector.Mutator destMutator = ((UInt4Vector) mainVector).getMutator();
+ int offset = childWriter.events().writerIndex().rowStartIndex();
+ int newIndex = 1;
+ ResultSetLoaderImpl.logger.trace("Offset vector: copy {} values from {} to {} with offset {}",
+ Math.max(0, sourceEndIndex - sourceStartIndex + 1),
+ sourceStartIndex, newIndex, offset);
+ assert offset == sourceAccessor.get(sourceStartIndex - 1);
+
+ // Position zero is special and will be filled in by the writer
+ // later.
+
+ for (int src = sourceStartIndex; src <= sourceEndIndex; src++, newIndex++) {
+ destMutator.set(newIndex, sourceAccessor.get(src) - offset);
+ }
+// VectorPrinter.printOffsets((UInt4Vector) backupVector, sourceStartIndex - 1, sourceEndIndex - sourceStartIndex + 3);
+// VectorPrinter.printOffsets((UInt4Vector) mainVector, 0, newIndex);
+ }
+ }
+
+ protected final AbstractScalarWriter writer;
+ protected final ValueVector mainVector;
+ protected ValueVector backupVector;
+
+ public SingleVectorState(AbstractScalarWriter writer, ValueVector mainVector) {
+ this.writer = writer;
+ this.mainVector = mainVector;
+ }
+
+ @Override
+ public ValueVector vector() { return mainVector; }
+
+ @Override
+ public int allocate(int cardinality) {
+ return allocateVector(mainVector, cardinality);
+ }
+
+ protected abstract int allocateVector(ValueVector vector, int cardinality);
+
+ /**
+ * A column within the row batch overflowed. Prepare to absorb the rest of
+ * the in-flight row by rolling values over to a new vector, saving the
+ * complete vector for later. This column could have a value for the overflow
+ * row, or for some previous row, depending on exactly when and where the
+ * overflow occurs.
+ *
+   * @param cardinality the number of values to allocate in the new
+   * "look-ahead" vector. The values of the in-flight row are copied into
+   * that vector starting at the row's first value; if the vector is an
+   * array, multiple elements may need to move
+ */
+
+ @Override
+ public void rollover(int cardinality) {
+
+ int sourceStartIndex = writer.writerIndex().rowStartIndex();
+
+ // Remember the last write index for the original vector.
+ // This tells us the end of the set of values to move, while the
+ // sourceStartIndex above tells us the start.
+
+ int sourceEndIndex = writer.lastWriteIndex();
+
+ // Switch buffers between the backup vector and the writer's output
+ // vector. Done this way because writers are bound to vectors and
+ // we wish to keep the binding.
+
+ if (backupVector == null) {
+ backupVector = TypeHelper.getNewVector(mainVector.getField(), mainVector.getAllocator(), null);
+ }
+ assert cardinality > 0;
+ allocateVector(backupVector, cardinality);
+ mainVector.exchange(backupVector);
+
+ // Copy overflow values from the full vector to the new
+ // look-ahead vector.
+
+ copyOverflow(sourceStartIndex, sourceEndIndex);
+
+ // At this point, the writer is positioned to write to the look-ahead
+ // vector at the position after the copied values. The original vector
+ // is saved along with a last write position that is no greater than
+ // the retained values.
+ }
+
+ protected abstract void copyOverflow(int sourceStartIndex, int sourceEndIndex);
+
+ /**
+ * Exchange the data from the backup vector and the main vector, putting
+ * the completed buffers back into the main vectors, and stashing the
+ * overflow buffers away in the backup vector.
+ * Restore the main vector's last write position.
+ */
+
+ @Override
+ public void harvestWithLookAhead() {
+ mainVector.exchange(backupVector);
+ }
+
+ /**
+ * The previous full batch has been sent downstream and the client is
+ * now ready to start writing to the next batch. Initialize that new batch
+ * with the look-ahead values saved during overflow of the previous batch.
+ */
+
+ @Override
+ public void startBatchWithLookAhead() {
+ mainVector.exchange(backupVector);
+ backupVector.clear();
+ }
+
+ @Override
+ public void reset() {
+ mainVector.clear();
+ if (backupVector != null) {
+ backupVector.clear();
+ }
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attributeIdentity("writer", writer)
+ .attributeIdentity("mainVector", mainVector)
+ .attributeIdentity("backupVector", backupVector)
+ .endObject();
+ }
+}
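
The offset rebasing performed in OffsetVectorState.copyOverflow() is the
subtle part of the rollover logic. The arithmetic can be illustrated with a
minimal standalone sketch that uses plain int arrays in place of UInt4Vector
(the values and class name here are illustrative only):

public class OffsetRolloverSketch {
  public static void main(String[] args) {
    // Offsets for 4 rows; position 0 is always 0, so 5 entries in all.
    int[] source = {0, 10, 25, 40, 55};
    int overflowRow = 2;                 // index of the row that overflowed
    int offset = source[overflowRow];    // start of that row's data: 25

    // Copy positions (overflowRow + 1) .. end, rebased by subtracting
    // 'offset'. Destination position 0 is left for the writer to fill.
    int[] dest = new int[source.length];
    int newIndex = 1;
    for (int src = overflowRow + 1; src < source.length; src++, newIndex++) {
      dest[newIndex] = source[src] - offset;
    }

    // Prints [0, 15, 30, 0, 0]: the overflow rows now start at offset 0.
    System.out.println(java.util.Arrays.toString(dest));
  }
}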
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/TupleState.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/TupleState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/TupleState.java
new file mode 100644
index 0000000..de41ee4
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/TupleState.java
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.exec.physical.rowSet.impl.ColumnState.BaseMapColumnState;
+import org.apache.drill.exec.physical.rowSet.impl.ColumnState.MapArrayColumnState;
+import org.apache.drill.exec.physical.rowSet.impl.ColumnState.MapColumnState;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+import org.apache.drill.exec.record.TupleSchema.AbstractColumnMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ObjectWriter;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.exec.vector.accessor.TupleWriter.TupleWriterListener;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter;
+import org.apache.drill.exec.vector.accessor.writer.ColumnWriterFactory;
+
+/**
+ * Represents the loader state for a tuple: a row or a map. This is "state" in
+ * the sense of variables that are carried along with each tuple. Handles
+ * write-time issues such as defining new columns, allocating memory, handling
+ * overflow, assembling the output version of the map, and so on. Each
+ * row and map in the result set has a tuple state instance associated
+ * with it.
+ * <p>
+ * Here, by "tuple" we mean a container of vectors, each of which holds
+ * the values for one column. So, the "tuple" here is structural: not a
+ * specific set of values, but rather the collection of vectors that hold
+ * tuple values.
+ */
+
+public abstract class TupleState implements TupleWriterListener {
+
+ /**
+ * Handles the details of the top-level tuple, the data row itself.
+ * Note that by "row" we mean the set of vectors that define the
+ * set of rows.
+ */
+
+ public static class RowState extends TupleState {
+
+ /**
+ * The row-level writer for stepping through rows as they are written,
+ * and for accessing top-level columns.
+ */
+
+ private final RowSetLoaderImpl writer;
+
+ public RowState(ResultSetLoaderImpl rsLoader) {
+ super(rsLoader, rsLoader.projectionSet);
+ writer = new RowSetLoaderImpl(rsLoader, schema);
+ writer.bindListener(this);
+ }
+
+ public RowSetLoaderImpl rootWriter() { return writer; }
+
+ @Override
+ public AbstractTupleWriter writer() { return writer; }
+
+ @Override
+ public int innerCardinality() { return resultSetLoader.targetRowCount();}
+ }
+
+ /**
+ * Represents a tuple defined as a Drill map: single or repeated. Note that
+ * the map vector does not exist here; it is assembled only when "harvesting"
+   * a batch. This design supports the obscure case in which a new column
+   * is added during an overflow row: such a column exists within this
+   * abstraction, but is not published to the map that makes up the output.
+ */
+
+ public static class MapState extends TupleState {
+
+ protected final BaseMapColumnState mapColumnState;
+ protected int outerCardinality;
+
+ public MapState(ResultSetLoaderImpl rsLoader,
+ BaseMapColumnState mapColumnState,
+ ProjectionSet projectionSet) {
+ super(rsLoader, projectionSet);
+ this.mapColumnState = mapColumnState;
+ mapColumnState.writer().bindListener(this);
+ }
+
+ /**
+ * Return the tuple writer for the map. If this is a single
+ * map, then it is the writer itself. If this is a map array,
+ * then the tuple is nested inside the array.
+ */
+
+ @Override
+ public AbstractTupleWriter writer() {
+ AbstractObjectWriter objWriter = mapColumnState.writer();
+ TupleWriter tupleWriter;
+ if (objWriter.type() == ObjectType.ARRAY) {
+ tupleWriter = objWriter.array().tuple();
+ } else {
+ tupleWriter = objWriter.tuple();
+ }
+ return (AbstractTupleWriter) tupleWriter;
+ }
+
+ /**
+     * In order to allocate correctly-sized vectors, the map must know
+     * its member cardinality: the number of elements in each row. This
+     * is 1 for a single map, but may be any number for a map array. The
+     * value is then pushed downward recursively to compute the cardinality
+     * of maps nested within maps, and so on.
+ */
+
+ @Override
+ public void updateCardinality(int outerCardinality) {
+ this.outerCardinality = outerCardinality;
+ super.updateCardinality(outerCardinality);
+ }
+
+ @Override
+ public int innerCardinality() {
+ return outerCardinality * mapColumnState.schema().expectedElementCount();
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("column", mapColumnState.schema().name())
+ .attribute("cardinality", outerCardinality)
+ .endObject();
+ }
+ }
+
+ protected final ResultSetLoaderImpl resultSetLoader;
+ protected final List<ColumnState> columns = new ArrayList<>();
+ protected final TupleSchema schema = new TupleSchema();
+ protected final ProjectionSet projectionSet;
+
+ protected TupleState(ResultSetLoaderImpl rsLoader, ProjectionSet projectionSet) {
+ this.resultSetLoader = rsLoader;
+ this.projectionSet = projectionSet;
+ }
+
+ public abstract int innerCardinality();
+
+ /**
+ * Returns an ordered set of the columns which make up the tuple.
+   * Column order is the same as that defined by the tuple's schema,
+ * to allow indexed access. New columns always appear at the end
+ * of the list to preserve indexes.
+ *
+ * @return ordered list of column states for the columns within
+ * this tuple
+ */
+
+ public List<ColumnState> columns() { return columns; }
+
+ public TupleMetadata schema() { return writer().schema(); }
+
+ public abstract AbstractTupleWriter writer();
+
+ @Override
+ public ObjectWriter addColumn(TupleWriter tupleWriter, MaterializedField column) {
+ return addColumn(tupleWriter, TupleSchema.fromField(column));
+ }
+
+ @Override
+ public ObjectWriter addColumn(TupleWriter tupleWriter, ColumnMetadata columnSchema) {
+
+    // Verify that the name is not a (possibly case-insensitive) duplicate.
+
+ TupleMetadata tupleSchema = schema();
+ String colName = columnSchema.name();
+ if (tupleSchema.column(colName) != null) {
+ throw new IllegalArgumentException("Duplicate column: " + colName);
+ }
+
+ return addColumn(columnSchema);
+ }
+
+ /**
+ * Implementation of the work to add a new column to this tuple given a
+ * schema description of the column.
+ *
+ * @param columnSchema schema of the column
+ * @return writer for the new column
+ */
+
+ private AbstractObjectWriter addColumn(ColumnMetadata columnSchema) {
+
+ // Indicate projection in the metadata.
+
+ ((AbstractColumnMetadata) columnSchema).setProjected(
+ projectionSet.isProjected(columnSchema.name()));
+
+ // Build the column
+
+ ColumnState colState;
+ if (columnSchema.isMap()) {
+ colState = buildMap(columnSchema);
+ } else {
+ colState = buildPrimitive(columnSchema);
+ }
+ columns.add(colState);
+ colState.updateCardinality(innerCardinality());
+ colState.allocateVectors();
+ return colState.writer();
+ }
+
+ /**
+ * Build a primitive column. Check if the column is projected. If not,
+ * allocate a dummy writer for the column. If projected, then allocate
+ * a vector, a writer, and the column state which binds the two together
+ * and manages the column.
+ *
+ * @param columnSchema schema of the new primitive column
+ * @return column state for the new column
+ */
+
+ @SuppressWarnings("resource")
+ private ColumnState buildPrimitive(ColumnMetadata columnSchema) {
+ ValueVector vector;
+ if (columnSchema.isProjected()) {
+
+ // Create the vector for the column.
+
+ vector = resultSetLoader.vectorCache().addOrGet(columnSchema.schema());
+ } else {
+
+ // Column is not projected. No materialized backing for the column.
+
+ vector = null;
+ }
+
+ // Create the writer. Will be returned to the tuple writer.
+
+ AbstractObjectWriter colWriter = ColumnWriterFactory.buildColumnWriter(columnSchema, vector);
+
+ if (columnSchema.isArray()) {
+ return PrimitiveColumnState.newPrimitiveArray(resultSetLoader, vector, colWriter);
+ } else {
+ return PrimitiveColumnState.newPrimitive(resultSetLoader, vector, colWriter);
+ }
+ }
+
+ /**
+ * Build a new map (single or repeated) column. No map vector is created
+ * here, instead we create a tuple state to hold the columns, and defer the
+ * map vector (or vector container) until harvest time.
+ *
+ * @param columnSchema description of the map column
+ * @return column state for the map column
+ */
+
+ private ColumnState buildMap(ColumnMetadata columnSchema) {
+
+    // When dynamically adding columns, the caller must add the (empty)
+    // map by itself, then add columns to the map via separate
+    // calls.
+
+ assert columnSchema.isMap();
+ assert columnSchema.mapSchema().size() == 0;
+
+ // Create the writer. Will be returned to the tuple writer.
+
+ ProjectionSet childProjection = projectionSet.mapProjection(columnSchema.name());
+ if (columnSchema.isArray()) {
+ return MapArrayColumnState.build(resultSetLoader,
+ columnSchema,
+ childProjection);
+ } else {
+ return new MapColumnState(resultSetLoader,
+ columnSchema,
+ childProjection);
+ }
+ }
+
+ /**
+ * When creating a schema up front, provide the schema of the desired tuple,
+ * then build vectors and writers to match. Allows up-front schema definition
+ * in addition to on-the-fly schema creation handled elsewhere.
+ *
+ * @param schema desired tuple schema to be materialized
+ */
+
+ public void buildSchema(TupleMetadata schema) {
+ for (int i = 0; i < schema.size(); i++) {
+ ColumnMetadata colSchema = schema.metadata(i);
+ AbstractObjectWriter colWriter;
+ if (colSchema.isMap()) {
+ colWriter = addColumn(colSchema.cloneEmpty());
+ BaseMapColumnState mapColState = (BaseMapColumnState) columns.get(columns.size() - 1);
+ mapColState.mapState().buildSchema(colSchema.mapSchema());
+ } else {
+ colWriter = addColumn(colSchema);
+ }
+ writer().addColumnWriter(colWriter);
+ }
+ }
+
+ public void updateCardinality(int cardinality) {
+ for (ColumnState colState : columns) {
+ colState.updateCardinality(cardinality);
+ }
+ }
+
+ /**
+ * A column within the row batch overflowed. Prepare to absorb the rest of the
+ * in-flight row by rolling values over to a new vector, saving the complete
+ * vector for later. This column could have a value for the overflow row, or
+ * for some previous row, depending on exactly when and where the overflow
+ * occurs.
+ */
+
+ public void rollover() {
+ for (ColumnState colState : columns) {
+ colState.rollover();
+ }
+ }
+
+ /**
+ * Writing of a row batch is complete, and an overflow occurred. Prepare the
+   * vectors for harvesting to send downstream. Set aside the look-ahead
+   * vectors and put the full vector buffers back into the active vectors.
+ */
+
+ public void harvestWithLookAhead() {
+ for (ColumnState colState : columns) {
+ colState.harvestWithLookAhead();
+ }
+ }
+
+ /**
+ * Start a new batch by shifting the overflow buffers back into the main
+ * write vectors and updating the writers.
+ */
+
+ public void startBatch() {
+ for (ColumnState colState : columns) {
+ colState.startBatch();
+ }
+ }
+
+ /**
+ * Clean up state (such as backup vectors) associated with the state
+ * for each vector.
+ */
+
+ public void close() {
+ for (ColumnState colState : columns) {
+ colState.close();
+ }
+ }
+
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attributeArray("columns");
+ for (int i = 0; i < columns.size(); i++) {
+ format.element(i);
+ columns.get(i).dump(format);
+ }
+ format
+ .endArray()
+ .endObject();
+ }
+}
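
The cardinality propagation that MapState.innerCardinality() implements is
just repeated multiplication down the tuple tree. A small sketch with
concrete (purely illustrative) numbers:

public class CardinalitySketch {
  public static void main(String[] args) {
    // Row-level (outer) cardinality: the batch's target row count.
    int targetRowCount = 4096;

    // A map array expected to average 10 entries per row: its members
    // must be sized for outerCardinality * expectedElementCount values.
    int expectedElementCount = 10;
    int innerCardinality = targetRowCount * expectedElementCount;

    // A map array nested inside it multiplies again, and so on down.
    int nestedElementCount = 5;
    int nestedCardinality = innerCardinality * nestedElementCount;

    System.out.println(innerCardinality);   // 40960
    System.out.println(nestedCardinality);  // 204800
  }
}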
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorContainerBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorContainerBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorContainerBuilder.java
new file mode 100644
index 0000000..faa68cb
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorContainerBuilder.java
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import java.util.List;
+
+import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.physical.rowSet.impl.ColumnState.BaseMapColumnState;
+import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+import org.apache.drill.exec.vector.complex.RepeatedMapVector;
+
+/**
+ * Builds the harvest vector container, which includes only the columns
+ * present in the harvest schema version. That is, it excludes columns
+ * added while writing an overflow row.
+ * <p>
+ * Because a Drill row is actually a hierarchy, this class walks the
+ * internal hierarchy and builds a corresponding output hierarchy.
+ * <ul>
+ * <li>The root node is the row itself (vector container),</li>
+ * <li>Internal nodes are maps (structures),</li>
+ * <li>Leaf nodes are primitive vectors (which may be arrays).</li>
+ * </ul>
+ * The basic algorithm is to identify the version of the output schema,
+ * then add any new columns added up to that version. This object maintains
+ * the output container across batches, meaning that updates are incremental:
+ * we need only add columns that are new since the last update. And, those new
+ * columns will always appear directly after all existing columns in the row
+ * or in a map.
+ * <p>
+ * A special case occurs when columns are added in the overflow row. These
+ * columns <i>do not</i> appear in the output container for the main part
+ * of the batch; instead they appear in the <i>next</i> output container
+ * that includes the overflow row.
+ * <p>
+ * Since the container here may contain a subset of the internal columns, an
+ * interesting case occurs for maps. The maps in the output container are
+ * <b>not</b> the same as those used internally. Since the internal map may
+ * contain columns that the published map does not yet include, the internal
+ * and external maps must differ. The set of child vectors (except for
+ * child maps) is shared.
+ */
+
+public class VectorContainerBuilder {
+
+ /**
+ * Drill vector containers and maps are both tuples, but they irritatingly
+ * have completely different APIs for working with their child vectors.
+ * This class acts as a proxy to wrap the two APIs to provide a common
+ * view for the use of the container builder.
+ */
+
+ public static abstract class TupleProxy {
+ protected TupleMetadata schema;
+
+ public TupleProxy(TupleMetadata schema) {
+ this.schema = schema;
+ }
+
+ protected abstract int size();
+ protected abstract ValueVector vector(int index);
+ protected abstract void add(ValueVector vector);
+
+ protected TupleProxy mapProxy(int index) {
+ return new MapProxy(
+ schema.metadata(index).mapSchema(),
+ (AbstractMapVector) vector(index));
+ }
+ }
+
+ /**
+ * Proxy wrapper class for a vector container.
+ */
+
+ protected static class ContainerProxy extends TupleProxy {
+
+ private VectorContainer container;
+
+ protected ContainerProxy(TupleMetadata schema, VectorContainer container) {
+ super(schema);
+ this.container = container;
+ }
+
+ @Override
+ protected int size() {
+ return container.getNumberOfColumns();
+ }
+
+ @Override
+ protected ValueVector vector(int index) {
+ return container.getValueVector(index).getValueVector();
+ }
+
+ @Override
+ protected void add(ValueVector vector) {
+ container.add(vector);
+ }
+ }
+
+ /**
+ * Proxy wrapper for a map container.
+ */
+
+ protected static class MapProxy extends TupleProxy {
+
+ private AbstractMapVector mapVector;
+
+ protected MapProxy(TupleMetadata schema, AbstractMapVector mapVector) {
+ super(schema);
+ this.mapVector = mapVector;
+ }
+
+ @Override
+ protected int size() {
+ return mapVector.size();
+ }
+
+ @Override
+ protected ValueVector vector(int index) {
+ return mapVector.getChildByOrdinal(index);
+ }
+
+ @Override
+ protected void add(ValueVector vector) {
+ mapVector.putChild(vector.getField().getName(), vector);
+ }
+ }
+
+ private final ResultSetLoaderImpl resultSetLoader;
+ private int outputSchemaVersion = -1;
+ private TupleMetadata schema;
+ private VectorContainer container;
+
+ public VectorContainerBuilder(ResultSetLoaderImpl rsLoader) {
+ this.resultSetLoader = rsLoader;
+ container = new VectorContainer(rsLoader.allocator);
+ schema = new TupleSchema();
+ }
+
+ public void update(int targetVersion) {
+ if (outputSchemaVersion >= targetVersion) {
+ return;
+ }
+ outputSchemaVersion = targetVersion;
+ updateTuple(resultSetLoader.rootState(), new ContainerProxy(schema, container));
+ container.buildSchema(SelectionVectorMode.NONE);
+ }
+
+ public VectorContainer container() { return container; }
+
+ public int outputSchemaVersion() { return outputSchemaVersion; }
+
+ public BufferAllocator allocator() {
+ return resultSetLoader.allocator();
+ }
+
+ private void updateTuple(TupleState sourceModel, TupleProxy destProxy) {
+ int prevCount = destProxy.size();
+ List<ColumnState> cols = sourceModel.columns();
+ int currentCount = cols.size();
+
+ // Scan any existing maps for column additions
+
+ for (int i = 0; i < prevCount; i++) {
+ ColumnState colState = cols.get(i);
+ if (! colState.schema().isProjected()) {
+ continue;
+ }
+ if (colState.schema().isMap()) {
+ updateTuple((TupleState) ((BaseMapColumnState) colState).mapState(), destProxy.mapProxy(i));
+ }
+ }
+
+ // Add new columns, which may be maps
+
+ for (int i = prevCount; i < currentCount; i++) {
+ ColumnState colState = cols.get(i);
+ if (! colState.schema().isProjected()) {
+ continue;
+ }
+
+ // If the column was added after the output schema version cutoff,
+ // skip that column for now.
+
+ if (colState.addVersion > outputSchemaVersion) {
+ break;
+ }
+ if (colState.schema().isMap()) {
+ buildMap(destProxy, (BaseMapColumnState) colState);
+ } else {
+ destProxy.add(colState.vector());
+ destProxy.schema.addColumn(colState.schema());
+ assert destProxy.size() == destProxy.schema.size();
+ }
+ }
+ }
+
+ @SuppressWarnings("resource")
+ private void buildMap(TupleProxy parentTuple, BaseMapColumnState colModel) {
+
+ // Creating the map vector will create its contained vectors if we
+ // give it a materialized field with children. So, instead pass a clone
+ // without children so we can add them.
+
+ ColumnMetadata mapColSchema = colModel.schema().cloneEmpty();
+
+ // Don't get the map vector from the vector cache. Map vectors may
+ // have content that varies from batch to batch. Only the leaf
+ // vectors can be cached.
+
+ AbstractMapVector mapVector;
+ if (mapColSchema.isArray()) {
+
+ // A repeated map shares an offset vector with the internal
+ // repeated map.
+
+ UInt4Vector offsets = (UInt4Vector) colModel.vector();
+ mapVector = new RepeatedMapVector(mapColSchema.schema(), offsets, null);
+ } else {
+ mapVector = new MapVector(mapColSchema.schema(), allocator(), null);
+ }
+
+ // Add the map vector and schema to the parent tuple
+
+ parentTuple.add(mapVector);
+ int index = parentTuple.schema.addColumn(mapColSchema);
+    assert parentTuple.size() == parentTuple.schema.size();
+
+ // Update the tuple, which will add the new columns in the map
+
+ updateTuple(colModel.mapState(), parentTuple.mapProxy(index));
+ }
+
+ public TupleMetadata schema() { return schema; }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorState.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorState.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorState.java
new file mode 100644
index 0000000..4a1c698
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/VectorState.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+
+/**
+ * Handles batch and overflow operations for a (possibly compound) vector.
+ * <p>
+ * The data model is the following:
+ * <ul>
+ * <li>Column model<ul>
+ * <li>Value vector itself</li>
+ * <li>Column writer</li>
+ * <li>Column schema</li>
+ * <li>Column coordinator (this class)</li>
+ * </ul></li></ul>
+ * The vector state coordinates events between the result set loader
+ * on the one side and the vectors, writers and schema on the other.
+ * For example:
+ * <pre><code>
+ * Result Set Vector
+ * Loader <--> State <--> Vectors
+ * </code></pre>
+ * Events from the row set loader deal with allocation, roll-over,
+ * harvesting completed batches, and so on. Events from the writer,
+ * via the tuple model, deal with adding columns and column
+ * overflow.
+ */
+
+public interface VectorState {
+
+ /**
+ * Allocate a new vector with the number of elements given. If the vector
+ * is an array, then the cardinality given is the number of arrays.
+ * @param cardinality number of elements desired in the allocated
+ * vector
+ *
+ * @return the number of bytes allocated
+ */
+
+ int allocate(int cardinality);
+
+ /**
+ * A vector has overflowed. Create a new look-ahead vector of the given
+ * cardinality, then copy the overflow values from the main vector to the
+ * look-ahead vector.
+ *
+ * @param cardinality the number of elements in the new vector. If this
+ * vector is an array, then this is the number of arrays
+ */
+
+ void rollover(int cardinality);
+
+ /**
+ * A batch is being harvested after an overflow. Put the full batch
+ * back into the main vector so it can be harvested.
+ */
+
+ void harvestWithLookAhead();
+
+ /**
+   * A new batch is starting while a look-ahead vector exists. Move
+ * the look-ahead buffers into the main vector to prepare for writing
+ * the rest of the batch.
+ */
+
+ void startBatchWithLookAhead();
+
+ /**
+ * Clear the vector(s) associated with this state.
+ */
+
+ void reset();
+
+ /**
+ * Underlying vector: the one presented to the consumer of the
+ * result set loader.
+ */
+
+ ValueVector vector();
+
+ void dump(HierarchicalFormatter format);
+}
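
Taken together, these methods imply a lifecycle of the following shape. A
sketch of the calling sequence, assuming the VectorState interface above
(the driver method and cardinality value are illustrative; in Drill the
result set loader issues these calls):

void batchWithOverflow(VectorState state, int cardinality) {
  state.allocate(cardinality);       // start of batch: size the vectors

  // ... writing proceeds until some vector fills mid-row ...

  state.rollover(cardinality);       // copy the in-flight row into a
                                     // freshly allocated look-ahead vector
  state.harvestWithLookAhead();      // swap the full buffers back so the
                                     // completed batch can be harvested

  // ... downstream consumes the harvested batch ...

  state.startBatchWithLookAhead();   // next batch starts with the saved
                                     // look-ahead values already in place
  state.reset();                     // on close: release all buffers
}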
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/WriterIndexImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/WriterIndexImpl.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/WriterIndexImpl.java
new file mode 100644
index 0000000..2158dd1
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/WriterIndexImpl.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+
+/**
+ * Writer index that points to each row in the row set. The index starts at
+ * the 0th row and advances one row on each increment. This allows writers to
+ * start positioned at the first row. Writes happen in the current row.
+ * Calling <tt>next()</tt> advances to the next position, effectively saving
+ * the current row. The most recent row can be abandoned simply by not
+ * calling <tt>next()</tt>. This means that the number of completed rows is
+ * the same as the row index.
+ * <p>
+ * The writer index enforces the row count limit for a new batch. The
+ * limit is set by the result set loader and can vary from batch to batch,
+ * if the client so chooses, in order to adjust the row count based on
+ * actual data size.
+ */
+
+class WriterIndexImpl implements ColumnWriterIndex {
+
+ private final ResultSetLoader rsLoader;
+ private int rowIndex = 0;
+
+ public WriterIndexImpl(ResultSetLoader rsLoader) {
+ this.rsLoader = rsLoader;
+ }
+
+ @Override
+ public int vectorIndex() { return rowIndex; }
+
+ @Override
+ public int rowStartIndex() { return rowIndex; }
+
+ public boolean next() {
+ if (++rowIndex < rsLoader.targetRowCount()) {
+ return true;
+ } else {
+ // Should not call next() again once batch is full.
+ rowIndex = rsLoader.targetRowCount();
+ return false;
+ }
+ }
+
+ public int size() {
+
+ // The index always points to the next slot past the
+ // end of valid rows.
+
+ return rowIndex;
+ }
+
+ public boolean valid() { return rowIndex < rsLoader.targetRowCount(); }
+
+ @Override
+ public void rollover() {
+
+ // The top level index always rolls over to 0 --
+ // the first row position in the new vectors.
+
+ reset();
+ }
+
+ public void reset() { rowIndex = 0; }
+
+ @Override
+ public void nextElement() { }
+
+ @Override
+ public ColumnWriterIndex outerIndex() { return null; }
+
+ @Override
+ public String toString() {
+ return new StringBuilder()
+ .append("[")
+ .append(getClass().getSimpleName())
+ .append(" rowIndex = ")
+ .append(rowIndex)
+ .append("]")
+ .toString();
+ }
+}
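
The intended use is a write-then-commit loop: write into the current row,
then call next() to save it, stopping once the batch fills. A sketch
assuming the class above and an existing ResultSetLoader rsLoader
(writeRow() is a hypothetical placeholder for setting column values
through the writers):

WriterIndexImpl index = new WriterIndexImpl(rsLoader);
do {
  writeRow(index.vectorIndex());   // placeholder: set column values here
} while (index.next());            // returns false once the batch is full
// index.size() now reports the number of completed rows.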
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java
index 39b0128..f0b3321 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/SchemaBuilder.java
@@ -17,15 +17,16 @@
*/
package org.apache.drill.test.rowSet;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.ColumnMetadata;
import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+import org.apache.drill.exec.record.TupleSchema.MapColumnMetadata;
/**
* Builder of a row set schema expressed as a list of materialized
@@ -59,8 +60,6 @@ public class SchemaBuilder {
* need scale and precision, and so on.
*/
- // TODO: Add map methods
-
public static class ColumnBuilder {
private final String name;
private final MajorType.Builder typeBuilder;
@@ -78,7 +77,11 @@ public class SchemaBuilder {
}
public ColumnBuilder setWidth(int width) {
- typeBuilder.setPrecision(width);
+ return setPrecision(width);
+ }
+
+ public ColumnBuilder setPrecision(int precision) {
+ typeBuilder.setPrecision(precision);
return this;
}
@@ -101,10 +104,14 @@ public class SchemaBuilder {
public static class MapBuilder extends SchemaBuilder {
private final SchemaBuilder parent;
private final String memberName;
+ private final DataMode mode;
- public MapBuilder(SchemaBuilder parent, String memberName) {
+ public MapBuilder(SchemaBuilder parent, String memberName, DataMode mode) {
this.parent = parent;
this.memberName = memberName;
+ // Optional maps not supported in Drill
+ assert mode != DataMode.OPTIONAL;
+ this.mode = mode;
}
@Override
@@ -114,11 +121,14 @@ public class SchemaBuilder {
@Override
public SchemaBuilder buildMap() {
- MaterializedField col = columnSchema(memberName, MinorType.MAP, DataMode.REQUIRED);
- for (MaterializedField childCol : columns) {
- col.addChild(childCol);
+ // TODO: Use the map schema directly rather than
+ // rebuilding it as is done here.
+
+ MaterializedField col = columnSchema(memberName, MinorType.MAP, mode);
+ for (ColumnMetadata md : schema) {
+ col.addChild(md.schema());
}
- parent.finishMap(col);
+ parent.finishMap(TupleSchema.newMap(col, schema));
return parent;
}
@@ -128,7 +138,7 @@ public class SchemaBuilder {
}
}
- protected List<MaterializedField> columns = new ArrayList<>( );
+ protected TupleSchema schema = new TupleSchema();
private SelectionVectorMode svMode = SelectionVectorMode.NONE;
public SchemaBuilder() { }
@@ -144,59 +154,60 @@ public class SchemaBuilder {
}
}
- public SchemaBuilder add(String pathName, MajorType type) {
- return add(MaterializedField.create(pathName, type));
+ public SchemaBuilder add(String name, MajorType type) {
+ return add(MaterializedField.create(name, type));
}
public SchemaBuilder add(MaterializedField col) {
- columns.add(col);
+ schema.add(col);
return this;
}
/**
* Create a column schema using the "basic three" properties of name, type and
* cardinality (AKA "data mode.") Use the {@link ColumnBuilder} to set
- * other schema attributes.
+ * other schema attributes. Name is relative to the enclosing map or tuple;
+ * it is not the fully qualified path name.
*/
- public static MaterializedField columnSchema(String pathName, MinorType type, DataMode mode) {
- return MaterializedField.create(pathName,
+ public static MaterializedField columnSchema(String name, MinorType type, DataMode mode) {
+ return MaterializedField.create(name,
MajorType.newBuilder()
.setMinorType(type)
.setMode(mode)
.build());
}
- public SchemaBuilder add(String pathName, MinorType type, DataMode mode) {
- return add(columnSchema(pathName, type, mode));
+ public SchemaBuilder add(String name, MinorType type, DataMode mode) {
+ return add(columnSchema(name, type, mode));
}
- public SchemaBuilder add(String pathName, MinorType type) {
- return add(pathName, type, DataMode.REQUIRED);
+ public SchemaBuilder add(String name, MinorType type) {
+ return add(name, type, DataMode.REQUIRED);
}
- public SchemaBuilder add(String pathName, MinorType type, int width) {
- MaterializedField field = new SchemaBuilder.ColumnBuilder(pathName, type)
+ public SchemaBuilder add(String name, MinorType type, int width) {
+ MaterializedField field = new SchemaBuilder.ColumnBuilder(name, type)
.setMode(DataMode.REQUIRED)
.setWidth(width)
.build();
return add(field);
}
- public SchemaBuilder addNullable(String pathName, MinorType type) {
- return add(pathName, type, DataMode.OPTIONAL);
+ public SchemaBuilder addNullable(String name, MinorType type) {
+ return add(name, type, DataMode.OPTIONAL);
}
- public SchemaBuilder addNullable(String pathName, MinorType type, int width) {
- MaterializedField field = new SchemaBuilder.ColumnBuilder(pathName, type)
+ public SchemaBuilder addNullable(String name, MinorType type, int width) {
+ MaterializedField field = new SchemaBuilder.ColumnBuilder(name, type)
.setMode(DataMode.OPTIONAL)
.setWidth(width)
.build();
return add(field);
}
- public SchemaBuilder addArray(String pathName, MinorType type) {
- return add(pathName, type, DataMode.REPEATED);
+ public SchemaBuilder addArray(String name, MinorType type) {
+ return add(name, type, DataMode.REPEATED);
}
/**
@@ -209,7 +220,11 @@ public class SchemaBuilder {
*/
public MapBuilder addMap(String pathName) {
- return new MapBuilder(this, pathName);
+ return new MapBuilder(this, pathName, DataMode.REQUIRED);
+ }
+
+ public MapBuilder addMapArray(String pathName) {
+ return new MapBuilder(this, pathName, DataMode.REPEATED);
}
public SchemaBuilder withSVMode(SelectionVectorMode svMode) {
@@ -218,14 +233,18 @@ public class SchemaBuilder {
}
public BatchSchema build() {
- return new BatchSchema(svMode, columns);
+ return schema.toBatchSchema(svMode);
}
- void finishMap(MaterializedField map) {
- columns.add(map);
+ void finishMap(MapColumnMetadata map) {
+ schema.add(map);
}
public SchemaBuilder buildMap() {
throw new IllegalStateException("Cannot build map for a top-level schema");
}
+
+ public TupleMetadata buildSchema() {
+ return schema;
+ }
}
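
With the map-aware methods added above, schemas compose as a single fluent
chain. An example of the builder API as it stands after this change (column
names and types are arbitrary; the usual imports are assumed):

TupleMetadata schema = new SchemaBuilder()
    .add("id", MinorType.INT)
    .addNullable("name", MinorType.VARCHAR, 20)
    .addMap("address")
      .add("street", MinorType.VARCHAR)
      .add("zip", MinorType.INT)
      .buildMap()
    .addMapArray("orders")
      .add("amount", MinorType.FLOAT8)
      .buildMap()
    .buildSchema();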
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java
index ff93bf0..2076b16 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/file/JsonFileBuilder.java
@@ -23,9 +23,10 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.drill.exec.record.MaterializedField;
-import org.apache.drill.exec.vector.accessor.ColumnAccessor;
-import org.apache.drill.exec.vector.accessor.ColumnReader;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.ValueType;
import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetReader;
import java.io.BufferedOutputStream;
import java.io.File;
@@ -44,13 +45,14 @@ public class JsonFileBuilder
public static final String DEFAULT_DECIMAL_FORMATTER = "%s";
public static final String DEFAULT_PERIOD_FORMATTER = "%s";
+ @SuppressWarnings("unchecked")
public static final Map<String, String> DEFAULT_FORMATTERS = new ImmutableMap.Builder()
- .put(ColumnAccessor.ValueType.DOUBLE, DEFAULT_DOUBLE_FORMATTER)
- .put(ColumnAccessor.ValueType.INTEGER, DEFAULT_INTEGER_FORMATTER)
- .put(ColumnAccessor.ValueType.LONG, DEFAULT_LONG_FORMATTER)
- .put(ColumnAccessor.ValueType.STRING, DEFAULT_STRING_FORMATTER)
- .put(ColumnAccessor.ValueType.DECIMAL, DEFAULT_DECIMAL_FORMATTER)
- .put(ColumnAccessor.ValueType.PERIOD, DEFAULT_PERIOD_FORMATTER)
+ .put(ValueType.DOUBLE, DEFAULT_DOUBLE_FORMATTER)
+ .put(ValueType.INTEGER, DEFAULT_INTEGER_FORMATTER)
+ .put(ValueType.LONG, DEFAULT_LONG_FORMATTER)
+ .put(ValueType.STRING, DEFAULT_STRING_FORMATTER)
+ .put(ValueType.DECIMAL, DEFAULT_DECIMAL_FORMATTER)
+ .put(ValueType.PERIOD, DEFAULT_PERIOD_FORMATTER)
.build();
private final RowSet rowSet;
@@ -66,8 +68,7 @@ public class JsonFileBuilder
Preconditions.checkNotNull(columnFormatter);
Iterator<MaterializedField> fields = rowSet
- .schema()
- .batch()
+ .batchSchema()
.iterator();
boolean hasColumn = false;
@@ -90,14 +91,12 @@ public class JsonFileBuilder
tableFile.getParentFile().mkdirs();
try (BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(tableFile))) {
- final RowSet.RowSetReader reader = rowSet.reader();
+ final RowSetReader reader = rowSet.reader();
final int numCols = rowSet
- .schema()
- .batch()
+ .batchSchema()
.getFieldCount();
final Iterator<MaterializedField> fieldIterator = rowSet
- .schema()
- .batch()
+ .batchSchema()
.iterator();
final List<String> columnNames = Lists.newArrayList();
final List<String> columnFormatters = Lists.newArrayList();
@@ -105,8 +104,8 @@ public class JsonFileBuilder
// Build formatters from first row.
while (fieldIterator.hasNext()) {
final String columnName = fieldIterator.next().getName();
- final ColumnReader columnReader = reader.column(columnName);
- final ColumnAccessor.ValueType valueType = columnReader.valueType();
+ final ScalarReader columnReader = reader.scalar(columnName);
+ final ValueType valueType = columnReader.valueType();
final String columnFormatter;
if (customFormatters.containsKey(columnName)) {
@@ -135,7 +134,7 @@ public class JsonFileBuilder
sb.append(separator);
final String columnName = columnNames.get(columnIndex);
- final ColumnReader columnReader = reader.column(columnIndex);
+ final ScalarReader columnReader = reader.scalar(columnIndex);
final String columnFormatter = columnFormatters.get(columnIndex);
final Object columnObject = columnReader.getObject();
final String columnString = String.format(columnFormatter, columnObject);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/DummyWriterTest.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/DummyWriterTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/DummyWriterTest.java
new file mode 100644
index 0000000..db33b30
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/DummyWriterTest.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import static org.junit.Assert.assertNull;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter;
+import org.apache.drill.exec.vector.accessor.writer.ColumnWriterFactory;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+public class DummyWriterTest extends SubOperatorTest {
+
+ /**
+   * Test-only, bare-bones tuple writer used to gather the dummy
+ * column writers.
+ */
+
+ public class RootWriterFixture extends AbstractTupleWriter {
+
+ protected RootWriterFixture(TupleMetadata schema,
+ List<AbstractObjectWriter> writers) {
+ super(schema, writers);
+ }
+ }
+
+ /**
+ * Test dummy column writers for scalars and arrays of
+ * scalars.
+ */
+
+ @Test
+ public void testDummyScalar() {
+
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addArray("b", MinorType.VARCHAR)
+ .buildSchema();
+ List<AbstractObjectWriter> writers = new ArrayList<>();
+
+ // We provide no vector. Factory should build us "dummy" writers.
+
+ writers.add(ColumnWriterFactory.buildColumnWriter(schema.metadata("a"), null));
+ writers.add(ColumnWriterFactory.buildColumnWriter(schema.metadata("b"), null));
+ AbstractTupleWriter rootWriter = new RootWriterFixture(schema, writers);
+
+ // Events are ignored.
+
+ rootWriter.startWrite();
+ rootWriter.startRow();
+
+ // At present, dummy writers report no type (because they don't have one.)
+
+ assertNull(rootWriter.scalar(0).valueType());
+
+ // First column. Set int value.
+
+ rootWriter.scalar(0).setInt(10);
+
+ // Dummy writer does not do type checking. Write "wrong" type.
+ // Should be allowed.
+
+ rootWriter.scalar("a").setString("foo");
+
+ // Column is required, but writer does no checking. Can set
+ // a null value.
+
+ rootWriter.column(0).scalar().setNull();
+
+ // Second column: is an array.
+
+ rootWriter.array(1).scalar().setString("bar");
+ rootWriter.array(1).scalar().setString("mumble");
+
+ // Again, type is not checked.
+
+ rootWriter.array("b").scalar().setInt(200);
+
+ // More ignored events.
+
+ rootWriter.restartRow();
+ rootWriter.saveRow();
+ rootWriter.endWrite();
+ }
+
+ /**
+ * Test a dummy map or map array. A (non-enforced) rule is that such maps
+ * contain only dummy writers. The writers act like "real" writers.
+ */
+
+ @Test
+ public void testDummyMap() {
+
+ TupleMetadata schema = new SchemaBuilder()
+ .addMap("m1")
+ .add("a", MinorType.INT)
+ .addArray("b", MinorType.VARCHAR)
+ .buildMap()
+ .addMapArray("m2")
+ .add("c", MinorType.INT)
+ .buildMap()
+ .buildSchema();
+ List<AbstractObjectWriter> writers = new ArrayList<>();
+
+ {
+ schema.metadata("m1").setProjected(false);
+ TupleMetadata mapSchema = schema.metadata("m1").mapSchema();
+ List<AbstractObjectWriter> members = new ArrayList<>();
+ members.add(ColumnWriterFactory.buildColumnWriter(mapSchema.metadata("a"), null));
+ members.add(ColumnWriterFactory.buildColumnWriter(mapSchema.metadata("b"), null));
+ writers.add(ColumnWriterFactory.buildMapWriter(schema.metadata("m1"), null, members));
+ }
+
+ {
+ schema.metadata("m2").setProjected(false);
+ TupleMetadata mapSchema = schema.metadata("m2").mapSchema();
+ List<AbstractObjectWriter> members = new ArrayList<>();
+ members.add(ColumnWriterFactory.buildColumnWriter(mapSchema.metadata("c"), null));
+ writers.add(ColumnWriterFactory.buildMapWriter(schema.metadata("m2"), null, members));
+ }
+
+ AbstractTupleWriter rootWriter = new RootWriterFixture(schema, writers);
+
+ // Events are ignored.
+
+ rootWriter.startWrite();
+ rootWriter.startRow();
+
+ // Dummy columns seem real.
+
+ rootWriter.tuple("m1").scalar("a").setInt(20);
+ rootWriter.tuple(0).array("b").scalar().setString("foo");
+
+ // Dummy array map seems real.
+
+ rootWriter.array("m2").tuple().scalar("c").setInt(30);
+ rootWriter.array("m2").save();
+ rootWriter.array(1).tuple().scalar(0).setInt(40);
+ rootWriter.array(1).save();
+
+ // More ignored events.
+
+ rootWriter.restartRow();
+ rootWriter.saveRow();
+ rootWriter.endWrite();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java
new file mode 100644
index 0000000..10e9032
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/PerformanceTool.java
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+import org.apache.drill.exec.vector.IntVector;
+import org.apache.drill.exec.vector.NullableIntVector;
+import org.apache.drill.exec.vector.RepeatedIntVector;
+import org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter.ArrayObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.NullableScalarWriter;
+import org.apache.drill.exec.vector.accessor.writer.ScalarArrayWriter;
+import org.apache.drill.test.OperatorFixture;
+import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
+import org.apache.drill.test.rowSet.RowSetWriter;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+
+import com.google.common.base.Stopwatch;
+
+/**
+ * Tests the performance of the writers compared to using the value
+ * vector mutators directly. In order to achieve apples-to-apples
+ * comparison, the tests work directly with individual columns in
+ * the writer case; the row writer level is omitted as the row writer
+ * simulates the reader logic previously used to write to vectors.
+ * <p>
+ * Current results:
+ * <ul>
+ * <li>Writer is 42% faster than a required mutator.</li>
+ * <li>Writer is 73% faster than a nullable mutator.</li>
+ * <li>Writer is 407% faster than a repeated mutator.</li>
+ * </ul>
+ * Since performance is critical for this component (this is the
+ * ultimate "inner loop"), please run these tests periodically to
+ * ensure that performance does not drop; it is very easy to add
+ * a bit of code here or there that greatly impacts performance.
+ * <p>
+ * This is not a JUnit test. Rather, it is a stand-alone program
+ * which must be run explicitly. One handy way is to run it from
+ * your IDE. If using Eclipse, monitor the system to wait for Eclipse
+ * to finish its background processing before launching.
+ */
+
+public class PerformanceTool {
+
+ public static final int ROW_COUNT = 16 * 1024 * 1024 / 4;
+ public static final int ITERATIONS = 300;
+
+ public static abstract class PerfTester {
+ final TupleMetadata rowSchema;
+ final MaterializedField field;
+ final OperatorFixture fixture;
+ final String label;
+ final Stopwatch timer = Stopwatch.createUnstarted();
+
+ public PerfTester(OperatorFixture fixture, DataMode mode, String label) {
+ this.fixture = fixture;
+ this.label = label;
+ field = SchemaBuilder.columnSchema("a", MinorType.INT, mode);
+ rowSchema = new SchemaBuilder()
+ .add(field)
+ .buildSchema();
+ }
+
+ public void runTest() {
+ for (int i = 0; i < ITERATIONS; i++) {
+ doTest();
+ }
+ System.out.println(label + ": " + timer.elapsed(TimeUnit.MILLISECONDS));
+ }
+
+ public abstract void doTest();
+ }
+
+ public static class RequiredVectorTester extends PerfTester {
+
+ public RequiredVectorTester(OperatorFixture fixture) {
+ super(fixture, DataMode.REQUIRED, "Required vector");
+ }
+
+ @Override
+ public void doTest() {
+ try (IntVector vector = new IntVector(field, fixture.allocator());) {
+ vector.allocateNew(4096);
+ IntVector.Mutator mutator = vector.getMutator();
+ timer.start();
+ for (int i = 0; i < ROW_COUNT; i++) {
+ mutator.setSafe(i, 1234);
+ }
+ timer.stop();
+ }
+ }
+ }
+
+ public static class NullableVectorTester extends PerfTester {
+
+ public NullableVectorTester(OperatorFixture fixture) {
+ super(fixture, DataMode.OPTIONAL, "Nullable vector");
+ }
+
+ @Override
+ public void doTest() {
+ try (NullableIntVector vector = new NullableIntVector(field, fixture.allocator());) {
+ vector.allocateNew(4096);
+ NullableIntVector.Mutator mutator = vector.getMutator();
+ timer.start();
+ for (int i = 0; i < ROW_COUNT; i++) {
+ mutator.setSafe(i, 1234);
+ }
+ timer.stop();
+ }
+ }
+ }
+
+ public static class RepeatedVectorTester extends PerfTester {
+
+ public RepeatedVectorTester(OperatorFixture fixture) {
+ super(fixture, DataMode.REQUIRED, "Repeated vector");
+ }
+
+ @Override
+ public void doTest() {
+ try (RepeatedIntVector vector = new RepeatedIntVector(field, fixture.allocator());) {
+ vector.allocateNew(ROW_COUNT, 5 * ROW_COUNT);
+ RepeatedIntVector.Mutator mutator = vector.getMutator();
+ timer.start();
+ for (int i = 0; i < ROW_COUNT / 5; i++) {
+ mutator.startNewValue(i);
+ mutator.addSafe(i, 12341);
+ mutator.addSafe(i, 12342);
+ mutator.addSafe(i, 12343);
+ mutator.addSafe(i, 12344);
+ mutator.addSafe(i, 12345);
+ }
+ timer.stop();
+ }
+ }
+ }
+
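+  /**
+   * Minimal writer index for the benchmark: <tt>nextElement()</tt>
+   * simply advances a counter; the row-start, rollover and outer-index
+   * hooks of the interface are not exercised here.
+   */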
+ private static class TestWriterIndex implements ColumnWriterIndex {
+
+ public int index;
+
+ @Override
+ public int vectorIndex() { return index; }
+
+ @Override
+ public final void nextElement() { index++; }
+
+ @Override
+ public void rollover() { }
+
+ @Override
+ public int rowStartIndex() { return index; }
+
+ @Override
+ public ColumnWriterIndex outerIndex() { return null; }
+ }
+
+ public static class RequiredWriterTester extends PerfTester {
+
+ public RequiredWriterTester(OperatorFixture fixture) {
+ super(fixture, DataMode.REQUIRED, "Required writer");
+ }
+
+ @Override
+ public void doTest() {
+ try (IntVector vector = new IntVector(rowSchema.column(0), fixture.allocator());) {
+ vector.allocateNew(ROW_COUNT);
+ IntColumnWriter colWriter = new IntColumnWriter(vector);
+ TestWriterIndex index = new TestWriterIndex();
+ colWriter.bindIndex(index);
+ colWriter.startWrite();
+ timer.start();
+ while (index.index < ROW_COUNT) {
+ colWriter.setInt(1234);
+ }
+ timer.stop();
+ colWriter.endWrite();
+ }
+ }
+ }
+
+ public static class NullableWriterTester extends PerfTester {
+
+ public NullableWriterTester(OperatorFixture fixture) {
+ super(fixture, DataMode.OPTIONAL, "Nullable writer");
+ }
+
+ @Override
+ public void doTest() {
+ try (NullableIntVector vector = new NullableIntVector(rowSchema.column(0), fixture.allocator());) {
+ vector.allocateNew(ROW_COUNT);
+ NullableScalarWriter colWriter = new NullableScalarWriter(
+ vector, new IntColumnWriter(vector.getValuesVector()));
+ TestWriterIndex index = new TestWriterIndex();
+ colWriter.bindIndex(index);
+ colWriter.startWrite();
+ timer.start();
+ while (index.index < ROW_COUNT) {
+ colWriter.setInt(1234);
+ }
+ timer.stop();
+ colWriter.endWrite();
+ }
+ }
+ }
+
+ public static class ArrayWriterTester extends PerfTester {
+
+ public ArrayWriterTester(OperatorFixture fixture) {
+ super(fixture, DataMode.REQUIRED, "Array writer");
+ }
+
+ @Override
+ public void doTest() {
+ try (RepeatedIntVector vector = new RepeatedIntVector(rowSchema.column(0), fixture.allocator());) {
+ vector.allocateNew(ROW_COUNT, 5 * ROW_COUNT);
+ IntColumnWriter colWriter = new IntColumnWriter(vector.getDataVector());
+ ColumnMetadata colSchema = TupleSchema.fromField(vector.getField());
+ ArrayObjectWriter arrayWriter = ScalarArrayWriter.build(colSchema, vector, colWriter);
+ TestWriterIndex index = new TestWriterIndex();
+ arrayWriter.events().bindIndex(index);
+ arrayWriter.events().startWrite();
+ timer.start();
+ for ( ; index.index < ROW_COUNT / 5; index.index++) {
+ arrayWriter.events().startRow();
+ colWriter.setInt(12341);
+ colWriter.setInt(12342);
+ colWriter.setInt(12343);
+ colWriter.setInt(12344);
+ colWriter.setInt(12345);
+ arrayWriter.events().endArrayValue();
+ }
+ timer.stop();
+ arrayWriter.events().endWrite();
+ }
+ }
+ }
+
+ public static void main(String args[]) {
+ try (OperatorFixture fixture = OperatorFixture.standardFixture();) {
+ for (int i = 0; i < 2; i++) {
+ System.out.println((i==0) ? "Warmup" : "Test run");
+ new RequiredVectorTester(fixture).runTest();
+ new RequiredWriterTester(fixture).runTest();
+ new NullableVectorTester(fixture).runTest();
+ new NullableWriterTester(fixture).runTest();
+ new RepeatedVectorTester(fixture).runTest();
+ new ArrayWriterTester(fixture).runTest();
+ }
+ } catch (Exception e) {
+      // Report the failure; this is a stand-alone tool, not a JUnit test.
+ e.printStackTrace();
+ }
+ }
+
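+  /**
+   * Retained for ad-hoc timing of the full row-writer path (column
+   * writer plus <tt>save()</tt>), which the testers above deliberately
+   * bypass in favor of bare column writers.
+   */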
+ @SuppressWarnings("unused")
+ private static void testWriter2(TupleMetadata rowSchema,
+ OperatorFixture fixture, Stopwatch timer) {
+ ExtendableRowSet rs = fixture.rowSet(rowSchema);
+ RowSetWriter writer = rs.writer(4096);
+ ScalarWriter colWriter = writer.scalar(0);
+ timer.start();
+ for (int i = 0; i < ROW_COUNT; i++) {
+ colWriter.setInt(i);
+ writer.save();
+ }
+ timer.stop();
+ writer.done().clear();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java
index af35cdf..4db4d09 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/RowSetTest.java
@@ -19,362 +19,508 @@ package org.apache.drill.test.rowSet.test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
-import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.accessor.ArrayReader;
import org.apache.drill.exec.vector.accessor.ArrayWriter;
-import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.TupleReader;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.exec.vector.complex.MapVector;
+import org.apache.drill.exec.vector.complex.RepeatedMapVector;
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
-import org.apache.drill.test.rowSet.RowSet.RowSetReader;
-import org.apache.drill.test.rowSet.RowSet.RowSetWriter;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetComparison;
-import org.apache.drill.test.rowSet.RowSetSchema;
-import org.apache.drill.test.rowSet.RowSetSchema.FlattenedSchema;
-import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetWriter;
import org.apache.drill.test.rowSet.SchemaBuilder;
import org.junit.Test;
-import com.google.common.base.Splitter;
+/**
+ * Test row sets. Since row sets are a thin wrapper around vectors,
+ * readers and writers, this is also a test of those constructs.
+ * <p>
+ * Tests basic protocol of the writers: <pre><code>
+ * row : tuple
+ * tuple : column *
+ * column : scalar obj | array obj | tuple obj
+ * scalar obj : scalar
+ * array obj : array writer
+ * array writer : element
+ * element : column
+ * tuple obj : tuple</code></pre>
+ */
public class RowSetTest extends SubOperatorTest {
/**
- * Test a simple physical schema with no maps.
+ * Test the simplest constructs: a row with top-level scalar
+ * columns.
+ * <p>
+ * The focus here is the structure of the readers and writers, along
+ * with the row set loader and verifier that use those constructs.
+ * That is, while this test uses the int vector, this test is not
+ * focused on that vector.
*/
-// @Test
-// public void testSchema() {
-// BatchSchema batchSchema = new SchemaBuilder()
-// .add("c", MinorType.INT)
-// .add("a", MinorType.INT, DataMode.REPEATED)
-// .addNullable("b", MinorType.VARCHAR)
-// .build();
-//
-// assertEquals("c", batchSchema.getColumn(0).getName());
-// assertEquals("a", batchSchema.getColumn(1).getName());
-// assertEquals("b", batchSchema.getColumn(2).getName());
-//
-// RowSetSchema schema = new RowSetSchema(batchSchema);
-// TupleSchema access = schema.hierarchicalAccess();
-// assertEquals(3, access.count());
-//
-// crossCheck(access, 0, "c", MinorType.INT);
-// assertEquals(DataMode.REQUIRED, access.column(0).getDataMode());
-// assertEquals(DataMode.REQUIRED, access.column(0).getType().getMode());
-// assertTrue(! access.column(0).isNullable());
-//
-// crossCheck(access, 1, "a", MinorType.INT);
-// assertEquals(DataMode.REPEATED, access.column(1).getDataMode());
-// assertEquals(DataMode.REPEATED, access.column(1).getType().getMode());
-// assertTrue(! access.column(1).isNullable());
-//
-// crossCheck(access, 2, "b", MinorType.VARCHAR);
-// assertEquals(MinorType.VARCHAR, access.column(2).getType().getMinorType());
-// assertEquals(DataMode.OPTIONAL, access.column(2).getDataMode());
-// assertEquals(DataMode.OPTIONAL, access.column(2).getType().getMode());
-// assertTrue(access.column(2).isNullable());
-//
-// // No maps: physical schema is the same as access schema.
-//
-// PhysicalSchema physical = schema.physical();
-// assertEquals(3, physical.count());
-// assertEquals("c", physical.column(0).field().getName());
-// assertEquals("a", physical.column(1).field().getName());
-// assertEquals("b", physical.column(2).field().getName());
-// }
+ @Test
+ public void testScalarStructure() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .buildSchema();
+ ExtendableRowSet rowSet = fixture.rowSet(schema);
+ RowSetWriter writer = rowSet.writer();
- /**
- * Validate that the actual column metadata is as expected by
- * cross-checking: validate that the column at the index and
- * the column at the column name are both correct.
- *
- * @param schema the schema for the row set
- * @param index column index
- * @param fullName expected column name
- * @param type expected type
- */
+ // Required Int
+ // Verify the invariants of the "full" and "simple" access paths
-// public void crossCheck(TupleSchema schema, int index, String fullName, MinorType type) {
-// String name = null;
-// for (String part : Splitter.on(".").split(fullName)) {
-// name = part;
-// }
-// assertEquals(name, schema.column(index).getName());
-// assertEquals(index, schema.columnIndex(fullName));
-// assertSame(schema.column(index), schema.column(fullName));
-// assertEquals(type, schema.column(index).getType().getMinorType());
-// }
+ assertEquals(ObjectType.SCALAR, writer.column("a").type());
+ assertSame(writer.column("a"), writer.column(0));
+ assertSame(writer.scalar("a"), writer.scalar(0));
+ assertSame(writer.column("a").scalar(), writer.scalar("a"));
+ assertSame(writer.column(0).scalar(), writer.scalar(0));
+ assertEquals(ValueType.INTEGER, writer.scalar(0).valueType());
- /**
- * Verify that a nested map schema works as expected.
- */
+ // Sanity checks
-// @Test
-// public void testMapSchema() {
-// BatchSchema batchSchema = new SchemaBuilder()
-// .add("c", MinorType.INT)
-// .addMap("a")
-// .addNullable("b", MinorType.VARCHAR)
-// .add("d", MinorType.INT)
-// .addMap("e")
-// .add("f", MinorType.VARCHAR)
-// .buildMap()
-// .add("g", MinorType.INT)
-// .buildMap()
-// .add("h", MinorType.BIGINT)
-// .build();
-//
-// RowSetSchema schema = new RowSetSchema(batchSchema);
-//
-// // Access schema: flattened with maps removed
-//
-// FlattenedSchema access = schema.flatAccess();
-// assertEquals(6, access.count());
-// crossCheck(access, 0, "c", MinorType.INT);
-// crossCheck(access, 1, "a.b", MinorType.VARCHAR);
-// crossCheck(access, 2, "a.d", MinorType.INT);
-// crossCheck(access, 3, "a.e.f", MinorType.VARCHAR);
-// crossCheck(access, 4, "a.g", MinorType.INT);
-// crossCheck(access, 5, "h", MinorType.BIGINT);
-//
-// // Should have two maps.
-//
-// assertEquals(2, access.mapCount());
-// assertEquals("a", access.map(0).getName());
-// assertEquals("e", access.map(1).getName());
-// assertEquals(0, access.mapIndex("a"));
-// assertEquals(1, access.mapIndex("a.e"));
-//
-// // Verify physical schema: should mirror the schema created above.
-//
-// PhysicalSchema physical = schema.physical();
-// assertEquals(3, physical.count());
-// assertEquals("c", physical.column(0).field().getName());
-// assertEquals("c", physical.column(0).fullName());
-// assertFalse(physical.column(0).isMap());
-// assertNull(physical.column(0).mapSchema());
-//
-// assertEquals("a", physical.column(1).field().getName());
-// assertEquals("a", physical.column(1).fullName());
-// assertTrue(physical.column(1).isMap());
-// assertNotNull(physical.column(1).mapSchema());
-//
-// assertEquals("h", physical.column(2).field().getName());
-// assertEquals("h", physical.column(2).fullName());
-// assertFalse(physical.column(2).isMap());
-// assertNull(physical.column(2).mapSchema());
-//
-// PhysicalSchema aSchema = physical.column(1).mapSchema();
-// assertEquals(4, aSchema.count());
-// assertEquals("b", aSchema.column(0).field().getName());
-// assertEquals("a.b", aSchema.column(0).fullName());
-// assertEquals("d", aSchema.column(1).field().getName());
-// assertEquals("e", aSchema.column(2).field().getName());
-// assertEquals("g", aSchema.column(3).field().getName());
-//
-// PhysicalSchema eSchema = aSchema.column(2).mapSchema();
-// assertEquals(1, eSchema.count());
-// assertEquals("f", eSchema.column(0).field().getName());
-// assertEquals("a.e.f", eSchema.column(0).fullName());
-// }
+ try {
+ writer.column(0).array();
+ fail();
+ } catch (UnsupportedOperationException e) {
+ // Expected
+ }
+ try {
+ writer.column(0).tuple();
+ fail();
+ } catch (UnsupportedOperationException e) {
+ // Expected
+ }
- /**
- * Verify that simple scalar (non-repeated) column readers
- * and writers work as expected. This is for tiny ints.
- */
+ // Test the various ways to get at the scalar writer.
- @Test
- public void testTinyIntRW() {
- BatchSchema batchSchema = new SchemaBuilder()
- .add("col", MinorType.TINYINT)
- .build();
- SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
- .add(0)
- .add(Byte.MAX_VALUE)
- .add(Byte.MIN_VALUE)
- .build();
- assertEquals(3, rs.rowCount());
- RowSetReader reader = rs.reader();
+ writer.column("a").scalar().setInt(10);
+ writer.save();
+ writer.scalar("a").setInt(20);
+ writer.save();
+ writer.column(0).scalar().setInt(30);
+ writer.save();
+ writer.scalar(0).setInt(40);
+ writer.save();
+
+ // Finish the row set and get a reader.
+
+ SingleRowSet actual = writer.done();
+ RowSetReader reader = actual.reader();
+
+ // Verify invariants
+
+ assertEquals(ObjectType.SCALAR, reader.column(0).type());
+ assertSame(reader.column("a"), reader.column(0));
+ assertSame(reader.scalar("a"), reader.scalar(0));
+ assertSame(reader.column("a").scalar(), reader.scalar("a"));
+ assertSame(reader.column(0).scalar(), reader.scalar(0));
+ assertEquals(ValueType.INTEGER, reader.scalar(0).valueType());
+
+ // Test various accessors: full and simple
+
+ assertTrue(reader.next());
+ assertEquals(10, reader.column("a").scalar().getInt());
assertTrue(reader.next());
- assertEquals(0, reader.column(0).getInt());
+ assertEquals(20, reader.scalar("a").getInt());
assertTrue(reader.next());
- assertEquals(Byte.MAX_VALUE, reader.column(0).getInt());
- assertEquals((int) Byte.MAX_VALUE, reader.column(0).getObject());
+ assertEquals(30, reader.column(0).scalar().getInt());
assertTrue(reader.next());
- assertEquals(Byte.MIN_VALUE, reader.column(0).getInt());
+ assertEquals(40, reader.scalar(0).getInt());
assertFalse(reader.next());
- rs.clear();
- }
- @Test
- public void testSmallIntRW() {
- BatchSchema batchSchema = new SchemaBuilder()
- .add("col", MinorType.SMALLINT)
- .build();
- SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
- .add(0)
- .add(Short.MAX_VALUE)
- .add(Short.MIN_VALUE)
+ // Test the above again via the writer and reader
+ // utility classes.
+
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10)
+ .addRow(20)
+ .addRow(30)
+ .addRow(40)
.build();
- RowSetReader reader = rs.reader();
- assertTrue(reader.next());
- assertEquals(0, reader.column(0).getInt());
- assertTrue(reader.next());
- assertEquals(Short.MAX_VALUE, reader.column(0).getInt());
- assertEquals((int) Short.MAX_VALUE, reader.column(0).getObject());
- assertTrue(reader.next());
- assertEquals(Short.MIN_VALUE, reader.column(0).getInt());
- assertFalse(reader.next());
- rs.clear();
+ new RowSetComparison(expected).verifyAndClearAll(actual);
}
+ /**
+ * Test a record with a top-level array. The focus here is on the
+ * scalar array structure.
+ */
+
@Test
- public void testIntRW() {
- BatchSchema batchSchema = new SchemaBuilder()
- .add("col", MinorType.INT)
- .build();
- SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
- .add(0)
- .add(Integer.MAX_VALUE)
- .add(Integer.MIN_VALUE)
- .build();
- RowSetReader reader = rs.reader();
+ public void testScalarArrayStructure() {
+ TupleMetadata schema = new SchemaBuilder()
+ .addArray("a", MinorType.INT)
+ .buildSchema();
+ ExtendableRowSet rowSet = fixture.rowSet(schema);
+ RowSetWriter writer = rowSet.writer();
+
+ // Repeated Int
+ // Verify the invariants of the "full" and "simple" access paths
+
+ assertEquals(ObjectType.ARRAY, writer.column("a").type());
+
+ assertSame(writer.column("a"), writer.column(0));
+ assertSame(writer.array("a"), writer.array(0));
+ assertSame(writer.column("a").array(), writer.array("a"));
+ assertSame(writer.column(0).array(), writer.array(0));
+
+ assertEquals(ObjectType.SCALAR, writer.column("a").array().entry().type());
+ assertEquals(ObjectType.SCALAR, writer.column("a").array().entryType());
+ assertSame(writer.array(0).entry().scalar(), writer.array(0).scalar());
+ assertEquals(ValueType.INTEGER, writer.array(0).scalar().valueType());
+
+ // Sanity checks
+
+ try {
+ writer.column(0).scalar();
+ fail();
+ } catch (UnsupportedOperationException e) {
+ // Expected
+ }
+ try {
+ writer.column(0).tuple();
+ fail();
+ } catch (UnsupportedOperationException e) {
+ // Expected
+ }
+
+ // Write some data
+
+ ScalarWriter intWriter = writer.array("a").scalar();
+ intWriter.setInt(10);
+ intWriter.setInt(11);
+ writer.save();
+ intWriter.setInt(20);
+ intWriter.setInt(21);
+ intWriter.setInt(22);
+ writer.save();
+ intWriter.setInt(30);
+ writer.save();
+ intWriter.setInt(40);
+ intWriter.setInt(41);
+ writer.save();
+
+ // Finish the row set and get a reader.
+
+ SingleRowSet actual = writer.done();
+ RowSetReader reader = actual.reader();
+
+ // Verify the invariants of the "full" and "simple" access paths
+
+    assertEquals(ObjectType.ARRAY, reader.column("a").type());
+
+ assertSame(reader.column("a"), reader.column(0));
+ assertSame(reader.array("a"), reader.array(0));
+ assertSame(reader.column("a").array(), reader.array("a"));
+ assertSame(reader.column(0).array(), reader.array(0));
+
+ assertEquals(ObjectType.SCALAR, reader.column("a").array().entryType());
+ assertEquals(ValueType.INTEGER, reader.array(0).elements().valueType());
+
+ // Read and verify the rows
+
+ ScalarElementReader intReader = reader.array(0).elements();
+ assertTrue(reader.next());
+ assertEquals(2, intReader.size());
+ assertEquals(10, intReader.getInt(0));
+ assertEquals(11, intReader.getInt(1));
assertTrue(reader.next());
- assertEquals(0, reader.column(0).getInt());
+ assertEquals(3, intReader.size());
+ assertEquals(20, intReader.getInt(0));
+ assertEquals(21, intReader.getInt(1));
+ assertEquals(22, intReader.getInt(2));
assertTrue(reader.next());
- assertEquals(Integer.MAX_VALUE, reader.column(0).getInt());
- assertEquals(Integer.MAX_VALUE, reader.column(0).getObject());
+ assertEquals(1, intReader.size());
+ assertEquals(30, intReader.getInt(0));
assertTrue(reader.next());
- assertEquals(Integer.MIN_VALUE, reader.column(0).getInt());
+ assertEquals(2, intReader.size());
+ assertEquals(40, intReader.getInt(0));
+ assertEquals(41, intReader.getInt(1));
assertFalse(reader.next());
- rs.clear();
- }
- @Test
- public void testLongRW() {
- BatchSchema batchSchema = new SchemaBuilder()
- .add("col", MinorType.BIGINT)
- .build();
- SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
- .add(0L)
- .add(Long.MAX_VALUE)
- .add(Long.MIN_VALUE)
+ // Test the above again via the writer and reader
+ // utility classes.
+
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addSingleCol(new int[] {10, 11})
+ .addSingleCol(new int[] {20, 21, 22})
+ .addSingleCol(new int[] {30})
+ .addSingleCol(new int[] {40, 41})
.build();
- RowSetReader reader = rs.reader();
- assertTrue(reader.next());
- assertEquals(0, reader.column(0).getLong());
- assertTrue(reader.next());
- assertEquals(Long.MAX_VALUE, reader.column(0).getLong());
- assertEquals(Long.MAX_VALUE, reader.column(0).getObject());
- assertTrue(reader.next());
- assertEquals(Long.MIN_VALUE, reader.column(0).getLong());
- assertFalse(reader.next());
- rs.clear();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(actual);
}
+ /**
+ * Test a simple map structure at the top level of a row.
+ */
+
@Test
- public void testFloatRW() {
- BatchSchema batchSchema = new SchemaBuilder()
- .add("col", MinorType.FLOAT4)
- .build();
- SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
- .add(0F)
- .add(Float.MAX_VALUE)
- .add(Float.MIN_VALUE)
- .build();
- RowSetReader reader = rs.reader();
+ public void testMapStructure() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMap("m")
+ .addArray("b", MinorType.INT)
+ .buildMap()
+ .buildSchema();
+ ExtendableRowSet rowSet = fixture.rowSet(schema);
+ RowSetWriter writer = rowSet.writer();
+
+ // Map and Int
+ // Test Invariants
+
+ assertEquals(ObjectType.SCALAR, writer.column("a").type());
+ assertEquals(ObjectType.SCALAR, writer.column(0).type());
+ assertEquals(ObjectType.TUPLE, writer.column("m").type());
+ assertEquals(ObjectType.TUPLE, writer.column(1).type());
+ assertSame(writer.column(1).tuple(), writer.tuple(1));
+
+ TupleWriter mapWriter = writer.column(1).tuple();
+ assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entry().type());
+ assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entryType());
+
+ ScalarWriter aWriter = writer.column("a").scalar();
+ ScalarWriter bWriter = writer.column("m").tuple().column("b").array().entry().scalar();
+ assertSame(bWriter, writer.tuple(1).array(0).scalar());
+ assertEquals(ValueType.INTEGER, bWriter.valueType());
+
+ // Sanity checks
+
+ try {
+ writer.column(1).scalar();
+ fail();
+ } catch (UnsupportedOperationException e) {
+ // Expected
+ }
+ try {
+ writer.column(1).array();
+ fail();
+ } catch (UnsupportedOperationException e) {
+ // Expected
+ }
+
+ // Write data
+
+ aWriter.setInt(10);
+ bWriter.setInt(11);
+ bWriter.setInt(12);
+ writer.save();
+ aWriter.setInt(20);
+ bWriter.setInt(21);
+ bWriter.setInt(22);
+ writer.save();
+ aWriter.setInt(30);
+ bWriter.setInt(31);
+ bWriter.setInt(32);
+ writer.save();
+
+ // Finish the row set and get a reader.
+
+ SingleRowSet actual = writer.done();
+ RowSetReader reader = actual.reader();
+
+ assertEquals(ObjectType.SCALAR, reader.column("a").type());
+ assertEquals(ObjectType.SCALAR, reader.column(0).type());
+ assertEquals(ObjectType.TUPLE, reader.column("m").type());
+ assertEquals(ObjectType.TUPLE, reader.column(1).type());
+ assertSame(reader.column(1).tuple(), reader.tuple(1));
+
+ ScalarReader aReader = reader.column(0).scalar();
+ TupleReader mReader = reader.column(1).tuple();
+ assertEquals(ObjectType.SCALAR, mReader.column("b").array().entryType());
+ ScalarElementReader bReader = mReader.column(0).elements();
+ assertEquals(ValueType.INTEGER, bReader.valueType());
+
assertTrue(reader.next());
- assertEquals(0, reader.column(0).getDouble(), 0.000001);
+ assertEquals(10, aReader.getInt());
+ assertEquals(11, bReader.getInt(0));
+ assertEquals(12, bReader.getInt(1));
assertTrue(reader.next());
- assertEquals((double) Float.MAX_VALUE, reader.column(0).getDouble(), 0.000001);
- assertEquals((double) Float.MAX_VALUE, (double) reader.column(0).getObject(), 0.000001);
+ assertEquals(20, aReader.getInt());
+ assertEquals(21, bReader.getInt(0));
+ assertEquals(22, bReader.getInt(1));
assertTrue(reader.next());
- assertEquals((double) Float.MIN_VALUE, reader.column(0).getDouble(), 0.000001);
+ assertEquals(30, aReader.getInt());
+ assertEquals(31, bReader.getInt(0));
+ assertEquals(32, bReader.getInt(1));
assertFalse(reader.next());
- rs.clear();
- }
- @Test
- public void testDoubleRW() {
- BatchSchema batchSchema = new SchemaBuilder()
- .add("col", MinorType.FLOAT8)
- .build();
- SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
- .add(0D)
- .add(Double.MAX_VALUE)
- .add(Double.MIN_VALUE)
+ // Verify that the map accessor's value count was set.
+
+ @SuppressWarnings("resource")
+ MapVector mapVector = (MapVector) actual.container().getValueVector(1).getValueVector();
+ assertEquals(actual.rowCount(), mapVector.getAccessor().getValueCount());
+
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, new Object[] {new int[] {11, 12}})
+ .addRow(20, new Object[] {new int[] {21, 22}})
+ .addRow(30, new Object[] {new int[] {31, 32}})
.build();
- RowSetReader reader = rs.reader();
- assertTrue(reader.next());
- assertEquals(0, reader.column(0).getDouble(), 0.000001);
- assertTrue(reader.next());
- assertEquals(Double.MAX_VALUE, reader.column(0).getDouble(), 0.000001);
- assertEquals(Double.MAX_VALUE, (double) reader.column(0).getObject(), 0.000001);
- assertTrue(reader.next());
- assertEquals(Double.MIN_VALUE, reader.column(0).getDouble(), 0.000001);
- assertFalse(reader.next());
- rs.clear();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(actual);
}
@Test
- public void testStringRW() {
- BatchSchema batchSchema = new SchemaBuilder()
- .add("col", MinorType.VARCHAR)
- .build();
- SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
- .add("")
- .add("abcd")
- .build();
- RowSetReader reader = rs.reader();
+ public void testRepeatedMapStructure() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMapArray("m")
+ .add("b", MinorType.INT)
+ .add("c", MinorType.INT)
+ .buildMap()
+ .buildSchema();
+ ExtendableRowSet rowSet = fixture.rowSet(schema);
+ RowSetWriter writer = rowSet.writer();
+
+ // Map and Int
+ // Pick out components and lightly test. (Assumes structure
+ // tested earlier is still valid, so no need to exhaustively
+ // test again.)
+
+ assertEquals(ObjectType.SCALAR, writer.column("a").type());
+ assertEquals(ObjectType.ARRAY, writer.column("m").type());
+
+ ArrayWriter maWriter = writer.column(1).array();
+ assertEquals(ObjectType.TUPLE, maWriter.entryType());
+
+ TupleWriter mapWriter = maWriter.tuple();
+ assertEquals(ObjectType.SCALAR, mapWriter.column("b").type());
+ assertEquals(ObjectType.SCALAR, mapWriter.column("c").type());
+
+ ScalarWriter aWriter = writer.column("a").scalar();
+ ScalarWriter bWriter = mapWriter.scalar("b");
+ ScalarWriter cWriter = mapWriter.scalar("c");
+ assertEquals(ValueType.INTEGER, aWriter.valueType());
+ assertEquals(ValueType.INTEGER, bWriter.valueType());
+ assertEquals(ValueType.INTEGER, cWriter.valueType());
+
+ // Write data
+
+ aWriter.setInt(10);
+ bWriter.setInt(101);
+ cWriter.setInt(102);
+ maWriter.save(); // Advance to next array position
+ bWriter.setInt(111);
+ cWriter.setInt(112);
+ maWriter.save();
+ writer.save();
+
+ aWriter.setInt(20);
+ bWriter.setInt(201);
+ cWriter.setInt(202);
+ maWriter.save();
+ bWriter.setInt(211);
+ cWriter.setInt(212);
+ maWriter.save();
+ writer.save();
+
+ aWriter.setInt(30);
+ bWriter.setInt(301);
+ cWriter.setInt(302);
+ maWriter.save();
+ bWriter.setInt(311);
+ cWriter.setInt(312);
+ maWriter.save();
+ writer.save();
+
+ // Finish the row set and get a reader.
+
+ SingleRowSet actual = writer.done();
+ RowSetReader reader = actual.reader();
+
+ // Verify reader structure
+
+ assertEquals(ObjectType.SCALAR, reader.column("a").type());
+ assertEquals(ObjectType.ARRAY, reader.column("m").type());
+
+ ArrayReader maReader = reader.column(1).array();
+ assertEquals(ObjectType.TUPLE, maReader.entryType());
+
+ TupleReader mapReader = maReader.tuple();
+ assertEquals(ObjectType.SCALAR, mapReader.column("b").type());
+ assertEquals(ObjectType.SCALAR, mapReader.column("c").type());
+
+ ScalarReader aReader = reader.column("a").scalar();
+ ScalarReader bReader = mapReader.scalar("b");
+ ScalarReader cReader = mapReader.scalar("c");
+ assertEquals(ValueType.INTEGER, aReader.valueType());
+ assertEquals(ValueType.INTEGER, bReader.valueType());
+ assertEquals(ValueType.INTEGER, cReader.valueType());
+
+ // Row 1: use index accessors
+
+ assertTrue(reader.next());
+ assertEquals(10, aReader.getInt());
+ TupleReader ixReader = maReader.tuple(0);
+ assertEquals(101, ixReader.scalar(0).getInt());
+ assertEquals(102, ixReader.scalar(1).getInt());
+ ixReader = maReader.tuple(1);
+ assertEquals(111, ixReader.scalar(0).getInt());
+ assertEquals(112, ixReader.scalar(1).getInt());
+
+ // Row 2: use common accessor with explicit positioning,
+ // but access scalars through the map reader.
+
assertTrue(reader.next());
- assertEquals("", reader.column(0).getString());
+ assertEquals(20, aReader.getInt());
+ maReader.setPosn(0);
+ assertEquals(201, mapReader.scalar(0).getInt());
+ assertEquals(202, mapReader.scalar(1).getInt());
+ maReader.setPosn(1);
+ assertEquals(211, mapReader.scalar(0).getInt());
+ assertEquals(212, mapReader.scalar(1).getInt());
+
+ // Row 3: use common accessor for scalars
+
assertTrue(reader.next());
- assertEquals("abcd", reader.column(0).getString());
- assertEquals("abcd", reader.column(0).getObject());
+ assertEquals(30, aReader.getInt());
+ maReader.setPosn(0);
+ assertEquals(301, bReader.getInt());
+ assertEquals(302, cReader.getInt());
+ maReader.setPosn(1);
+ assertEquals(311, bReader.getInt());
+ assertEquals(312, cReader.getInt());
+
assertFalse(reader.next());
- rs.clear();
- }
- /**
- * Test writing to and reading from a row set with nested maps.
- * Map fields are flattened into a logical schema.
- */
+ // Verify that the map accessor's value count was set.
-// @Test
-// public void testMap() {
-// BatchSchema batchSchema = new SchemaBuilder()
-// .add("a", MinorType.INT)
-// .addMap("b")
-// .add("c", MinorType.INT)
-// .add("d", MinorType.INT)
-// .buildMap()
-// .build();
-// SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
-// .add(10, 20, 30)
-// .add(40, 50, 60)
-// .build();
-// RowSetReader reader = rs.reader();
-// assertTrue(reader.next());
-// assertEquals(10, reader.column(0).getInt());
-// assertEquals(20, reader.column(1).getInt());
-// assertEquals(30, reader.column(2).getInt());
-// assertEquals(10, reader.column("a").getInt());
-// assertEquals(30, reader.column("b.d").getInt());
-// assertTrue(reader.next());
-// assertEquals(40, reader.column(0).getInt());
-// assertEquals(50, reader.column(1).getInt());
-// assertEquals(60, reader.column(2).getInt());
-// assertFalse(reader.next());
-// rs.clear();
-// }
+ @SuppressWarnings("resource")
+ RepeatedMapVector mapVector = (RepeatedMapVector) actual.container().getValueVector(1).getValueVector();
+ assertEquals(3, mapVector.getAccessor().getValueCount());
+
+ // Verify the readers and writers again using the testing tools.
+
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, new Object[] {new Object[] {101, 102}, new Object[] {111, 112}})
+ .addRow(20, new Object[] {new Object[] {201, 202}, new Object[] {211, 212}})
+ .addRow(30, new Object[] {new Object[] {301, 302}, new Object[] {311, 312}})
+ .build();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(actual);
+ }
/**
* Test an array of ints (as an example fixed-width type)
@@ -382,7 +528,7 @@ public class RowSetTest extends SubOperatorTest {
*/
@Test
- public void TestTopFixedWidthArray() {
+ public void testTopFixedWidthArray() {
BatchSchema batchSchema = new SchemaBuilder()
.add("c", MinorType.INT)
.addArray("a", MinorType.INT)
@@ -390,49 +536,131 @@ public class RowSetTest extends SubOperatorTest {
ExtendableRowSet rs1 = fixture.rowSet(batchSchema);
RowSetWriter writer = rs1.writer();
- writer.column(0).setInt(10);
- ArrayWriter array = writer.column(1).array();
+ writer.scalar(0).setInt(10);
+ ScalarWriter array = writer.array(1).scalar();
array.setInt(100);
array.setInt(110);
writer.save();
- writer.column(0).setInt(20);
- array = writer.column(1).array();
+ writer.scalar(0).setInt(20);
array.setInt(200);
array.setInt(120);
array.setInt(220);
writer.save();
- writer.column(0).setInt(30);
+ writer.scalar(0).setInt(30);
writer.save();
- writer.done();
- RowSetReader reader = rs1.reader();
+ SingleRowSet result = writer.done();
+
+ RowSetReader reader = result.reader();
assertTrue(reader.next());
- assertEquals(10, reader.column(0).getInt());
- ArrayReader arrayReader = reader.column(1).array();
+ assertEquals(10, reader.scalar(0).getInt());
+ ScalarElementReader arrayReader = reader.array(1).elements();
assertEquals(2, arrayReader.size());
assertEquals(100, arrayReader.getInt(0));
assertEquals(110, arrayReader.getInt(1));
assertTrue(reader.next());
- assertEquals(20, reader.column(0).getInt());
- arrayReader = reader.column(1).array();
+ assertEquals(20, reader.scalar(0).getInt());
assertEquals(3, arrayReader.size());
assertEquals(200, arrayReader.getInt(0));
assertEquals(120, arrayReader.getInt(1));
assertEquals(220, arrayReader.getInt(2));
assertTrue(reader.next());
- assertEquals(30, reader.column(0).getInt());
- arrayReader = reader.column(1).array();
+ assertEquals(30, reader.scalar(0).getInt());
assertEquals(0, arrayReader.size());
assertFalse(reader.next());
SingleRowSet rs2 = fixture.rowSetBuilder(batchSchema)
- .add(10, new int[] {100, 110})
- .add(20, new int[] {200, 120, 220})
- .add(30, null)
+ .addRow(10, new int[] {100, 110})
+ .addRow(20, new int[] {200, 120, 220})
+ .addRow(30, null)
.build();
new RowSetComparison(rs1)
.verifyAndClearAll(rs2);
}
+ /**
+ * Test filling a row set up to the maximum number of rows.
+ * Values are small enough to prevent filling to the
+ * maximum buffer size.
+ */
+
+ @Test
+ public void testRowBounds() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .build();
+
+ ExtendableRowSet rs = fixture.rowSet(batchSchema);
+ RowSetWriter writer = rs.writer();
+ int count = 0;
+ while (! writer.isFull()) {
+ writer.scalar(0).setInt(count++);
+ writer.save();
+ }
+ writer.done();
+
+ assertEquals(ValueVector.MAX_ROW_COUNT, count);
+ // The writer index points past the writable area.
+    // But this is fine: the valid() method says we can't
+ // write at this location.
+ assertEquals(ValueVector.MAX_ROW_COUNT, writer.rowIndex());
+ assertEquals(ValueVector.MAX_ROW_COUNT, rs.rowCount());
+ rs.clear();
+ }
+
+ /**
+ * Test filling a row set up to the maximum vector size.
+ * Values in the first column are small enough to prevent filling to the
+ * maximum buffer size, but values in the second column
+ * will reach maximum buffer size before maximum row size.
+ * The result should be the number of rows that fit, with the
+ * partial last row not counting. (A complete application would
+ * reload the partial row into a new row set.)
+ */
+
+ @Test
+ public void testBufferBounds() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .build();
+
+ String varCharValue;
+ try {
+ byte rawValue[] = new byte[512];
+ Arrays.fill(rawValue, (byte) 'X');
+ varCharValue = new String(rawValue, "UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ throw new IllegalStateException(e);
+ }
+
+ ExtendableRowSet rs = fixture.rowSet(batchSchema);
+ RowSetWriter writer = rs.writer();
+ int count = 0;
+ try {
+
+      // Test overflow. This is not a typical use case: a client would
+      // not normally hit overflow without overflow handling. Here, we
+      // throw away the last row because the row set abstraction does
+      // not handle vector overflow other than by throwing an exception.
+
+ for (;;) {
+ writer.scalar(0).setInt(count);
+ writer.scalar(1).setString(varCharValue);
+
+ // Won't get here on overflow.
+ writer.save();
+ count++;
+ }
+ } catch (IndexOutOfBoundsException e) {
+ assertTrue(e.getMessage().contains("Overflow"));
+ }
+ writer.done();
+
+ assertTrue(count < ValueVector.MAX_ROW_COUNT);
+ assertEquals(count, writer.rowIndex());
+ assertEquals(count, rs.rowCount());
+ rs.clear();
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java
new file mode 100644
index 0000000..147b713
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFillEmpties.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.apache.drill.test.rowSet.RowSetWriter;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+/**
+ * Test the "fill empties" logic for all types for all modes.
+ * This test exploits the dynamic typing ability of the
+ * accessors. Creating an object per value is too slow for
+ * production code, but very handy for tests such as this.
+ * <p>
+ * Note that this test also has the handy side-effect of testing
+ * null handling in the accessor classes.
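+ * <p>
+ * The dynamic-typing mechanism, sketched from the test body below, is
+ * the generic <tt>setObject()</tt> call, which routes a per-type test
+ * value to the proper column writer:
+ * <pre><code>
+ * colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i));
+ * </code></pre>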
+ */
+
+public class TestFillEmpties extends SubOperatorTest {
+
+ public static final int ROW_COUNT = 1000;
+
+ /**
+ * Test "fill empties" for required types. Here, the fill value
+ * is more of a convention: 0 (fixed-width) or an empty
+ * entry (variable width). Some fill value is required to avoid
+ * the alternatives, which are to either 1) leave the value as
+ * garbage, or 2) raise an exception about the missing value.
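+ * <p>
+ * A simplified sketch of the pattern exercised below: write a value
+ * on every fifth row only, and let the writer back-fill the rest:
+ * <pre><code>
+ * if (i % 5 == 0) {
+ *   colWriter.setObject(testValue); // testValue: per-type test datum
+ * }
+ * writer.save(); // skipped rows read back as zero or an empty value
+ * </code></pre>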
+ */
+
+ @Test
+ public void testFillEmptiesRequired() {
+ testFillEmpties(DataMode.REQUIRED);
+ }
+
+ /**
+ * Test "fill empties" for nullable types which are the most
+ * "natural" type for omitted values.
+ * Nullable vectors fill empties with nulls.
+ */
+
+ @Test
+ public void testFillEmptiesNullable() {
+ testFillEmpties(DataMode.OPTIONAL);
+ }
+
+ /**
+ * Test "fill empties" for repeated types.
+ * Drill defines a null (omitted) array as the same thing as
+ * a zero-length array.
+ */
+
+ @Test
+ public void testFillEmptiesRepeated() {
+ testFillEmpties(DataMode.REPEATED);
+ }
+
+ private void testFillEmpties(DataMode mode) {
+ for (MinorType type : MinorType.values()) {
+ switch (type) {
+ case DECIMAL28DENSE:
+ case DECIMAL38DENSE:
+ // Not yet supported
+ break;
+ case GENERIC_OBJECT:
+ case LATE:
+ case LIST:
+ case MAP:
+ case NULL:
+ case UNION:
+ // Writer N/A
+ break;
+ case BIT:
+ case FIXEDBINARY:
+ case FIXEDCHAR:
+ case FIXED16CHAR:
+ case MONEY:
+ case TIMESTAMPTZ:
+ case TIMETZ:
+ // Not supported in Drill
+ break;
+ case DECIMAL18:
+ case DECIMAL28SPARSE:
+ case DECIMAL9:
+ case DECIMAL38SPARSE:
+ doFillEmptiesTest(type, mode, 9, 2);
+ break;
+ default:
+ doFillEmptiesTest(type, mode);
+ }
+ }
+ }
+
+ private void doFillEmptiesTest(MinorType type, DataMode mode, int prec, int scale) {
+ MajorType majorType = MajorType.newBuilder()
+ .setMinorType(type)
+ .setMode(mode)
+ .setPrecision(prec)
+ .setScale(scale)
+ .build();
+ doFillEmptiesTest(majorType);
+ }
+
+ private void doFillEmptiesTest(MinorType type, DataMode mode) {
+ MajorType majorType = MajorType.newBuilder()
+ .setMinorType(type)
+ .setMode(mode)
+ .build();
+ doFillEmptiesTest(majorType);
+ }
+
+ private void doFillEmptiesTest(MajorType majorType) {
+ if (majorType.getMode() == DataMode.REPEATED) {
+      doFillEmptiesRepeated(majorType);
+ } else {
+ doFillEmptiesScalar(majorType);
+ }
+ }
+
+ private void doFillEmptiesScalar(MajorType majorType) {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", majorType)
+ .buildSchema();
+ ExtendableRowSet rs = fixture.rowSet(schema);
+ RowSetWriter writer = rs.writer();
+ ScalarWriter colWriter = writer.scalar(0);
+ ValueType valueType = colWriter.valueType();
+ boolean nullable = majorType.getMode() == DataMode.OPTIONAL;
+ for (int i = 0; i < ROW_COUNT; i++) {
+ if (i % 5 == 0) {
+ colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i));
+ }
+ writer.save();
+ }
+ SingleRowSet result = writer.done();
+ RowSetReader reader = result.reader();
+ ScalarReader colReader = reader.scalar(0);
+ MinorType type = majorType.getMinorType();
+ boolean isVariable = (type == MinorType.VARCHAR ||
+ type == MinorType.VAR16CHAR ||
+ type == MinorType.VARBINARY);
+ for (int i = 0; i < ROW_COUNT; i++) {
+ assertTrue(reader.next());
+ if (i % 5 != 0) {
+ if (nullable) {
+ // Nullable types fill with nulls.
+
+ assertTrue(colReader.isNull());
+ continue;
+ }
+ if (isVariable) {
+ // Variable width types fill with a zero-length value.
+
+ assertEquals(0, colReader.getBytes().length);
+ continue;
+ }
+ }
+
+ // All other types fill with zero-bytes, interpreted as some form
+ // of zero for each type.
+
+ Object actual = colReader.getObject();
+ Object expected = RowSetUtilities.testDataFromInt(valueType, majorType,
+ i % 5 == 0 ? i : 0);
+ RowSetUtilities.assertEqualValues(
+ majorType.toString().replace('\n', ' ') + "[" + i + "]",
+ valueType, expected, actual);
+ }
+ result.clear();
+ }
+
+  private void doFillEmptiesRepeated(MajorType majorType) {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", majorType)
+ .buildSchema();
+ ExtendableRowSet rs = fixture.rowSet(schema);
+ RowSetWriter writer = rs.writer();
+ ScalarWriter colWriter = writer.array(0).scalar();
+ ValueType valueType = colWriter.valueType();
+ for (int i = 0; i < ROW_COUNT; i++) {
+ if (i % 5 == 0) {
+ // Write two values so we can exercise a bit of the array logic.
+
+ colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i));
+ colWriter.setObject(RowSetUtilities.testDataFromInt(valueType, majorType, i+1));
+ }
+ writer.save();
+ }
+ SingleRowSet result = writer.done();
+ RowSetReader reader = result.reader();
+ ScalarElementReader colReader = reader.array(0).elements();
+ for (int i = 0; i < ROW_COUNT; i++) {
+ assertTrue(reader.next());
+ if (i % 5 != 0) {
+ // Empty arrays are defined to be the same as a zero-length array.
+
+ assertEquals(0, colReader.size());
+ } else {
+ for (int j = 0; j < 2; j++) {
+ Object actual = colReader.getObject(j);
+ Object expected = RowSetUtilities.testDataFromInt(valueType, majorType, i + j);
+ RowSetUtilities.assertEqualValues(
+ majorType.toString().replace('\n', ' ') + "[" + i + "][" + j + "]",
+ valueType, expected, actual);
+ }
+ }
+ }
+ result.clear();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFixedWidthWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFixedWidthWriter.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFixedWidthWriter.java
new file mode 100644
index 0000000..a27fdf4
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestFixedWidthWriter.java
@@ -0,0 +1,444 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.IntVector;
+import org.apache.drill.exec.vector.accessor.ColumnAccessors.IntColumnWriter;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+/**
+ * Test the int writer as a typical example of a fixed-width
+ * writer. Exercises normal writing, writing after a (simulated)
+ * overflow, and filling in empty values.
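+ * <p>
+ * The basic write protocol exercised throughout, in simplified form:
+ * <pre><code>
+ * writer.startWrite();
+ * writer.startRow();
+ * writer.setInt(value);
+ * writer.saveRow();
+ * writer.endWrite();</code></pre>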
+ */
+
+public class TestFixedWidthWriter extends SubOperatorTest {
+
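+  /**
+   * Writer index stub for these tests: each test positions the writer
+   * by setting <tt>index</tt> directly, so <tt>nextElement()</tt> is
+   * deliberately a no-op.
+   */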
+ public static class TestIndex implements ColumnWriterIndex {
+
+ public int index;
+
+ @Override
+ public int vectorIndex() { return index; }
+
+ @Override
+ public void nextElement() { }
+
+ @Override
+ public void rollover() { }
+
+ @Override
+ public int rowStartIndex() { return index; }
+
+ @Override
+ public ColumnWriterIndex outerIndex() { return null; }
+ }
+
+ /**
+ * Basic test to write a contiguous set of values, enough to cause
+ * the vector to double in size twice, then read back the values.
+ */
+
+ @Test
+ public void testWrite() {
+ try (IntVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ IntColumnWriter writer = makeWriter(vector, index);
+
+ writer.startWrite();
+
+ // Write integers.
+ // Write enough that the vector is resized.
+
+ long origAddr = vector.getBuffer().addr();
+ for (int i = 0; i < 3000; i++) {
+ index.index = i;
+ writer.setInt(i * 10);
+ }
+ writer.endWrite();
+
+ // Should have been reallocated.
+
+ assertNotEquals(origAddr, vector.getBuffer().addr());
+
+ // Verify values
+
+ for (int i = 0; i < 3000; i++) {
+ assertEquals(i * 10, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ @Test
+ public void testRestartRow() {
+ try (IntVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ IntColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Write rows, rewriting every other row.
+
+ writer.startRow();
+ index.index = 0;
+ for (int i = 0; i < 50; i++) {
+ writer.setInt(i);
+ if (i % 2 == 0) {
+ writer.saveRow();
+ writer.startRow();
+ index.index++;
+ } else {
+ writer.restartRow();
+ }
+ }
+ writer.endWrite();
+
+ // Verify values
+
+ for (int i = 0; i < 25; i++) {
+ assertEquals(2 * i, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ /**
+ * Required, fixed-width vectors are back-filling with 0 to fill in missing
+ * values. While using zero is not strictly SQL compliant, it is better
+ * than failing. (The SQL solution would be to fill with nulls, but a
+ * required vector does not support nulls...)
+ */
+
+ @Test
+ public void testFillEmpties() {
+ try (IntVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ IntColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Write values, skipping four out of five positions,
+ // forcing backfill.
+ // The number of values is odd, forcing the writer to
+ // back-fill at the end as well as between values.
+ // Keep the number of values below the allocation so
+ // that we know all values were initially garbage-filled.
+
+ for (int i = 0; i < 501; i += 5) {
+ index.index = i;
+ writer.startRow();
+ writer.setInt(i);
+ writer.saveRow();
+ }
+ // At end, vector index defined to point one past the
+ // last row. That is, the vector index gives the row count.
+
+ index.index = 504;
+ writer.endWrite();
+
+ // Verify values
+
+ for (int i = 0; i < 504; i++) {
+ assertEquals("Mismatch on " + i,
+ (i%5) == 0 ? i : 0,
+ vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ /**
+ * The rollover method is used during vector overflow.
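+ * The sequence, as simulated below (the copy step belongs to a higher
+ * layer and is faked here by rewriting the vector contents):
+ * <pre><code>
+ * writer.preRollover();  // writer captures state for the partial row
+ * // ... higher layer swaps in a new vector, copies the partial row ...
+ * writer.postRollover(); // writer resumes at the start of the new vector
+ * </code></pre>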
+ */
+
+ @Test
+ public void testRollover() {
+ try (IntVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ IntColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Simulate doing an overflow of ten values.
+
+ for (int i = 0; i < 10; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setInt(i);
+ writer.saveRow();
+ }
+
+ // Overflow occurs after writing the 11th row
+
+ index.index = 10;
+ writer.startRow();
+ writer.setInt(10);
+
+ // Overflow occurs
+
+ writer.preRollover();
+
+ // Simulate rollover
+
+ for (int i = 0; i < 15; i++) {
+ vector.getMutator().set(i, 0xdeadbeef);
+ }
+ vector.getMutator().set(0, 10);
+
+ writer.postRollover();
+ index.index = 0;
+ writer.saveRow();
+
+ // Simulate resuming with a few more values.
+
+ for (int i = 1; i < 5; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setInt(10 + i);
+ writer.saveRow();
+ }
+ writer.endWrite();
+
+ // Verify the results
+
+ for (int i = 0; i < 5; i++) {
+ assertEquals(10 + i, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ /**
+ * Simulate the case in which the tail end of an overflow
+ * batch has empties. <tt>preRollover()</tt> should back-fill
+ * them with zeros prior to rollover.
+ */
+
+ @Test
+ public void testRolloverWithEmpties() {
+ try (IntVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ IntColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Simulate doing an overflow of 15 values,
+ // of which 5 are empty.
+
+ for (int i = 0; i < 10; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setInt(i);
+ writer.saveRow();
+ }
+
+ for (int i = 10; i < 15; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.saveRow();
+ }
+
+ // Overflow occurs before writing the 16th row
+
+ index.index = 15;
+ writer.startRow();
+
+      // Overflow occurs. This should back-fill the empty values.
+
+ writer.preRollover();
+
+ // Verify the first "batch" results
+
+ for (int i = 0; i < 10; i++) {
+ assertEquals(i, vector.getAccessor().get(i));
+ }
+ for (int i = 10; i < 15; i++) {
+ assertEquals(0, vector.getAccessor().get(i));
+ }
+
+ // Simulate rollover
+
+ for (int i = 0; i < 20; i++) {
+ vector.getMutator().set(i, 0xdeadbeef);
+ }
+
+ writer.postRollover();
+ index.index = 0;
+ writer.saveRow();
+
+ // Skip more values.
+
+ for (int i = 1; i < 5; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.saveRow();
+ }
+
+ // Simulate resuming with a few more values.
+
+ for (int i = 5; i < 10; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setInt(i + 20);
+ writer.saveRow();
+ }
+ writer.endWrite();
+
+ // Verify the results
+
+ for (int i = 0; i < 5; i++) {
+ assertEquals(0, vector.getAccessor().get(i));
+ }
+ for (int i = 5; i < 10; i++) {
+ assertEquals(i + 20, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ /**
+ * Test the case in which a scalar vector is used in conjunction
+ * with a nullable bits vector. The nullable vector will call the
+ * <tt>skipNulls()</tt> method to avoid writing values for null
+ * entries. (Without the call, the scalar writer will fill the
+ * empty values with zeros.)
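+ * <p>
+ * In simplified form, the per-row protocol for a null entry becomes:
+ * <pre><code>
+ * writer.startRow();
+ * writer.skipNulls(); // leave the underlying value untouched
+ * writer.saveRow();   // no zero back-fill for this position
+ * </code></pre>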
+ */
+
+ @Test
+ public void testSkipNulls() {
+ try (IntVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ IntColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Write values, skipping four out of five positions,
+ // skipping nulls.
+ // The loop will cause the vector to double in size.
+ // The number of values is odd, forcing the writer to
+ // skip nulls at the end as well as between values.
+
+ long origAddr = vector.getBuffer().addr();
+ for (int i = 0; i < 3000; i += 5) {
+ index.index = i;
+ writer.startRow();
+ writer.skipNulls();
+ writer.setInt(i);
+ writer.saveRow();
+ }
+ index.index = 3003;
+ writer.startRow();
+ writer.skipNulls();
+ writer.saveRow();
+ writer.endWrite();
+
+ // Should have been reallocated.
+
+ assertNotEquals(origAddr, vector.getBuffer().addr());
+
+ // Verify values. First 1000 were filled with known
+ // garbage values.
+
+ for (int i = 0; i < 1000; i++) {
+ assertEquals("Mismatch at " + i,
+ (i%5) == 0 ? i : 0xdeadbeef,
+ vector.getAccessor().get(i));
+ }
+
+ // Next values are filled with unknown values:
+ // whatever was left in the buffer allocated by Netty.
+
+      for (int i = 1005; i < 3000; i += 5) {
+ assertEquals(i, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ /**
+ * Test resize monitoring. Add a listener to an int writer,
+ * capture each resize, and refuse a resize when the number
+ * of ints exceeds 8K values. This triggers an overflow, which
+ * throws an exception that we then check for.
+ */
+
+ @Test
+ public void testSizeLimit() {
+ try (IntVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ IntColumnWriter writer = makeWriter(vector, index);
+ writer.bindListener(new ColumnWriterListener() {
+ int totalAlloc = 4096;
+
+ @Override
+ public void overflowed(ScalarWriter writer) {
+ throw new IllegalStateException("overflow called");
+ }
+
+ @Override
+ public boolean canExpand(ScalarWriter writer, int delta) {
+ totalAlloc += delta;
+ return totalAlloc < 16_384 * 4;
+ }
+ });
+ writer.startWrite();
+ try {
+ for (int i = 0; ; i++ ) {
+ index.index = i;
+ writer.startRow();
+ writer.setInt(i);
+ writer.saveRow();
+ }
+ }
+ catch(IllegalStateException e) {
+ assertTrue(e.getMessage().contains("overflow called"));
+ }
+
+ // Should have failed at index 8192, where the write would have
+ // doubled the vector to 16K values, which the listener rejected.
+
+ assertEquals(8192, index.index);
+ }
+ }
+
+ private IntVector allocVector(int size) {
+ MaterializedField field =
+ SchemaBuilder.columnSchema("x", MinorType.INT, DataMode.REQUIRED);
+ IntVector vector = new IntVector(field, fixture.allocator());
+ vector.allocateNew(size);
+
+ // Party on the bytes of the vector so we start dirty
+
+ for (int i = 0; i < size; i++) {
+ vector.getMutator().set(i, 0xdeadbeef);
+ }
+ return vector;
+ }
+
+ private IntColumnWriter makeWriter(IntVector vector, TestIndex index) {
+ IntColumnWriter writer = new IntColumnWriter(vector);
+ writer.bindIndex(index);
+
+ assertEquals(ValueType.INTEGER, writer.valueType());
+ return writer;
+ }
+}
[03/15] drill git commit: DRILL-5657: Size-aware vector writer
structure
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java
index f51c1a9..c90a734 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/package-info.java
@@ -24,8 +24,40 @@
* framework for the java-exec project. That one implementation is specific to
* unit tests, but the accessor framework could easily be used for other
* purposes as well.
+ *
+ * <h4>Vector Overflow Handling</h4>
+ *
+ * The writers provide integrated support for detecting and handling vector
+ * overflow. Overflow occurs when a value exceeds some maximum, such as the
+ * 16MB block size in Netty. Overflow handling consists of replacing the
+ * "full" vector with a new, empty vector as part of a new batch. Overflow
+ * handing code must copy partially written values from the "overflow" row
+ * to the new vectors. The classes here do not provide overflow handling,
+ * rather they provide the framework on top of which overflow handling can be
+ * built by a higher level of abstraction.
+ *
+ * <h4>JSON-Like Model</h4>
+ *
+ * The object reader and writer provide a generic, JSON-like interface
+ * to allow any valid combination of readers or writers (generically
+ * accessors):<pre><code>
+ * row : tuple
+ * tuple : (name column) *
+ * column : scalar obj | array obj | tuple obj
+ * scalar obj : scalar accessor
+ * array obj : array accessor
+ * array accessor : element accessor
+ * tuple obj : tuple</code></pre>
* <p>
- * Drill provides a set of column readers and writers. Compared to those, this
+ * As seen above, the accessor tree starts with a tuple (a row in the form of
+ * a class provided by the consumer.) Each column in the tuple is represented
+ * by an object accessor. That object accessor contains a scalar, tuple or array
+ * accessor. This models Drill's JSON structure: a row can have a list of lists
+ * of tuples that contain lists of ints, say.
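+ * <p>
+ * For example, a consumer might navigate the reader tree as follows
+ * (a sketch; the column name is hypothetical):<pre><code>
+ * ObjectReader col = rowReader.column("a");
+ * if (col.type() == ObjectType.SCALAR) {
+ * int value = col.scalar().getInt();
+ * }</code></pre>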
+ *
+ * <h4>Comparison with Previous Vector Readers and Writers</h4>
+ *
+ * Drill provides a set of vector readers and writers. Compared to those, this
* set:
* <ul>
* <li>Works with all Drill data types. The other set works only with repeated
@@ -36,23 +68,24 @@
* other set has accessors specific to each of the ~30 data types which Drill
* supports.</li>
* </ul>
- * The key difference is that this set is designed for developer ease-of-use, a
- * primary requirement for unit tests. The other set is designed to be used in
+ * The key difference is that this set is designed for both developer
+ * ease-of-use and performance. Developer ease-of-use is a primary
+ * requirement for unit tests. Performance is critical for production
+ * code. The other set is designed to be used in
* machine-generated or write-once code and so can be much more complex.
- * <p>
- * That is, the accessors here are optimized for test code: they trade
- * convenience for a slight decrease in speed (the performance hit comes from
- * the extra level of indirection which hides the complex, type-specific code
- * otherwise required.)
- * <p>
- * {@link ColumnReader} and {@link ColumnWriter} are the core abstractions: they
+ *
+ * <h4>Overview of the Code Structure</h4>
+ *
+ * {@link ScalarReader} and {@link ScalarWriter} are the core abstractions: they
* provide simplified access to the myriad of Drill column types via a
* simplified, uniform API. {@link TupleReader} and {@link TupleWriter} provide
* a simplified API to rows or maps (both of which are tuples in Drill.)
* {@link AccessorUtilities} provides a number of data conversion tools.
- * <p>
- * Overview of the code structure:
* <dl>
+ * <dt>ObjectWriter, ObjectReader</dt>
+ * <dd>Drill follows a JSON data model. A row is a tuple (AKA structure). Each
+ * column is a scalar, a map (AKA tuple, structure) or an array (AKA a repeated
+ * value.)</dd>
* <dt>TupleWriter, TupleReader</dt>
* <dd>In relational terms, a tuple is an ordered collection of values, where
* the meaning of the order is provided by a schema (usually a name/type pair.)
@@ -62,12 +95,8 @@
* But, doing so is slower than access by position (index). To provide efficient
* code, the tuple classes assume that the implementation imposes a column
* ordering which can be exposed via the indexes.</dd>
- * <dt>ColumnAccessor</dt>
- * <dd>A generic base class for column readers and writers that provides the
- * column data type.</dd>
- * <dt>ColumnWriter, ColumnReader</dt>
- * <dd>A uniform interface implemented for each column type ("major type" in
- * Drill terminology). The scalar types: Nullable (Drill optional) and
+ * <dt>ScalarWriter, ScalarReader</dt>
+ * <dd>A uniform interface for the scalar types: Nullable (Drill optional) and
* non-nullable (Drill required) fields use the same interface. Arrays (Drill
* repeated) are special. To handle the array aspect, even array fields use the
* same interface, but the <tt>getArray</tt> method returns another layer of
@@ -98,11 +127,11 @@
* <dd>The generated accessors: one for each combination of write/read, data
* (minor) type and cardinality (data model).
* <dd>
- * <dt>RowIndex</dt>
+ * <dt>ColumnReaderIndex, ColumnWriterIndex</dt>
* <dd>This nested class binds the accessor to the current row position for the
* entire record batch. That is, you don't ask for the value of column a for row
* 5, then the value of column b for row 5, etc. as with the "raw" vectors.
- * Instead, the implementation sets the row position (with, say an interator.)
+ * Instead, the implementation sets the row position (with, say an
+ * iterator; see the sketch after this list.)
* Then, all columns implicitly return values for the current row.
* <p>
* Different implementations of the row index handle the case of no selection
@@ -122,6 +151,16 @@
* The column index picks out the x coordinate (horizontal position along the
* columns.)</dt>
* </dl>
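+ * <p>
+ * A sketch of the pattern this enables (the iteration method shown is
+ * hypothetical; actual index implementations vary):<pre><code>
+ * while (rowIndex.next()) { // advance the shared row position
+ * int a = rowReader.scalar("a").getInt(); // reads the current row
+ * }</code></pre>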
+ * <h4>Column Writer Optimizations</h4>
+ * The writer classes here started as a simple abstraction on top of the existing
+ * vector mutators. The classes were then recruited for use in a new writer
+ * abstraction for Drill's record readers. At that point, performance became
+ * critical. The key to performance is to bypass the vector and the mutator and
+ * instead work with the Netty direct memory functions. This seems a risky
+ * approach until we realize that the writers form a very clear interface:
+ * the same interface supported the original mutator-based implementation and
+ * the revised Netty-based implementation. The benefit, however, is stark;
+ * the direct-to-Netty version is up to 4x faster (for repeated types).
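+ * <p>
+ * Conceptually (a sketch of the contrast, not the production code path):
+ * <pre><code>
+ * // Mutator path: virtual calls and bounds checks per value
+ * vector.getMutator().setSafe(rowIndex, value);
+ * // Direct path: write straight into the underlying Netty buffer
+ * PlatformDependent.putInt(bufAddr + 4 * rowIndex, value);
+ * </code></pre>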
*/
package org.apache.drill.exec.vector.accessor;
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java
new file mode 100644
index 0000000..7fb0c9d
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractArrayReader.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import org.apache.drill.exec.vector.UInt4Vector.Accessor;
+import org.apache.drill.exec.vector.accessor.ArrayReader;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ObjectReader;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.TupleReader;
+import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+
+/**
+ * Reader for an array-valued column. This reader provides access to specific
+ * array members via an array index. This is an abstract base class;
+ * subclasses are generated for each repeated value vector type.
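+ * <p>
+ * For arrays of scalars, a usage sketch (assuming a repeated-int
+ * column; construction is done via the reader factory):<pre><code>
+ * ScalarElementReader elements = arrayReader.elements();
+ * for (int i = 0; i < elements.size(); i++) {
+ * int value = elements.getInt(i);
+ * }</code></pre>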
+ */
+
+public abstract class AbstractArrayReader implements ArrayReader {
+
+ /**
+ * Object representation of an array reader.
+ */
+
+ public static class ArrayObjectReader extends AbstractObjectReader {
+
+ private AbstractArrayReader arrayReader;
+
+ public ArrayObjectReader(AbstractArrayReader arrayReader) {
+ this.arrayReader = arrayReader;
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex index) {
+ arrayReader.bindIndex(index);
+ }
+
+ @Override
+ public ObjectType type() {
+ return ObjectType.ARRAY;
+ }
+
+ @Override
+ public ArrayReader array() {
+ return arrayReader;
+ }
+
+ @Override
+ public ScalarElementReader elements() {
+ return arrayReader.elements();
+ }
+
+ @Override
+ public Object getObject() {
+ return arrayReader.getObject();
+ }
+
+ @Override
+ public String getAsString() {
+ return arrayReader.getAsString();
+ }
+
+ @Override
+ public void reposition() {
+ arrayReader.reposition();
+ }
+ }
+
+ public static class BaseElementIndex {
+ private final ColumnReaderIndex base;
+ protected int startOffset;
+ protected int length;
+
+ public BaseElementIndex(ColumnReaderIndex base) {
+ this.base = base;
+ }
+
+ public int batchIndex() {
+ return base.batchIndex();
+ }
+
+ public void reset(int startOffset, int length) {
+ assert length >= 0;
+ assert startOffset >= 0;
+ this.startOffset = startOffset;
+ this.length = length;
+ }
+
+ public int size() { return length; }
+
+ public int elementIndex(int index) {
+ if (index < 0 || length <= index) {
+ throw new IndexOutOfBoundsException("Index = " + index + ", length = " + length);
+ }
+ return startOffset + index;
+ }
+ }
+
+ private final Accessor accessor;
+ private final VectorAccessor vectorAccessor;
+ protected ColumnReaderIndex baseIndex;
+ protected BaseElementIndex elementIndex;
+
+ public AbstractArrayReader(RepeatedValueVector vector) {
+ accessor = vector.getOffsetVector().getAccessor();
+ vectorAccessor = null;
+ }
+
+ public AbstractArrayReader(VectorAccessor vectorAccessor) {
+ accessor = null;
+ this.vectorAccessor = vectorAccessor;
+ }
+
+ public void bindIndex(ColumnReaderIndex index) {
+ baseIndex = index;
+ if (vectorAccessor != null) {
+ vectorAccessor.bind(index);
+ }
+ }
+
+ private Accessor accessor() {
+ if (accessor != null) {
+ return accessor;
+ }
+ return ((RepeatedValueVector) (vectorAccessor.vector())).getOffsetVector().getAccessor();
+ }
+
+ public void reposition() {
+ final int index = baseIndex.vectorIndex();
+ final Accessor curAccessor = accessor();
+ final int startPosn = curAccessor.get(index);
+ elementIndex.reset(startPosn, curAccessor.get(index + 1) - startPosn);
+ }
+
+ @Override
+ public int size() { return elementIndex.size(); }
+
+ @Override
+ public ScalarElementReader elements() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ObjectReader entry(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public TupleReader tuple(int index) {
+ return entry(index).tuple();
+ }
+
+ @Override
+ public ArrayReader array(int index) {
+ return entry(index).array();
+ }
+
+ @Override
+ public ObjectReader entry() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public TupleReader tuple() {
+ return entry().tuple();
+ }
+
+ @Override
+ public ArrayReader array() {
+ return entry().array();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java
new file mode 100644
index 0000000..59a066e
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractObjectReader.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import org.apache.drill.exec.vector.accessor.ArrayReader;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ObjectReader;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.TupleReader;
+
+public abstract class AbstractObjectReader implements ObjectReader {
+
+ public abstract void bindIndex(ColumnReaderIndex index);
+
+ public void reposition() { }
+
+ @Override
+ public ScalarReader scalar() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public TupleReader tuple() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrayReader array() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ScalarElementReader elements() {
+ throw new UnsupportedOperationException();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java
new file mode 100644
index 0000000..afa0cb7
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/AbstractTupleReader.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.ArrayReader;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ObjectReader;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.TupleReader;
+
+/**
+ * Reader for a tuple (a row or a map.) Provides access to each
+ * column using either a name or a numeric index.
+ */
+
+public abstract class AbstractTupleReader implements TupleReader {
+
+ public static class TupleObjectReader extends AbstractObjectReader {
+
+ private AbstractTupleReader tupleReader;
+
+ public TupleObjectReader(AbstractTupleReader tupleReader) {
+ this.tupleReader = tupleReader;
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex index) {
+ tupleReader.bindIndex(index);
+ }
+
+ @Override
+ public ObjectType type() {
+ return ObjectType.TUPLE;
+ }
+
+ @Override
+ public TupleReader tuple() {
+ return tupleReader;
+ }
+
+ @Override
+ public Object getObject() {
+ return tupleReader.getObject();
+ }
+
+ @Override
+ public String getAsString() {
+ return tupleReader.getAsString();
+ }
+
+ @Override
+ public void reposition() {
+ tupleReader.reposition();
+ }
+ }
+
+ protected final TupleMetadata schema;
+ private final AbstractObjectReader readers[];
+
+ protected AbstractTupleReader(TupleMetadata schema, AbstractObjectReader readers[]) {
+ this.schema = schema;
+ this.readers = readers;
+ }
+
+ public void bindIndex(ColumnReaderIndex index) {
+ for (int i = 0; i < readers.length; i++) {
+ readers[i].bindIndex(index);
+ }
+ }
+
+ @Override
+ public TupleMetadata schema() { return schema; }
+
+ @Override
+ public int columnCount() { return schema().size(); }
+
+ @Override
+ public ObjectReader column(int colIndex) {
+ return readers[colIndex];
+ }
+
+ @Override
+ public ObjectReader column(String colName) {
+ int index = schema.index(colName);
+ if (index == -1) {
+ return null;
+ }
+ return readers[index];
+ }
+
+ @Override
+ public ScalarReader scalar(int colIndex) {
+ return column(colIndex).scalar();
+ }
+
+ @Override
+ public ScalarReader scalar(String colName) {
+ return column(colName).scalar();
+ }
+
+ @Override
+ public TupleReader tuple(int colIndex) {
+ return column(colIndex).tuple();
+ }
+
+ @Override
+ public TupleReader tuple(String colName) {
+ return column(colName).tuple();
+ }
+
+ @Override
+ public ArrayReader array(int colIndex) {
+ return column(colIndex).array();
+ }
+
+ @Override
+ public ArrayReader array(String colName) {
+ return column(colName).array();
+ }
+
+ @Override
+ public ObjectType type(int colIndex) {
+ return column(colIndex).type();
+ }
+
+ @Override
+ public ObjectType type(String colName) {
+ return column(colName).type();
+ }
+
+ @Override
+ public ScalarElementReader elements(int colIndex) {
+ return column(colIndex).elements();
+ }
+
+ @Override
+ public ScalarElementReader elements(String colName) {
+ return column(colName).elements();
+ }
+
+ public void reposition() {
+ for (int i = 0; i < columnCount(); i++) {
+ readers[i].reposition();
+ }
+ }
+
+ @Override
+ public Object getObject() {
+ List<Object> elements = new ArrayList<>();
+ for (int i = 0; i < columnCount(); i++) {
+ elements.add(readers[i].getObject());
+ }
+ return elements;
+ }
+
+ @Override
+ public String getAsString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("(");
+ for (int i = 0; i < columnCount(); i++) {
+ if (i > 0) {
+ buf.append( ", " );
+ }
+ buf.append(readers[i].getAsString());
+ }
+ buf.append(")");
+ return buf.toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java
new file mode 100644
index 0000000..f32c101
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseElementReader.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.impl.AccessorUtilities;
+import org.joda.time.Period;
+
+public abstract class BaseElementReader implements ScalarElementReader {
+
+ public static class ScalarElementObjectReader extends AbstractObjectReader {
+
+ private BaseElementReader elementReader;
+
+ public ScalarElementObjectReader(BaseElementReader elementReader) {
+ this.elementReader = elementReader;
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex index) {
+ elementReader.bindIndex((ElementReaderIndex) index);
+ }
+
+ @Override
+ public ObjectType type() {
+ return ObjectType.SCALAR;
+ }
+
+ @Override
+ public ScalarElementReader elements() {
+ return elementReader;
+ }
+
+ @Override
+ public Object getObject() {
+ // Simple: return elements as an object list.
+ // If really needed, could return as a typed array, but that
+ // is a bit of a hassle.
+
+ List<Object> elements = new ArrayList<>();
+ for (int i = 0; i < elementReader.size(); i++) {
+ elements.add(elementReader.getObject(i));
+ }
+ return elements;
+ }
+
+ @Override
+ public String getAsString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("[");
+ for (int i = 0; i < elementReader.size(); i++) {
+ if (i > 0) {
+ buf.append( ", " );
+ }
+ buf.append(elementReader.getAsString(i));
+ }
+ buf.append("]");
+ return buf.toString();
+ }
+ }
+
+ protected ElementReaderIndex vectorIndex;
+ protected VectorAccessor vectorAccessor;
+
+ public abstract void bindVector(ValueVector vector);
+
+ public void bindVector(MajorType majorType, VectorAccessor va) {
+ vectorAccessor = va;
+ }
+
+ protected void bindIndex(ElementReaderIndex rowIndex) {
+ this.vectorIndex = rowIndex;
+ }
+
+ @Override
+ public int size() { return vectorIndex.size(); }
+
+ @Override
+ public Object getObject(int index) {
+ if (isNull(index)) {
+ return "null";
+ }
+ switch (valueType()) {
+ case BYTES:
+ return getBytes(index);
+ case DECIMAL:
+ return getDecimal(index);
+ case DOUBLE:
+ return getDouble(index);
+ case INTEGER:
+ return getInt(index);
+ case LONG:
+ return getLong(index);
+ case PERIOD:
+ return getPeriod(index);
+ case STRING:
+ return getString(index);
+ default:
+ throw new IllegalStateException("Unexpected type: " + valueType());
+ }
+ }
+
+ @Override
+ public String getAsString(int index) {
+ switch (valueType()) {
+ case BYTES:
+ return AccessorUtilities.bytesToString(getBytes(index));
+ case DOUBLE:
+ return Double.toString(getDouble(index));
+ case INTEGER:
+ return Integer.toString(getInt(index));
+ case LONG:
+ return Long.toString(getLong(index));
+ case STRING:
+ return "\"" + getString(index) + "\"";
+ case DECIMAL:
+ return getDecimal(index).toPlainString();
+ case PERIOD:
+ return getPeriod(index).normalizedStandard().toString();
+ default:
+ throw new IllegalArgumentException("Unsupported type " + valueType());
+ }
+ }
+
+ @Override
+ public boolean isNull(int index) {
+ return false;
+ }
+
+ @Override
+ public int getInt(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getLong(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public double getDouble(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String getString(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public byte[] getBytes(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public BigDecimal getDecimal(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Period getPeriod(int index) {
+ throw new UnsupportedOperationException();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java
new file mode 100644
index 0000000..fb9a711
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/BaseScalarReader.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.impl.AccessorUtilities;
+import org.joda.time.Period;
+
+/**
+ * Column reader implementation that acts as the basis for the
+ * generated, vector-specific implementations. All get methods
+ * throw an exception; subclasses simply override the supported
+ * method(s).
+ */
+
+public abstract class BaseScalarReader implements ScalarReader {
+
+ public static class ScalarObjectReader extends AbstractObjectReader {
+
+ private BaseScalarReader scalarReader;
+
+ public ScalarObjectReader(BaseScalarReader scalarReader) {
+ this.scalarReader = scalarReader;
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex index) {
+ scalarReader.bindIndex(index);
+ }
+
+ @Override
+ public ObjectType type() {
+ return ObjectType.SCALAR;
+ }
+
+ @Override
+ public ScalarReader scalar() {
+ return scalarReader;
+ }
+
+ @Override
+ public Object getObject() {
+ return scalarReader.getObject();
+ }
+
+ @Override
+ public String getAsString() {
+ return scalarReader.getAsString();
+ }
+ }
+
+ protected ColumnReaderIndex vectorIndex;
+ protected VectorAccessor vectorAccessor;
+
+ public static ScalarObjectReader build(ValueVector vector, BaseScalarReader reader) {
+ reader.bindVector(vector);
+ return new ScalarObjectReader(reader);
+ }
+
+ public static AbstractObjectReader build(MajorType majorType, VectorAccessor va,
+ BaseScalarReader reader) {
+ reader.bindVector(majorType, va);
+ return new ScalarObjectReader(reader);
+ }
+
+ public abstract void bindVector(ValueVector vector);
+
+ protected void bindIndex(ColumnReaderIndex rowIndex) {
+ this.vectorIndex = rowIndex;
+ if (vectorAccessor != null) {
+ vectorAccessor.bind(rowIndex);
+ }
+ }
+
+ public void bindVector(MajorType majorType, VectorAccessor va) {
+ vectorAccessor = va;
+ }
+
+ @Override
+ public Object getObject() {
+ if (isNull()) {
+ return null;
+ }
+ switch (valueType()) {
+ case BYTES:
+ return getBytes();
+ case DECIMAL:
+ return getDecimal();
+ case DOUBLE:
+ return getDouble();
+ case INTEGER:
+ return getInt();
+ case LONG:
+ return getLong();
+ case PERIOD:
+ return getPeriod();
+ case STRING:
+ return getString();
+ default:
+ throw new IllegalStateException("Unexpected type: " + valueType());
+ }
+ }
+
+ @Override
+ public String getAsString() {
+ if (isNull()) {
+ return "null";
+ }
+ switch (valueType()) {
+ case BYTES:
+ return AccessorUtilities.bytesToString(getBytes());
+ case DOUBLE:
+ return Double.toString(getDouble());
+ case INTEGER:
+ return Integer.toString(getInt());
+ case LONG:
+ return Long.toString(getLong());
+ case STRING:
+ return "\"" + getString() + "\"";
+ case DECIMAL:
+ return getDecimal().toPlainString();
+ case PERIOD:
+ return getPeriod().normalizedStandard().toString();
+ default:
+ throw new IllegalArgumentException("Unsupported type " + valueType());
+ }
+ }
+
+ @Override
+ public boolean isNull() {
+ return false;
+ }
+
+ @Override
+ public int getInt() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getLong() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public double getDouble() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String getString() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public byte[] getBytes() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public BigDecimal getDecimal() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Period getPeriod() {
+ throw new UnsupportedOperationException();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java
new file mode 100644
index 0000000..0bcb6e2
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ColumnReaderFactory.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnAccessors;
+import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+
+/**
+ * Gather generated reader classes into a set of class tables to allow rapid
+ * run-time creation of readers. Builds the reader and its object reader
+ * wrapper which binds the vector to the reader.
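+ * <p>
+ * A usage sketch (the vector and row index variables are assumed to
+ * exist in the caller):<pre><code>
+ * AbstractObjectReader reader = ColumnReaderFactory.buildColumnReader(vector);
+ * reader.bindIndex(rowIndex); // rowIndex implements ColumnReaderIndex
+ * </code></pre>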
+ */
+
+@SuppressWarnings("unchecked")
+public class ColumnReaderFactory {
+
+ private static final int typeCount = MinorType.values().length;
+ private static final Class<? extends BaseScalarReader> requiredReaders[] = new Class[typeCount];
+ private static final Class<? extends BaseScalarReader> nullableReaders[] = new Class[typeCount];
+ private static final Class<? extends BaseElementReader> elementReaders[] = new Class[typeCount];
+
+ static {
+ ColumnAccessors.defineRequiredReaders(requiredReaders);
+ ColumnAccessors.defineNullableReaders(nullableReaders);
+ ColumnAccessors.defineArrayReaders(elementReaders);
+ }
+
+ public static AbstractObjectReader buildColumnReader(ValueVector vector) {
+ MajorType major = vector.getField().getType();
+ MinorType type = major.getMinorType();
+ DataMode mode = major.getMode();
+
+ switch (type) {
+ case GENERIC_OBJECT:
+ case LATE:
+ case NULL:
+ case LIST:
+ case MAP:
+ throw new UnsupportedOperationException(type.toString());
+ default:
+ switch (mode) {
+ case OPTIONAL:
+ return BaseScalarReader.build(vector, newAccessor(type, nullableReaders));
+ case REQUIRED:
+ return BaseScalarReader.build(vector, newAccessor(type, requiredReaders));
+ case REPEATED:
+ return ScalarArrayReader.build((RepeatedValueVector) vector, newAccessor(type, elementReaders));
+ default:
+ throw new UnsupportedOperationException(mode.toString());
+ }
+ }
+ }
+
+ public static AbstractObjectReader buildColumnReader(MajorType majorType, VectorAccessor va) {
+ MinorType type = majorType.getMinorType();
+ DataMode mode = majorType.getMode();
+
+ switch (type) {
+ case GENERIC_OBJECT:
+ case LATE:
+ case NULL:
+ case LIST:
+ case MAP:
+ throw new UnsupportedOperationException(type.toString());
+ default:
+ switch (mode) {
+ case OPTIONAL:
+ return BaseScalarReader.build(majorType, va, newAccessor(type, nullableReaders));
+ case REQUIRED:
+ return BaseScalarReader.build(majorType, va, newAccessor(type, requiredReaders));
+ case REPEATED:
+ return ScalarArrayReader.build(majorType, va, newAccessor(type, elementReaders));
+ default:
+ throw new UnsupportedOperationException(mode.toString());
+ }
+ }
+ }
+
+ public static <T> T newAccessor(MinorType type, Class<? extends T> accessors[]) {
+ try {
+ Class<? extends T> accessorClass = accessors[type.ordinal()];
+ if (accessorClass == null) {
+ throw new UnsupportedOperationException(type.toString());
+ }
+ return accessorClass.newInstance();
+ } catch (InstantiationException | IllegalAccessException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ElementReaderIndex.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ElementReaderIndex.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ElementReaderIndex.java
new file mode 100644
index 0000000..9985edc
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ElementReaderIndex.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+public interface ElementReaderIndex {
+ int batchIndex();
+ int size();
+ int vectorIndex(int posn);
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java
new file mode 100644
index 0000000..4f3aeeb
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/FixedWidthElementReaderIndex.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.reader.AbstractArrayReader.BaseElementIndex;
+
+/**
+ * Index into the vector of elements for a repeated vector.
+ * Keeps track of the current offset in terms of value positions.
+ */
+
+public class FixedWidthElementReaderIndex extends BaseElementIndex implements ElementReaderIndex {
+
+ public FixedWidthElementReaderIndex(ColumnReaderIndex base) {
+ super(base);
+ }
+
+ @Override
+ public int vectorIndex(int posn) {
+ return elementIndex(posn);
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java
new file mode 100644
index 0000000..66bc067
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/MapReader.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import java.util.List;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+
+/**
+ * Reader for a Drill Map type. Maps are actually tuples, just like rows.
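+ * <p>
+ * Because a map is a tuple, it is read through the same tuple API as a
+ * row (a sketch; the column names are hypothetical):<pre><code>
+ * TupleReader map = rowReader.tuple("m");
+ * int value = map.scalar("x").getInt();
+ * </code></pre>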
+ */
+
+public class MapReader extends AbstractTupleReader {
+
+ protected MapReader(ColumnMetadata schema, AbstractObjectReader readers[]) {
+ super(schema.mapSchema(), readers);
+ }
+
+ public static TupleObjectReader build(ColumnMetadata schema, AbstractObjectReader readers[]) {
+ return new TupleObjectReader(new MapReader(schema, readers));
+ }
+
+ public static AbstractObjectReader build(ColumnMetadata metadata,
+ List<AbstractObjectReader> readers) {
+ AbstractObjectReader readerArray[] = new AbstractObjectReader[readers.size()];
+ return build(metadata, readers.toArray(readerArray));
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java
new file mode 100644
index 0000000..9ed89f1
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ObjectArrayReader.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ObjectReader;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+
+/**
+ * Reader for an array of either tuples or other arrays.
+ */
+
+public class ObjectArrayReader extends AbstractArrayReader {
+
+ /**
+ * Index into the vector of elements for a repeated vector.
+ * Keeps track of the current offset in terms of value positions.
+ * This is a derived index. The base index points to an entry
+ * in the offset vector for the array. This inner index picks
+ * off elements within the range of offsets for that one entry.
+ * For example:<pre><code>
+ * [ ... 100 105 ...]
+ * </code></pre>In the above the value 100 might be at outer
+ * offset 5. The inner array will pick off the five values
+ * 100...104.
+ * <p>
+ * Because arrays allow random access on read, the inner offset
+ * is reset on each access to the array.
+ */
+
+ public static class ObjectElementReaderIndex extends BaseElementIndex implements ColumnReaderIndex {
+
+ private int posn;
+
+ public ObjectElementReaderIndex(ColumnReaderIndex base) {
+ super(base);
+ }
+
+ @Override
+ public int vectorIndex() {
+ return startOffset + posn;
+ }
+
+ public void set(int index) {
+ if (index < 0 || length <= index) {
+ throw new IndexOutOfBoundsException("Index = " + index + ", length = " + length);
+ }
+ posn = index;
+ }
+
+ public int posn() { return posn; }
+ }
+
+ /**
+ * Reader for each element.
+ */
+
+ private final AbstractObjectReader elementReader;
+
+ /**
+ * Index used to access elements.
+ */
+
+ private ObjectElementReaderIndex objElementIndex;
+
+ private ObjectArrayReader(RepeatedValueVector vector, AbstractObjectReader elementReader) {
+ super(vector);
+ this.elementReader = elementReader;
+ }
+
+ private ObjectArrayReader(VectorAccessor vectorAccessor, AbstractObjectReader elementReader) {
+ super(vectorAccessor);
+ this.elementReader = elementReader;
+ }
+
+ public static ArrayObjectReader build(RepeatedValueVector vector,
+ AbstractObjectReader elementReader) {
+ return new ArrayObjectReader(
+ new ObjectArrayReader(vector, elementReader));
+ }
+
+ public static AbstractObjectReader build(VectorAccessor vectorAccessor,
+ AbstractObjectReader elementReader) {
+ return new ArrayObjectReader(
+ new ObjectArrayReader(vectorAccessor, elementReader));
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex index) {
+ super.bindIndex(index);
+ objElementIndex = new ObjectElementReaderIndex(baseIndex);
+ elementIndex = objElementIndex;
+ elementReader.bindIndex(objElementIndex);
+ }
+
+ @Override
+ public ObjectType entryType() {
+ return elementReader.type();
+ }
+
+ @Override
+ public void setPosn(int index) {
+ objElementIndex.set(index);
+ elementReader.reposition();
+ }
+
+ @Override
+ public ObjectReader entry() {
+ return elementReader;
+ }
+
+ @Override
+ public ObjectReader entry(int index) {
+ setPosn(index);
+ return entry();
+ }
+
+ @Override
+ public Object getObject() {
+ List<Object> array = new ArrayList<>();
+ for (int i = 0; i < objElementIndex.size(); i++) {
+ array.add(entry(i).getObject());
+ }
+ return array;
+ }
+
+ @Override
+ public String getAsString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("[");
+ for (int i = 0; i < size(); i++) {
+ if (i > 0) {
+ buf.append( ", " );
+ }
+ buf.append(entry(i).getAsString());
+ }
+ buf.append("]");
+ return buf.toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java
new file mode 100644
index 0000000..d93e4a5
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/ScalarArrayReader.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+
+public class ScalarArrayReader extends AbstractArrayReader {
+
+ private final BaseElementReader elementReader;
+
+ private ScalarArrayReader(RepeatedValueVector vector,
+ BaseElementReader elementReader) {
+ super(vector);
+ this.elementReader = elementReader;
+ }
+
+ private ScalarArrayReader(VectorAccessor va,
+ BaseElementReader elementReader) {
+ super(va);
+ this.elementReader = elementReader;
+ }
+
+ public static ArrayObjectReader build(RepeatedValueVector vector,
+ BaseElementReader elementReader) {
+ elementReader.bindVector(vector.getDataVector());
+ return new ArrayObjectReader(new ScalarArrayReader(vector, elementReader));
+ }
+
+ public static ArrayObjectReader build(MajorType majorType, VectorAccessor va,
+ BaseElementReader elementReader) {
+ elementReader.bindVector(majorType, va);
+ return new ArrayObjectReader(new ScalarArrayReader(va, elementReader));
+ }
+
+ @Override
+ public void bindIndex(ColumnReaderIndex index) {
+ super.bindIndex(index);
+ FixedWidthElementReaderIndex fwElementIndex = new FixedWidthElementReaderIndex(baseIndex);
+ elementIndex = fwElementIndex;
+ elementReader.bindIndex(fwElementIndex);
+ }
+
+ @Override
+ public ObjectType entryType() {
+ return ObjectType.SCALAR;
+ }
+
+ @Override
+ public ScalarElementReader elements() {
+ return elementReader;
+ }
+
+ @Override
+ public void setPosn(int index) {
+ throw new IllegalStateException("setPosn() not supported for scalar arrays");
+ }
+
+ @Override
+ public Object getObject() {
+ List<Object> elements = new ArrayList<>();
+ for (int i = 0; i < size(); i++) {
+ elements.add(elementReader.getObject(i));
+ }
+ return elements;
+ }
+
+ @Override
+ public String getAsString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("[");
+ for (int i = 0; i < size(); i++) {
+ if (i > 0) {
+ buf.append( ", " );
+ }
+ buf.append(elementReader.getAsString(i));
+ }
+ buf.append("]");
+ return buf.toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java
new file mode 100644
index 0000000..1cf2a19
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/VectorAccessor.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.reader;
+
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnReaderIndex;
+
+public interface VectorAccessor {
+ void bind(ColumnReaderIndex index);
+ ValueVector vector();
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java
new file mode 100644
index 0000000..a94d2e8
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/reader/package-info.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Provides the reader hierarchy as explained in the API package.
+ * The only caveat is that a simplification is provided for arrays of
+ * scalar values: rather than a scalar reader for each value, the
+ * {@link ScalarElementReader} class provides access to the entire array
+ * via indexed get methods.
+ */
+
+package org.apache.drill.exec.vector.accessor.reader;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractArrayWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractArrayWriter.java
new file mode 100644
index 0000000..e6e29b4
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractArrayWriter.java
@@ -0,0 +1,348 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.accessor.ArrayWriter;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ObjectWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.exec.vector.accessor.TupleWriter.TupleWriterListener;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+
+/**
+ * Writer for an array-valued column. This writer appends values: once a value
+ * is written, it cannot be changed. As a result, writer methods have no item
+ * index; each set advances the array to the next position.
+ * <p>
+ * This class represents the array as a whole. In practice that means building
+ * the offset vector. The array is associated with an element object that
+ * manages writing to the scalar, array or tuple that is the array element. Note
+ * that this representation makes little use of the methods in the "Repeated"
+ * vector class: instead it works directly with the offset and element vectors.
+ * <p>
+ * An array has a one-to-many relationship with its children. Starting an array
+ * prepares for writing the first element. Each element must be saved by calling
+ * <tt>save()</tt>. This is done automatically for scalars (since there is
+ * exactly one value per element), but must be done by client code for
+ * arrays of arrays or tuples. Valid state transitions:
+ *
+ * <table border=1>
+ * <tr><th>Public API</th><th>Array Event</th><th>Offset Event</th><th>Element Event</th></tr>
+ * <tr><td>startBatch()</td>
+ * <td>startWrite()</td>
+ * <td>startWrite()</td>
+ * <td>startWrite()</td></tr>
+ * <tr><td>start() (new row)</td>
+ * <td>startRow()</td>
+ * <td>startRow()</td>
+ * <td>startRow()</td></tr>
+ * <tr><td>start() (without save)</td>
+ * <td>restartRow()</td>
+ * <td>restartRow()</td>
+ * <td>restartRow()</td></tr>
+ * <tr><td>save() (array)</td>
+ * <td>saveValue()</td>
+ * <td>saveValue()</td>
+ * <td>saveValue()</td></tr>
+ * <tr><td>save() (row)</td>
+ * <td colspan=3>See subclasses.</td></tr>
+ * <tr><td>harvest()</td>
+ * <td>endWrite()</td>
+ * <td>endWrite()</td>
+ * <td>endWrite()</td></tr>
+ * </table>
+ *
+ * Some items to note:
+ * <ul>
+ * <li>Batch and row events are passed to the element.</li>
+ * <li>Each element is saved via a call to {@link #save()} on the array.
+ * Without this call, the element value is discarded. This is necessary
+ * because the array always has an active element: no "startElement"
+ * method is needed. This also means that any unsaved element values
+ * can be discarded simply by omitting a call to <tt>save()</tt>.</li>
+ * <li>Since elements must be saved individually, the call to
+ * {@link #saveRow()} <i>does not</i> call <tt>saveValue()</tt>. This
+ * is an important distinction between an array and a tuple.</li>
+ * <li>The offset and element writers are treated equally: the same events
+ * are passed to both.</li>
+ * </ul>
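+ * <p>
+ * As a sketch, given a {@code rowWriter} for the enclosing row
+ * (column and variable names are illustrative), writing an array of
+ * scalars looks like:
+ * <pre><code>
+ * ArrayWriter tagsWriter = rowWriter.array("tags");
+ * ScalarWriter tagWriter = tagsWriter.scalar();
+ * for (String tag : tags) {
+ *   tagWriter.setString(tag); // scalar elements save automatically
+ * }
+ * rowWriter.save(); // saves the row, including the array offsets
+ * </code></pre>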
+ */
+
+public abstract class AbstractArrayWriter implements ArrayWriter, WriterEvents {
+
+ /**
+ * Object representation of an array writer.
+ */
+
+ public static class ArrayObjectWriter extends AbstractObjectWriter {
+
+ private AbstractArrayWriter arrayWriter;
+
+ public ArrayObjectWriter(ColumnMetadata schema, AbstractArrayWriter arrayWriter) {
+ super(schema);
+ this.arrayWriter = arrayWriter;
+ }
+
+ @Override
+ public ObjectType type() { return ObjectType.ARRAY; }
+
+ @Override
+ public void set(Object value) {
+ arrayWriter.setObject(value);
+ }
+
+ @Override
+ public ArrayWriter array() { return arrayWriter; }
+
+ @Override
+ public WriterEvents events() { return arrayWriter; }
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) {
+ arrayWriter.bindListener(listener);
+ }
+
+ @Override
+ public void bindListener(TupleWriterListener listener) {
+ arrayWriter.bindListener(listener);
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("arrayWriter");
+ arrayWriter.dump(format);
+ format.endObject();
+ }
+ }
+
+ public static abstract class BaseArrayWriter extends AbstractArrayWriter {
+
+ /**
+ * Index into the vector of elements for a repeated vector.
+ * Keeps track of the current offset in terms of value positions.
+ * Forwards overflow events to the base index.
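+ * For example, if this row's array starts at offset 12 in the element
+ * vector and three elements have been written, {@code vectorIndex()}
+ * returns 15.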
+ */
+
+ public class ArrayElementWriterIndex implements ColumnWriterIndex {
+
+ private int elementIndex;
+
+ public void reset() { elementIndex = 0; }
+
+ @Override
+ public int vectorIndex() { return elementIndex + offsetsWriter.nextOffset(); }
+
+ @Override
+ public int rowStartIndex() { return offsetsWriter.rowStartOffset(); }
+
+ public int arraySize() { return elementIndex; }
+
+ @Override
+ public void nextElement() { }
+
+ public final void next() { elementIndex++; }
+
+ public int valueStartOffset() { return offsetsWriter.nextOffset(); }
+
+ @Override
+ public void rollover() { }
+
+ @Override
+ public ColumnWriterIndex outerIndex() {
+ return outerIndex;
+ }
+
+ @Override
+ public String toString() {
+ return new StringBuilder()
+ .append("[")
+ .append(getClass().getSimpleName())
+ .append(" elementIndex = ")
+ .append(elementIndex)
+ .append("]")
+ .toString();
+ }
+ }
+
+ private final OffsetVectorWriter offsetsWriter;
+ private ColumnWriterIndex outerIndex;
+ protected ArrayElementWriterIndex elementIndex;
+
+ public BaseArrayWriter(UInt4Vector offsetVector, AbstractObjectWriter elementObjWriter) {
+ super(elementObjWriter);
+ offsetsWriter = new OffsetVectorWriter(offsetVector);
+ }
+
+ @Override
+ public void bindIndex(ColumnWriterIndex index) {
+ assert elementIndex != null;
+ outerIndex = index;
+ offsetsWriter.bindIndex(index);
+ elementObjWriter.events().bindIndex(elementIndex);
+ }
+
+ @Override
+ public ColumnWriterIndex writerIndex() { return outerIndex; }
+
+ @Override
+ public int size() { return elementIndex.arraySize(); }
+
+ @Override
+ public void startWrite() {
+ elementIndex.reset();
+ offsetsWriter.startWrite();
+ elementObjWriter.events().startWrite();
+ }
+
+ @Override
+ public void startRow() {
+
+ // Starting an outer value automatically starts the first
+ // element value. If no elements are written, then this
+ // inner start will just be ignored.
+
+ offsetsWriter.startRow();
+ elementIndex.reset();
+ elementObjWriter.events().startRow();
+ }
+
+ @Override
+ public void endArrayValue() {
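+
+ // The end of this array value is the current element position;
+ // record it in the offset vector, then reset for the next value.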
+ offsetsWriter.setNextOffset(elementIndex.vectorIndex());
+ elementIndex.reset();
+ }
+
+ @Override
+ public void restartRow() {
+ offsetsWriter.restartRow();
+ elementIndex.reset();
+ elementObjWriter.events().restartRow();
+ }
+
+ @Override
+ public void saveRow() {
+ offsetsWriter.saveRow();
+ elementObjWriter.events().saveRow();
+ }
+
+ @Override
+ public void endWrite() {
+ offsetsWriter.endWrite();
+ elementObjWriter.events().endWrite();
+ }
+
+ @Override
+ public void preRollover() {
+ elementObjWriter.events().preRollover();
+ offsetsWriter.preRollover();
+ }
+
+ @Override
+ public void postRollover() {
+ elementObjWriter.events().postRollover();
+
+ // Reset the index after the vectors: the vectors
+ // need the old row start index from the index.
+
+ offsetsWriter.postRollover();
+ elementIndex.rollover();
+ }
+
+ @Override
+ public int lastWriteIndex() { return outerIndex.vectorIndex(); }
+
+ /**
+ * Return the writer for the offset vector for this array. Primarily used
+ * to handle overflow; other clients should not attempt to muck about with
+ * the offset vector directly.
+ *
+ * @return the writer for the offset vector associated with this array
+ */
+
+ @Override
+ public OffsetVectorWriter offsetWriter() { return offsetsWriter; }
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) {
+ elementObjWriter.bindListener(listener);
+ }
+
+ @Override
+ public void bindListener(TupleWriterListener listener) {
+ elementObjWriter.bindListener(listener);
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format.extend();
+ super.dump(format);
+ format
+ .attribute("elementIndex", elementIndex.vectorIndex())
+ .attribute("offsetsWriter");
+ offsetsWriter.dump(format);
+ }
+ }
+
+ protected final AbstractObjectWriter elementObjWriter;
+
+ public AbstractArrayWriter(AbstractObjectWriter elementObjWriter) {
+ this.elementObjWriter = elementObjWriter;
+ }
+
+ @Override
+ public ObjectType entryType() {
+ return elementObjWriter.type();
+ }
+
+ @Override
+ public ObjectWriter entry() { return elementObjWriter; }
+
+ @Override
+ public ScalarWriter scalar() {
+ return elementObjWriter.scalar();
+ }
+
+ @Override
+ public TupleWriter tuple() {
+ return elementObjWriter.tuple();
+ }
+
+ @Override
+ public ArrayWriter array() {
+ return elementObjWriter.array();
+ }
+
+ public abstract void bindListener(ColumnWriterListener listener);
+ public abstract void bindListener(TupleWriterListener listener);
+ public abstract OffsetVectorWriter offsetWriter();
+
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("elementObjWriter");
+ elementObjWriter.dump(format);
+ format.endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractFixedWidthWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractFixedWidthWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractFixedWidthWriter.java
new file mode 100644
index 0000000..e49f92c
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractFixedWidthWriter.java
@@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import org.apache.drill.exec.memory.BaseAllocator;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+
+/**
+ * Base class for writers for fixed-width vectors. Handles common
+ * tasks, leaving the generated code to handle only type-specific
+ * operations.
+ */
+
+public abstract class AbstractFixedWidthWriter extends BaseScalarWriter {
+
+ public static abstract class BaseFixedWidthWriter extends AbstractFixedWidthWriter {
+
+ /**
+ * Buffer of zeros used to back-fill unwritten slots in vector
+ * buffers, since newly allocated memory is not zero-filled.
+ */
+
+ private static final byte ZERO_BUF[] = new byte[256];
+
+ /**
+ * Determine the write index, growing, overflowing and back-filling
+ * the vector as needed.
+ * <p>
+ * This is a bit tricky. This method has side effects, by design.
+ * The current vector buffer, and buffer address, will change in
+ * this method when a vector grows or overflows. So, don't use this
+ * method in inline calls of the form<pre><code>
+ * vector.getBuffer().doSomething(writeIndex());</code></pre>
+ * The buffer obtained by <tt>getBuffer()</tt> can differ from
+ * the current buffer after <tt>writeIndex()</tt> returns.
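+ * <p>
+ * A safe pattern is, as a sketch:
+ * <pre><code>
+ * final int writeIndex = writeIndex();
+ * drillBuf.setInt(writeIndex * width(), value);
+ * </code></pre>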
+ *
+ * @return the index at which to write the current value
+ */
+
+ protected final int writeIndex() {
+
+ // "Fast path" for the normal case of no fills, no overflow.
+ // This is the only bounds check we want to do for the entire
+ // set operation.
+
+ // This is performance critical code; every operation counts.
+ // Please be thoughtful when changing the code.
+
+ int writeIndex = vectorIndex.vectorIndex();
+ if (lastWriteIndex + 1 < writeIndex || writeIndex >= capacity) {
+ writeIndex = prepareWrite(writeIndex);
+ }
+
+ // Track the last write location for zero-fill use next time around.
+
+ lastWriteIndex = writeIndex;
+ return writeIndex;
+ }
+
+ protected final int prepareWrite(int writeIndex) {
+
+ // Either empties must be filled or the vector is full.
+
+ writeIndex = resize(writeIndex);
+
+ // Fill empties to the write position.
+
+ fillEmpties(writeIndex);
+ return writeIndex;
+ }
+
+ /**
+ * Fill empties. This is required because the allocated memory is not
+ * zero-filled.
+ */
+
+ @Override
+ protected final void fillEmpties(final int writeIndex) {
+ final int width = width();
+ final int stride = ZERO_BUF.length / width;
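+
+ // Zero-fill from just past the last write up to, but not
+ // including, the target write position, copying at most one
+ // zero-buffer stride per pass.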
+ int dest = lastWriteIndex + 1;
+ while (dest < writeIndex) {
+ int length = writeIndex - dest;
+ length = Math.min(length, stride);
+ drillBuf.unsafeCopyMemory(ZERO_BUF, 0, dest * width, length * width);
+ dest += length;
+ }
+ }
+ }
+
+ /**
+ * The largest position to which the writer has written data. Used to allow
+ * "fill-empties" (AKA "back-fill") of missing values one each value write
+ * and at the end of a batch. Note that this is the position of the last
+ * write, not the next write position. Starts at -1 (no last write).
+ */
+
+ protected int lastWriteIndex;
+
+ @Override
+ public void startWrite() {
+ setBuffer();
+ lastWriteIndex = -1;
+ }
+
+ public abstract int width();
+
+ @Override
+ protected final void setBuffer() {
+ drillBuf = vector().getBuffer();
+ capacity = drillBuf.capacity() / width();
+ }
+
+ protected final void mandatoryResize(final int writeIndex) {
+ if (writeIndex < capacity) {
+ return;
+ }
+
+ // Since some vectors start off as 0 length, set a
+ // minimum size to avoid silly thrashing on early rows.
+
+ final int size = BaseAllocator.nextPowerOfTwo(
+ Math.max((writeIndex + 1) * width(), MIN_BUFFER_SIZE));
+ realloc(size);
+ }
+
+ protected final int resize(final int writeIndex) {
+ if (writeIndex < capacity) {
+ return writeIndex;
+ }
+ final int width = width();
+
+ // Since some vectors start off as 0 length, set a
+ // minimum size to avoid silly thrashing on early rows.
+
+ final int size = BaseAllocator.nextPowerOfTwo(
+ Math.max((writeIndex + 1) * width, MIN_BUFFER_SIZE));
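+
+ // Example: writeIndex = 1000 with width = 4 yields
+ // nextPowerOfTwo(4004) = 4096 bytes.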
+
+ // Two cases: grow this vector or allocate a new one.
+
+ // Grow the vector -- or overflow if the growth would make the batch
+ // consume too much memory. The idea is that we grow vectors as they
+ // fit the available memory budget, then we fill those vectors until
+ // one of them needs more space. At that point we trigger overflow to
+ // a new set of vectors. Internal fragmentation will result, but this
+ // approach (along with proper initial vector sizing), minimizes that
+ // fragmentation.
+
+ if (size <= ValueVector.MAX_BUFFER_SIZE &&
+ canExpand(size - capacity * width)) {
+
+ // Optimized form of reAlloc() which does not zero memory, does not do
+ // bounds checks (since they were already done above). The write index
+ // and offset remain unchanged.
+
+ realloc(size);
+ } else {
+
+ // Allocate a new vector, or throw an exception if overflow is not
+ // supported. If overflow is supported, the callback will call
+ // endWrite(), which will fill empties, so no need to do that here.
+ // The call to endWrite() will also set the final writer index for the
+ // current vector. Then, bindVector() will be called to provide the new
+ // vector. The write index changes with the new vector.
+
+ overflowed();
+ }
+
+ // Call to resize may cause rollover, so reset write index
+ // afterwards.
+
+ return vectorIndex.vectorIndex();
+ }
+
+ @Override
+ public int lastWriteIndex() { return lastWriteIndex; }
+
+ @Override
+ public void skipNulls() {
+
+ // Pretend we've written up to the previous value.
+ // This will leave null values (as specified by the
+ // caller) uninitialized.
+
+ lastWriteIndex = vectorIndex.vectorIndex() - 1;
+ }
+
+ @Override
+ public void restartRow() {
+ lastWriteIndex = Math.min(lastWriteIndex, vectorIndex.vectorIndex() - 1);
+ }
+
+ @Override
+ public void preRollover() {
+ setValueCount(vectorIndex.rowStartIndex());
+ }
+
+ @Override
+ public void postRollover() {
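+
+ // Shift the last write index into the coordinates of the new
+ // vector: e.g., lastWriteIndex = 103 with row start 100 becomes 3.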
+ int newIndex = Math.max(lastWriteIndex - vectorIndex.rowStartIndex(), -1);
+ startWrite();
+ lastWriteIndex = newIndex;
+ }
+
+ @Override
+ public void endWrite() {
+ setValueCount(vectorIndex.vectorIndex());
+ }
+
+ protected abstract void fillEmpties(int writeIndex);
+
+ public void setValueCount(int valueCount) {
+
+ // Structured this way to avoid another drill buf access in the
+ // value-set path. This method finalizes positions already written,
+ // so vector overflow cannot occur here. However, the vector may
+ // need to grow so that "missing" values can be zero-filled. Since
+ // the resize can replace the underlying buffer, grab the buffer
+ // AFTER the calls to mandatoryResize() and fillEmpties().
+
+ mandatoryResize(valueCount - 1);
+ fillEmpties(valueCount);
+ vector().getBuffer().writerIndex(valueCount * width());
+
+ // Last write index is either the last value we just filled,
+ // or it is the last actual write, if this is an overflow
+ // situation.
+
+ lastWriteIndex = Math.max(lastWriteIndex, valueCount - 1);
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format.extend();
+ super.dump(format);
+ format
+ .attribute("lastWriteIndex", lastWriteIndex)
+ .endObject();
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractObjectWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractObjectWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractObjectWriter.java
new file mode 100644
index 0000000..a8f1c64
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractObjectWriter.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.ArrayWriter;
+import org.apache.drill.exec.vector.accessor.ObjectWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.exec.vector.accessor.TupleWriter.TupleWriterListener;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+
+/**
+ * Abstract base class for the object layer in writers. This class acts
+ * as the glue between a column and the data type of that column, per the
+ * JSON model which Drill uses. This base class provides stubs for most
+ * methods so that type-specific subclasses can simply fill in the bits
+ * needed for that particular type.
+ */
+
+public abstract class AbstractObjectWriter implements ObjectWriter {
+
+ private ColumnMetadata schema;
+
+ public AbstractObjectWriter(ColumnMetadata schema) {
+ this.schema = schema;
+ }
+
+ @Override
+ public ColumnMetadata schema() { return schema; }
+
+ @Override
+ public ScalarWriter scalar() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public TupleWriter tuple() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrayWriter array() {
+ throw new UnsupportedOperationException();
+ }
+
+ public abstract WriterEvents events();
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) { }
+
+ @Override
+ public void bindListener(TupleWriterListener listener) { }
+
+ public abstract void dump(HierarchicalFormatter format);
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java
new file mode 100644
index 0000000..c02e2d9
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractScalarWriter.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.BaseDataValueVector;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.joda.time.Period;
+
+/**
+ * Column writer implementation that acts as the basis for the
+ * generated, vector-specific implementations. All set methods
+ * throw an exception; subclasses simply override the supported
+ * method(s).
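+ * <p>
+ * A generated subclass might, as a rough sketch, override just:
+ * <pre><code>
+ * public void setInt(final int value) {
+ *   drillBuf.setInt(writeIndex() * 4, value);
+ * }
+ * </code></pre>
+ * (The actual generated code differs; this shows only the pattern of
+ * overriding the single supported set method.)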
+ */
+
+public abstract class AbstractScalarWriter implements ScalarWriter, WriterEvents {
+
+ public static class ScalarObjectWriter extends AbstractObjectWriter {
+
+ private AbstractScalarWriter scalarWriter;
+
+ public ScalarObjectWriter(ColumnMetadata schema, AbstractScalarWriter scalarWriter) {
+ super(schema);
+ this.scalarWriter = scalarWriter;
+ }
+
+ @Override
+ public ObjectType type() { return ObjectType.SCALAR; }
+
+ @Override
+ public void set(Object value) { scalarWriter.setObject(value); }
+
+ @Override
+ public ScalarWriter scalar() { return scalarWriter; }
+
+ @Override
+ public WriterEvents events() { return scalarWriter; }
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) {
+ scalarWriter.bindListener(listener);
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("scalarWriter");
+ scalarWriter.dump(format);
+ format.endObject();
+ }
+ }
+
+ public abstract BaseDataValueVector vector();
+
+ @Override
+ public void startWrite() { }
+
+ @Override
+ public void startRow() { }
+
+ @Override
+ public void endArrayValue() { }
+
+ @Override
+ public void saveRow() { }
+
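+ /**
+ * Generic, object-based set method: dispatches on the runtime type of
+ * the value to the matching type-specific set method. Boxed Byte and
+ * Short values widen to int; Float widens to double. Unsupported
+ * object types raise an IllegalArgumentException.
+ */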
+ @Override
+ public void setObject(Object value) {
+ if (value == null) {
+ setNull();
+ } else if (value instanceof Integer) {
+ setInt((Integer) value);
+ } else if (value instanceof Long) {
+ setLong((Long) value);
+ } else if (value instanceof String) {
+ setString((String) value);
+ } else if (value instanceof BigDecimal) {
+ setDecimal((BigDecimal) value);
+ } else if (value instanceof Period) {
+ setPeriod((Period) value);
+ } else if (value instanceof byte[]) {
+ byte[] bytes = (byte[]) value;
+ setBytes(bytes, bytes.length);
+ } else if (value instanceof Byte) {
+ setInt((Byte) value);
+ } else if (value instanceof Short) {
+ setInt((Short) value);
+ } else if (value instanceof Double) {
+ setDouble((Double) value);
+ } else if (value instanceof Float) {
+ setDouble((Float) value);
+ } else {
+ throw new IllegalArgumentException("Unsupported type " +
+ value.getClass().getSimpleName());
+ }
+ }
+
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attributeIdentity("vector", vector())
+ .attribute("schema", vector().getField())
+ .endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java
index 030f95a..e1e18dc 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractSingleRowSet.java
@@ -17,23 +17,13 @@
*/
package org.apache.drill.test.rowSet;
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.expr.TypeHelper;
-import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer;
-import org.apache.drill.exec.record.BatchSchema;
-import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.physical.rowSet.model.ReaderIndex;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataRetrieval;
+import org.apache.drill.exec.physical.rowSet.model.single.BaseReaderBuilder;
+import org.apache.drill.exec.record.TupleMetadata;
import org.apache.drill.exec.record.VectorContainer;
-import org.apache.drill.exec.record.VectorWrapper;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader;
-import org.apache.drill.exec.vector.accessor.impl.ColumnAccessorFactory;
-import org.apache.drill.exec.vector.complex.MapVector;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetSchema.FlattenedSchema;
-import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn;
-import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema;
/**
* Base class for row sets backed by a single record batch.
@@ -41,151 +31,27 @@ import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema;
public abstract class AbstractSingleRowSet extends AbstractRowSet implements SingleRowSet {
- /**
- * Internal helper class to organize a set of value vectors for use by the
- * row set class. Subclasses either build vectors from a schema, or map an
- * existing vector container into the row set structure. The row set
- * structure is based on a flattened structure; all vectors appear in
- * a single vector array. Maps are set aside in a separate map list.
- */
-
- public abstract static class StructureBuilder {
- protected final PhysicalSchema schema;
- protected final BufferAllocator allocator;
- protected final ValueVector[] valueVectors;
- protected final MapVector[] mapVectors;
- protected int vectorIndex;
- protected int mapIndex;
-
- public StructureBuilder(BufferAllocator allocator, RowSetSchema schema) {
- this.allocator = allocator;
- this.schema = schema.physical();
- FlattenedSchema flatSchema = schema.flatAccess();
- valueVectors = new ValueVector[flatSchema.count()];
- if (flatSchema.mapCount() == 0) {
- mapVectors = null;
- } else {
- mapVectors = new MapVector[flatSchema.mapCount()];
- }
- }
- }
-
- /**
- * Create a set of value vectors given a schema, then map them into both
- * the value container and the row set structure.
- */
-
- public static class VectorBuilder extends StructureBuilder {
-
- public VectorBuilder(BufferAllocator allocator, RowSetSchema schema) {
- super(allocator, schema);
- }
-
- public ValueVector[] buildContainer(VectorContainer container) {
- for (int i = 0; i < schema.count(); i++) {
- LogicalColumn colSchema = schema.column(i);
- @SuppressWarnings("resource")
- ValueVector v = TypeHelper.getNewVector(colSchema.field, allocator, null);
- container.add(v);
- if (colSchema.field.getType().getMinorType() == MinorType.MAP) {
- MapVector mv = (MapVector) v;
- mapVectors[mapIndex++] = mv;
- buildMap(mv, colSchema.mapSchema);
- } else {
- valueVectors[vectorIndex++] = v;
- }
- }
- container.buildSchema(SelectionVectorMode.NONE);
- return valueVectors;
- }
-
- private void buildMap(MapVector mapVector, PhysicalSchema mapSchema) {
- for (int i = 0; i < mapSchema.count(); i++) {
- LogicalColumn colSchema = mapSchema.column(i);
- MajorType type = colSchema.field.getType();
- Class<? extends ValueVector> vectorClass = TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode());
- @SuppressWarnings("resource")
- ValueVector v = mapVector.addOrGet(colSchema.field.getName(), type, vectorClass);
- if (type.getMinorType() == MinorType.MAP) {
- MapVector mv = (MapVector) v;
- mapVectors[mapIndex++] = mv;
- buildMap(mv, colSchema.mapSchema);
- } else {
- valueVectors[vectorIndex++] = v;
- }
- }
- }
- }
-
- /**
- * Build a row set given an existing vector container. In this case,
- * the vectors exist and we simply need to pull them out of the container
- * and maps and put them into the row set arrays.
- */
-
- public static class VectorMapper extends StructureBuilder {
-
- public VectorMapper(BufferAllocator allocator, RowSetSchema schema) {
- super(allocator, schema);
- }
+ public static class RowSetReaderBuilder extends BaseReaderBuilder {
- public ValueVector[] mapContainer(VectorContainer container) {
- for (VectorWrapper<?> w : container) {
- @SuppressWarnings("resource")
- ValueVector v = w.getValueVector();
- if (v.getField().getType().getMinorType() == MinorType.MAP) {
- MapVector mv = (MapVector) v;
- mapVectors[mapIndex++] = mv;
- buildMap(mv);
- } else {
- valueVectors[vectorIndex++] = v;
- }
- }
- return valueVectors;
+ public RowSetReader buildReader(AbstractSingleRowSet rowSet, ReaderIndex rowIndex) {
+ TupleMetadata schema = rowSet.schema();
+ return new RowSetReaderImpl(schema, rowIndex,
+ buildContainerChildren(rowSet.container(),
+ new MetadataRetrieval(schema)));
}
-
- private void buildMap(MapVector mapVector) {
- for (ValueVector v : mapVector) {
- if (v.getField().getType().getMinorType() == MinorType.MAP) {
- MapVector mv = (MapVector) v;
- mapVectors[mapIndex++] = mv;
- buildMap(mv);
- } else {
- valueVectors[vectorIndex++] = v;
- }
- }
- }
- }
-
- /**
- * Flattened representation of value vectors using a depth-first
- * traversal of maps. Order of vectors here correspond to the column
- * indexes used to access columns in a reader or writer.
- */
-
- protected final ValueVector[] valueVectors;
-
- public AbstractSingleRowSet(BufferAllocator allocator, BatchSchema schema) {
- super(allocator, schema, new VectorContainer());
- valueVectors = new VectorBuilder(allocator, super.schema).buildContainer(container);
- }
-
- public AbstractSingleRowSet(BufferAllocator allocator, VectorContainer container) {
- super(allocator, container.getSchema(), container);
- valueVectors = new VectorMapper(allocator, super.schema).mapContainer(container);
}
public AbstractSingleRowSet(AbstractSingleRowSet rowSet) {
- super(rowSet.allocator, rowSet.schema.batch(), rowSet.container);
- valueVectors = rowSet.valueVectors;
+ super(rowSet.container, rowSet.schema);
}
- @Override
- public ValueVector[] vectors() { return valueVectors; }
+ public AbstractSingleRowSet(VectorContainer container, TupleMetadata schema) {
+ super(container, schema);
+ }
@Override
public long size() {
- RecordBatchSizer sizer = new RecordBatchSizer(container);
+ RecordBatchSizer sizer = new RecordBatchSizer(container());
return sizer.actualSize();
}
@@ -197,21 +63,7 @@ public abstract class AbstractSingleRowSet extends AbstractRowSet implements Sin
* (non-map) vectors.
*/
- protected RowSetReader buildReader(RowSetIndex rowIndex) {
- FlattenedSchema accessSchema = schema().flatAccess();
- ValueVector[] valueVectors = vectors();
- AbstractColumnReader[] readers = new AbstractColumnReader[valueVectors.length];
- for (int i = 0; i < readers.length; i++) {
- MinorType type = accessSchema.column(i).getType().getMinorType();
- if (type == MinorType.MAP) {
- readers[i] = null; // buildMapAccessor(i);
- } else if (type == MinorType.LIST) {
- readers[i] = null; // buildListAccessor(i);
- } else {
- readers[i] = ColumnAccessorFactory.newReader(valueVectors[i].getField().getType());
- readers[i].bind(rowIndex, valueVectors[i]);
- }
- }
- return new RowSetReaderImpl(accessSchema, rowIndex, readers);
+ protected RowSetReader buildReader(ReaderIndex rowIndex) {
+ return new RowSetReaderBuilder().buildReader(this, rowIndex);
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java
index 29a1702..5972f05 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/DirectRowSet.java
@@ -18,19 +18,21 @@
package org.apache.drill.test.rowSet;
import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.physical.rowSet.model.ReaderIndex;
+import org.apache.drill.exec.physical.rowSet.model.SchemaInference;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataRetrieval;
+import org.apache.drill.exec.physical.rowSet.model.single.BaseWriterBuilder;
+import org.apache.drill.exec.physical.rowSet.model.single.BuildVectorsFromMetadata;
+import org.apache.drill.exec.physical.rowSet.model.single.VectorAllocator;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
import org.apache.drill.exec.record.VectorAccessible;
-import org.apache.drill.exec.record.VectorAccessibleUtilities;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.selection.SelectionVector2;
-import org.apache.drill.exec.vector.AllocationHelper;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnWriter;
-import org.apache.drill.exec.vector.accessor.impl.ColumnAccessorFactory;
-import org.apache.drill.exec.vector.accessor.impl.TupleWriterImpl;
import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
+import org.apache.drill.test.rowSet.RowSetWriterImpl.WriterIndexImpl;
/**
* Implementation of a single row set with no indirection (selection)
@@ -46,118 +48,54 @@ public class DirectRowSet extends AbstractSingleRowSet implements ExtendableRowS
* the first. (This is the JDBC ResultSet convention.)
*/
- private static class DirectRowIndex extends BoundedRowIndex {
+ private static class DirectRowIndex extends ReaderIndex {
public DirectRowIndex(int rowCount) {
super(rowCount);
}
@Override
- public int index() { return rowIndex; }
+ public int vectorIndex() { return rowIndex; }
@Override
- public int batch() { return 0; }
+ public int batchIndex() { return 0; }
}
- /**
- * Writer index that points to each row in the row set. The index starts at
- * the 0th row and advances one row on each increment. This allows writers to
- * start positioned at the first row. Writes happen in the current row.
- * Calling <tt>next()</tt> advances to the next position, effectively saving
- * the current row. The most recent row can be abandoned easily simply by not
- * calling <tt>next()</tt>. This means that the number of completed rows is
- * the same as the row index.
- */
-
- private static class ExtendableRowIndex extends RowSetIndex {
-
- private final int maxSize;
-
- public ExtendableRowIndex(int maxSize) {
- this.maxSize = maxSize;
- rowIndex = 0;
- }
+ public static class RowSetWriterBuilder extends BaseWriterBuilder {
- @Override
- public int index() { return rowIndex; }
-
- @Override
- public boolean next() {
- if (++rowIndex <= maxSize ) {
- return true;
- } else {
- rowIndex--;
- return false;
- }
+ public RowSetWriter buildWriter(DirectRowSet rowSet) {
+ WriterIndexImpl index = new WriterIndexImpl();
+ TupleMetadata schema = rowSet.schema();
+ RowSetWriterImpl writer = new RowSetWriterImpl(rowSet, schema, index,
+ buildContainerChildren(rowSet.container(),
+ new MetadataRetrieval(schema)));
+ return writer;
}
-
- @Override
- public int size() { return rowIndex; }
-
- @Override
- public boolean valid() { return rowIndex < maxSize; }
-
- @Override
- public int batch() { return 0; }
}
- /**
- * Implementation of a row set writer. Only available for newly-created,
- * empty, direct, single row sets. Rewriting is not allowed, nor is writing
- * to a hyper row set.
- */
-
- public class RowSetWriterImpl extends TupleWriterImpl implements RowSetWriter {
-
- private final ExtendableRowIndex index;
- private final ExtendableRowSet rowSet;
-
- protected RowSetWriterImpl(ExtendableRowSet rowSet, TupleSchema schema, ExtendableRowIndex index, AbstractColumnWriter[] writers) {
- super(schema, writers);
- this.rowSet = rowSet;
- this.index = index;
- start();
- }
-
- @Override
- public void setRow(Object...values) {
- if (! index.valid()) {
- throw new IndexOutOfBoundsException("Write past end of row set");
- }
- for (int i = 0; i < values.length; i++) {
- set(i, values[i]);
- }
- save();
- }
-
- @Override
- public boolean valid() { return index.valid(); }
-
- @Override
- public int index() { return index.position(); }
+ private DirectRowSet(VectorContainer container, TupleMetadata schema) {
+ super(container, schema);
+ }
- @Override
- public void save() {
- index.next();
- start();
- }
+ public DirectRowSet(AbstractSingleRowSet from) {
+ super(from);
+ }
- @Override
- public void done() {
- rowSet.setRowCount(index.size());
- }
+ public static DirectRowSet fromSchema(BufferAllocator allocator, BatchSchema schema) {
+ return fromSchema(allocator, TupleSchema.fromFields(schema));
}
- public DirectRowSet(BufferAllocator allocator, BatchSchema schema) {
- super(allocator, schema);
+ public static DirectRowSet fromSchema(BufferAllocator allocator, TupleMetadata schema) {
+ BuildVectorsFromMetadata builder = new BuildVectorsFromMetadata(allocator);
+ return new DirectRowSet(builder.build(schema), schema);
}
- public DirectRowSet(BufferAllocator allocator, VectorContainer container) {
- super(allocator, container);
+ public static DirectRowSet fromContainer(VectorContainer container) {
+ return new DirectRowSet(container, new SchemaInference().infer(container));
}
- public DirectRowSet(BufferAllocator allocator, VectorAccessible va) {
- super(allocator, toContainer(va, allocator));
+ public static DirectRowSet fromVectorAccessible(BufferAllocator allocator, VectorAccessible va) {
+ return fromContainer(toContainer(va, allocator));
}
private static VectorContainer toContainer(VectorAccessible va, BufferAllocator allocator) {
@@ -168,16 +106,8 @@ public class DirectRowSet extends AbstractSingleRowSet implements ExtendableRowS
}
@Override
- public void allocate(int recordCount) {
- for (final ValueVector v : valueVectors) {
- AllocationHelper.allocate(v, recordCount, 50, 10);
- }
- }
-
- @Override
- public void setRowCount(int rowCount) {
- container.setRecordCount(rowCount);
- VectorAccessibleUtilities.setValueCount(container, rowCount);
+ public void allocate(int rowCount) {
+ new VectorAllocator(container()).allocate(rowCount, schema());
}
@Override
@@ -187,29 +117,11 @@ public class DirectRowSet extends AbstractSingleRowSet implements ExtendableRowS
@Override
public RowSetWriter writer(int initialRowCount) {
- if (container.hasRecordCount()) {
+ if (container().hasRecordCount()) {
throw new IllegalStateException("Row set already contains data");
}
allocate(initialRowCount);
- return buildWriter(new ExtendableRowIndex(Character.MAX_VALUE));
- }
-
- /**
- * Build writer objects for each column based on the column type.
- *
- * @param rowIndex the index which points to each row
- * @return an array of writers
- */
-
- protected RowSetWriter buildWriter(ExtendableRowIndex rowIndex) {
- ValueVector[] valueVectors = vectors();
- AbstractColumnWriter[] writers = new AbstractColumnWriter[valueVectors.length];
- for (int i = 0; i < writers.length; i++) {
- writers[i] = ColumnAccessorFactory.newWriter(valueVectors[i].getField().getType());
- writers[i].bind(rowIndex, valueVectors[i]);
- }
- TupleSchema accessSchema = schema().hierarchicalAccess();
- return new RowSetWriterImpl(this, accessSchema, rowIndex, writers);
+ return new RowSetWriterBuilder().buildWriter(this);
}
@Override
@@ -233,9 +145,4 @@ public class DirectRowSet extends AbstractSingleRowSet implements ExtendableRowS
@Override
public SelectionVector2 getSv2() { return null; }
-
- @Override
- public RowSet merge(RowSet other) {
- return new DirectRowSet(allocator, container().merge(other.container()));
- }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java
index afc2e6e..8a3db9f 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/HyperRowSetImpl.java
@@ -17,27 +17,14 @@
*/
package org.apache.drill.test.rowSet;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.memory.BufferAllocator;
+import org.apache.drill.exec.physical.rowSet.model.MetadataProvider.MetadataRetrieval;
+import org.apache.drill.exec.physical.rowSet.model.SchemaInference;
+import org.apache.drill.exec.physical.rowSet.model.hyper.BaseReaderBuilder;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
-import org.apache.drill.exec.record.HyperVectorWrapper;
-import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
import org.apache.drill.exec.record.VectorContainer;
-import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.record.selection.SelectionVector4;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.AccessorUtilities;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader.VectorAccessor;
-import org.apache.drill.exec.vector.accessor.impl.ColumnAccessorFactory;
-import org.apache.drill.exec.vector.complex.AbstractMapVector;
import org.apache.drill.test.rowSet.RowSet.HyperRowSet;
-import org.apache.drill.test.rowSet.RowSetSchema.FlattenedSchema;
-import org.apache.drill.test.rowSet.RowSetSchema.LogicalColumn;
-import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema;
/**
* Implements a row set wrapper around a collection of "hyper vectors."
@@ -52,176 +39,14 @@ import org.apache.drill.test.rowSet.RowSetSchema.PhysicalSchema;
public class HyperRowSetImpl extends AbstractRowSet implements HyperRowSet {
- /**
- * Read-only row index into the hyper row set with batch and index
- * values mapping via an SV4.
- */
-
- public static class HyperRowIndex extends BoundedRowIndex {
-
- private final SelectionVector4 sv4;
-
- public HyperRowIndex(SelectionVector4 sv4) {
- super(sv4.getCount());
- this.sv4 = sv4;
- }
-
- @Override
- public int index() {
- return AccessorUtilities.sv4Index(sv4.get(rowIndex));
- }
-
- @Override
- public int batch( ) {
- return AccessorUtilities.sv4Batch(sv4.get(rowIndex));
- }
- }
-
- /**
- * Vector accessor used by the column accessors to obtain the vector for
- * each column value. That is, position 0 might be batch 4, index 3,
- * while position 1 might be batch 1, index 7, and so on.
- */
-
- public static class HyperVectorAccessor implements VectorAccessor {
+ public static class RowSetReaderBuilder extends BaseReaderBuilder {
- private final HyperRowIndex rowIndex;
- private final ValueVector[] vectors;
-
- public HyperVectorAccessor(HyperVectorWrapper<ValueVector> hvw, HyperRowIndex rowIndex) {
- this.rowIndex = rowIndex;
- vectors = hvw.getValueVectors();
- }
-
- @Override
- public ValueVector vector() {
- return vectors[rowIndex.batch()];
- }
- }
-
- /**
- * Build a hyper row set by restructuring a hyper vector bundle into a uniform
- * shape. Consider this schema: <pre><code>
- * { a: 10, b: { c: 20, d: { e: 30 } } }</code></pre>
- * <p>
- * The hyper container, with two batches, has this structure:
- * <table border="1">
- * <tr><th>Batch</th><th>a</th><th>b</th></tr>
- * <tr><td>0</td><td>Int vector</td><td>Map Vector(Int vector, Map Vector(Int vector))</td></th>
- * <tr><td>1</td><td>Int vector</td><td>Map Vector(Int vector, Map Vector(Int vector))</td></th>
- * </table>
- * <p>
- * The above table shows that top-level scalar vectors (such as the Int Vector for column
- * a) appear "end-to-end" as a hyper-vector. Maps also appear end-to-end. But, the
- * contents of the map (column c) do not appear end-to-end. Instead, they appear as
- * contents in the map vector. To get to c, one indexes into the map vector, steps inside
- * the map to find c and indexes to the right row.
- * <p>
- * Similarly, the maps for d do not appear end-to-end, one must step to the right batch
- * in b, then step to d.
- * <p>
- * Finally, to get to e, one must step
- * into the hyper vector for b, then steps to the proper batch, steps to d, step to e
- * and finally step to the row within e. This is a very complex, costly indexing scheme
- * that differs depending on map nesting depth.
- * <p>
- * To simplify access, this class restructures the maps to flatten the scalar vectors
- * into end-to-end hyper vectors. For example, for the above:
- * <p>
- * <table border="1">
- * <tr><th>Batch</th><th>a</th><th>c</th><th>d</th></tr>
- * <tr><td>0</td><td>Int vector</td><td>Int vector</td><td>Int vector</td></th>
- * <tr><td>1</td><td>Int vector</td><td>Int vector</td><td>Int vector</td></th>
- * </table>
- *
- * The maps are still available as hyper vectors, but separated into map fields.
- * (Scalar access no longer needs to access the maps.) The result is a uniform
- * addressing scheme for both top-level and nested vectors.
- */
-
- public static class HyperVectorBuilder {
-
- protected final HyperVectorWrapper<?> valueVectors[];
- protected final HyperVectorWrapper<AbstractMapVector> mapVectors[];
- private final List<ValueVector> nestedScalars[];
- private int vectorIndex;
- private int mapIndex;
- private final PhysicalSchema physicalSchema;
-
- @SuppressWarnings("unchecked")
- public HyperVectorBuilder(RowSetSchema schema) {
- physicalSchema = schema.physical();
- FlattenedSchema flatSchema = schema.flatAccess();
- valueVectors = new HyperVectorWrapper<?>[schema.hierarchicalAccess().count()];
- if (flatSchema.mapCount() == 0) {
- mapVectors = null;
- nestedScalars = null;
- } else {
- mapVectors = (HyperVectorWrapper<AbstractMapVector>[])
- new HyperVectorWrapper<?>[flatSchema.mapCount()];
- nestedScalars = new ArrayList[flatSchema.count()];
- }
- }
-
- @SuppressWarnings("unchecked")
- public HyperVectorWrapper<ValueVector>[] mapContainer(VectorContainer container) {
- int i = 0;
- for (VectorWrapper<?> w : container) {
- HyperVectorWrapper<?> hvw = (HyperVectorWrapper<?>) w;
- if (w.getField().getType().getMinorType() == MinorType.MAP) {
- HyperVectorWrapper<AbstractMapVector> mw = (HyperVectorWrapper<AbstractMapVector>) hvw;
- mapVectors[mapIndex++] = mw;
- buildHyperMap(physicalSchema.column(i).mapSchema(), mw);
- } else {
- valueVectors[vectorIndex++] = hvw;
- }
- i++;
- }
- if (nestedScalars != null) {
- buildNestedHyperVectors();
- }
- return (HyperVectorWrapper<ValueVector>[]) valueVectors;
- }
-
- private void buildHyperMap(PhysicalSchema mapSchema, HyperVectorWrapper<AbstractMapVector> mapWrapper) {
- createHyperVectors(mapSchema);
- for (AbstractMapVector mapVector : mapWrapper.getValueVectors()) {
- buildMap(mapSchema, mapVector);
- }
- }
-
- private void buildMap(PhysicalSchema mapSchema, AbstractMapVector mapVector) {
- for (ValueVector v : mapVector) {
- LogicalColumn col = mapSchema.column(v.getField().getName());
- if (col.isMap()) {
- buildMap(col.mapSchema, (AbstractMapVector) v);
- } else {
- nestedScalars[col.accessIndex()].add(v);
- }
- }
- }
-
- private void createHyperVectors(PhysicalSchema mapSchema) {
- for (int i = 0; i < mapSchema.count(); i++) {
- LogicalColumn col = mapSchema.column(i);
- if (col.isMap()) {
- createHyperVectors(col.mapSchema);
- } else {
- nestedScalars[col.accessIndex()] = new ArrayList<ValueVector>();
- }
- }
- }
-
- private void buildNestedHyperVectors() {
- for (int i = 0; i < nestedScalars.length; i++) {
- if (nestedScalars[i] == null) {
- continue;
- }
- ValueVector vectors[] = new ValueVector[nestedScalars[i].size()];
- nestedScalars[i].toArray(vectors);
- assert valueVectors[i] == null;
- valueVectors[i] = new HyperVectorWrapper<ValueVector>(vectors[0].getField(), vectors, false);
- }
+ public RowSetReader buildReader(HyperRowSet rowSet, SelectionVector4 sv4) {
+ TupleMetadata schema = rowSet.schema();
+ HyperRowIndex rowIndex = new HyperRowIndex(sv4);
+ return new RowSetReaderImpl(schema, rowIndex,
+ buildContainerChildren(rowSet.container(),
+ new MetadataRetrieval(schema)));
}
}
@@ -231,18 +56,9 @@ public class HyperRowSetImpl extends AbstractRowSet implements HyperRowSet {
private final SelectionVector4 sv4;
- /**
- * Collection of hyper vectors in flattened order: a left-to-right,
- * depth first ordering of vectors in maps. Order here corresponds to
- * the order used for column indexes in the row set reader.
- */
-
- private final HyperVectorWrapper<ValueVector> hvw[];
-
- public HyperRowSetImpl(BufferAllocator allocator, VectorContainer container, SelectionVector4 sv4) {
- super(allocator, container.getSchema(), container);
+ public HyperRowSetImpl(VectorContainer container, SelectionVector4 sv4) {
+ super(container, new SchemaInference().infer(container));
this.sv4 = sv4;
- hvw = new HyperVectorBuilder(schema).mapContainer(container);
}
@Override
@@ -252,33 +68,8 @@ public class HyperRowSetImpl extends AbstractRowSet implements HyperRowSet {
public boolean isWritable() { return false; }
@Override
- public RowSetWriter writer() {
- throw new UnsupportedOperationException("Cannot write to a hyper vector");
- }
-
- @Override
public RowSetReader reader() {
- return buildReader(new HyperRowIndex(sv4));
- }
-
- /**
- * Internal method to build the set of column readers needed for
- * this row set. Used when building a row set reader.
- * @param rowIndex object that points to the current row
- * @return an array of column readers: in the same order as the
- * (non-map) vectors.
- */
-
- protected RowSetReader buildReader(HyperRowIndex rowIndex) {
- FlattenedSchema accessSchema = schema().flatAccess();
- AbstractColumnReader readers[] = new AbstractColumnReader[accessSchema.count()];
- for (int i = 0; i < readers.length; i++) {
- MaterializedField field = accessSchema.column(i);
- readers[i] = ColumnAccessorFactory.newReader(field.getType());
- HyperVectorWrapper<ValueVector> hvw = getHyperVector(i);
- readers[i].bind(rowIndex, field, new HyperVectorAccessor(hvw, rowIndex));
- }
- return new RowSetReaderImpl(accessSchema, rowIndex, readers);
+ return new RowSetReaderBuilder().buildReader(this, sv4);
}
@Override
@@ -288,13 +79,5 @@ public class HyperRowSetImpl extends AbstractRowSet implements HyperRowSet {
public SelectionVector4 getSv4() { return sv4; }
@Override
- public HyperVectorWrapper<ValueVector> getHyperVector(int i) { return hvw[i]; }
-
- @Override
public int rowCount() { return sv4.getCount(); }
-
- @Override
- public RowSet merge(RowSet other) {
- return new HyperRowSetImpl(allocator, container().merge(other.container()), sv4);
- }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java
index 1914705..e729bba 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/IndirectRowSet.java
@@ -20,6 +20,8 @@ package org.apache.drill.test.rowSet;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer;
+import org.apache.drill.exec.physical.rowSet.model.ReaderIndex;
+import org.apache.drill.exec.physical.rowSet.model.SchemaInference;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.selection.SelectionVector2;
@@ -33,14 +35,14 @@ public class IndirectRowSet extends AbstractSingleRowSet {
/**
* Reader index that points to each row indirectly through the
- * selection vector. The {@link #index()} method points to the
+ * selection vector. The {@link #vectorIndex()} method points to the
* actual data row, while the {@link #position()} method gives
* the position relative to the indirection vector. That is,
* the position increases monotonically, but the index jumps
* around as specified by the indirection vector.
*/
- private static class IndirectRowIndex extends BoundedRowIndex {
+ private static class IndirectRowIndex extends ReaderIndex {
private final SelectionVector2 sv2;
@@ -50,21 +52,25 @@ public class IndirectRowSet extends AbstractSingleRowSet {
}
@Override
- public int index() { return sv2.getIndex(rowIndex); }
+ public int vectorIndex() { return sv2.getIndex(rowIndex); }
@Override
- public int batch() { return 0; }
+ public int batchIndex() { return 0; }
}
private final SelectionVector2 sv2;
- public IndirectRowSet(BufferAllocator allocator, VectorContainer container) {
- this(allocator, container, makeSv2(allocator, container));
+ private IndirectRowSet(VectorContainer container, SelectionVector2 sv2) {
+ super(container, new SchemaInference().infer(container));
+ this.sv2 = sv2;
}
- public IndirectRowSet(BufferAllocator allocator, VectorContainer container, SelectionVector2 sv2) {
- super(allocator, container);
- this.sv2 = sv2;
+ public static IndirectRowSet fromContainer(VectorContainer container) {
+ return new IndirectRowSet(container, makeSv2(container.getAllocator(), container));
+ }
+
+ public static IndirectRowSet fromSv2(VectorContainer container, SelectionVector2 sv2) {
+ return new IndirectRowSet(container, sv2);
}
private static SelectionVector2 makeSv2(BufferAllocator allocator, VectorContainer container) {
@@ -83,7 +89,7 @@ public class IndirectRowSet extends AbstractSingleRowSet {
public IndirectRowSet(DirectRowSet directRowSet) {
super(directRowSet);
- sv2 = makeSv2(allocator, container);
+ sv2 = makeSv2(allocator(), container());
}
@Override
@@ -96,11 +102,6 @@ public class IndirectRowSet extends AbstractSingleRowSet {
}
@Override
- public RowSetWriter writer() {
- throw new UnsupportedOperationException("Cannot write to an existing row set");
- }
-
- @Override
public RowSetReader reader() {
return buildReader(new IndirectRowIndex(getSv2()));
}
@@ -119,12 +120,7 @@ public class IndirectRowSet extends AbstractSingleRowSet {
@Override
public long size() {
- RecordBatchSizer sizer = new RecordBatchSizer(container, sv2);
+ RecordBatchSizer sizer = new RecordBatchSizer(container(), sv2);
return sizer.actualSize();
}
-
- @Override
- public RowSet merge(RowSet other) {
- return new IndirectRowSet(allocator, container().merge(other.container()), sv2);
- }
}
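To make the position-versus-index distinction concrete, a minimal sketch, assuming only the methods visible in this patch (fromContainer(), getSv2(), index(), rowIndex()):

    IndirectRowSet rowSet = IndirectRowSet.fromContainer(container);
    SelectionVector2 sv2 = rowSet.getSv2();
    RowSetReader reader = rowSet.reader();
    while (reader.next()) {
      // The position advances 0, 1, 2, ... while the vector index jumps
      // to whatever row the selection vector names at that position.
      assert reader.rowIndex() == sv2.getIndex(reader.index());
    }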
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSet.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSet.java
index 474508c..f2435de 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSet.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSet.java
@@ -20,25 +20,24 @@ package org.apache.drill.test.rowSet;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
-import org.apache.drill.exec.record.HyperVectorWrapper;
+import org.apache.drill.exec.record.TupleMetadata;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.selection.SelectionVector2;
import org.apache.drill.exec.record.selection.SelectionVector4;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.ColumnReader;
-import org.apache.drill.exec.vector.accessor.ColumnWriter;
-import org.apache.drill.exec.vector.accessor.TupleReader;
-import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
/**
* A row set is a collection of rows stored as value vectors. Elsewhere in
* Drill we call this a "record batch", but that term has been overloaded to
- * mean the runtime implementation of an operator...
+ * mean the runtime implementation of an operator.
* <p>
* A row set encapsulates a set of vectors and provides access to Drill's
* various "views" of vectors: {@link VectorContainer},
- * {@link VectorAccessible}, etc.
+ * {@link VectorAccessible}, etc. The row set wraps a {@link TupleModel}
+ * which holds the vectors and column metadata. This form is optimized
+ * for easy use in testing; use other implementations for production code.
* <p>
* A row set is defined by a {@link RowSetSchema}. For testing purposes, a row
* set has a fixed schema; we don't allow changing the set of vectors
@@ -52,7 +51,7 @@ import org.apache.drill.exec.vector.accessor.TupleWriter;
* Drill provides a large number of vector (data) types. Each requires a
* type-specific way to set data. The row set writer uses a {@link ScalarWriter}
* to set each value in a way unique to the specific data type. Similarly, the
- * row set reader provides a {@link ColumnReader} interface. In both cases,
+ * row set reader provides a {@link ScalarReader} interface. In both cases,
* columns can be accessed by index number (as defined in the schema) or
* by name.
* <p>
@@ -78,54 +77,6 @@ import org.apache.drill.exec.vector.accessor.TupleWriter;
public interface RowSet {
- /**
- * Interface for writing values to a row set. Only available
- * for newly-created, single, direct row sets. Eventually, if
- * we want to allow updating a row set, we have to create a
- * new row set with the updated columns, then merge the new
- * and old row sets to create a new immutable row set.
- */
- interface RowSetWriter extends TupleWriter {
- void setRow(Object...values);
- boolean valid();
- int index();
- void save();
- void done();
- }
-
- /**
- * Reader for all types of row sets.
- */
- interface RowSetReader extends TupleReader {
-
- /**
- * Total number of rows in the row set.
- * @return total number of rows
- */
- int size();
-
- boolean next();
- int index();
- void set(int index);
-
- /**
- * Batch index: 0 for a single batch, batch for the current
- * row is a hyper-batch.
- * @return index of the batch for the current row
- */
- int batchIndex();
-
- /**
- * The index of the underlying row which may be indexed by an
- * Sv2 or Sv4.
- *
- * @return
- */
-
- int rowIndex();
- boolean valid();
- }
-
boolean isExtendable();
boolean isWritable();
@@ -136,13 +87,11 @@ public interface RowSet {
int rowCount();
- RowSetWriter writer();
-
RowSetReader reader();
void clear();
- RowSetSchema schema();
+ TupleMetadata schema();
BufferAllocator allocator();
@@ -157,17 +106,16 @@ public interface RowSet {
*
* @return memory size in bytes
*/
- long size();
- RowSet merge(RowSet other);
+ long size();
BatchSchema batchSchema();
/**
* Row set that manages a single batch of rows.
*/
- interface SingleRowSet extends RowSet {
- ValueVector[] vectors();
+
+ public interface SingleRowSet extends RowSet {
SingleRowSet toIndirect();
SelectionVector2 getSv2();
}
@@ -177,9 +125,10 @@ public interface RowSet {
* Once writing is complete, the row set becomes an
* immutable direct row set.
*/
+
interface ExtendableRowSet extends SingleRowSet {
void allocate(int recordCount);
- void setRowCount(int rowCount);
+ RowSetWriter writer();
RowSetWriter writer(int initialRowCount);
}
@@ -187,8 +136,8 @@ public interface RowSet {
* Row set comprised of multiple single row sets, along with
* an indirection vector (SV4).
*/
+
interface HyperRowSet extends RowSet {
SelectionVector4 getSv4();
- HyperVectorWrapper<ValueVector> getHyperVector(int i);
}
}
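A hedged sketch of the lifecycle these interfaces imply, assuming DirectRowSet implements ExtendableRowSet (its fromSchema() factory appears in the RowSetBuilder change below):

    ExtendableRowSet empty = DirectRowSet.fromSchema(allocator, schema);
    RowSetWriter writer = empty.writer();
    writer.setRow(10, "fred");
    SingleRowSet direct = writer.done();          // immutable, no selection vector
    SingleRowSet indirect = direct.toIndirect();  // wraps the same data in an SV2
    indirect.clear();                             // release buffers when finished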
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetBuilder.java
index 6f9a8d9..7b1554c 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetBuilder.java
@@ -19,7 +19,10 @@ package org.apache.drill.test.rowSet;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.record.BatchSchema;
-import org.apache.drill.test.rowSet.RowSet.RowSetWriter;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.test.OperatorFixture;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
/**
@@ -40,14 +43,20 @@ public final class RowSetBuilder {
private boolean withSv2;
public RowSetBuilder(BufferAllocator allocator, BatchSchema schema) {
+ this(allocator, TupleSchema.fromFields(schema), 10);
+ }
+
+ public RowSetBuilder(BufferAllocator allocator, TupleMetadata schema) {
this(allocator, schema, 10);
}
- public RowSetBuilder(BufferAllocator allocator, BatchSchema schema, int capacity) {
- rowSet = new DirectRowSet(allocator, schema);
+ public RowSetBuilder(BufferAllocator allocator, TupleMetadata schema, int capacity) {
+ rowSet = DirectRowSet.fromSchema(allocator, schema);
writer = rowSet.writer(capacity);
}
+ public TupleWriter writer() { return writer; }
+
/**
* Add a new row using column values passed as variable-length arguments. Expects
* map values to be flattened. A schema of (a:int, b:map(c:varchar)) would be
@@ -56,17 +65,18 @@ public final class RowSetBuilder {
* <tt>add(10, new int[] {100, 200});</tt><br>
* @param values column values in column index order
* @return this builder
- * @see {@link #addSingleCol(Object)} to create a row of a single column when
- * the value to <tt>add()</tt> is ambiguous
+ * @throws IllegalStateException if the batch, or any vector in the batch,
+ * becomes full. This method is designed to be used in tests where we will
+ * seldom create a full vector of data.
*/
- public RowSetBuilder add(Object...values) {
+ public RowSetBuilder addRow(Object...values) {
writer.setRow(values);
return this;
}
/**
- * The {@link #add(Object...)} method uses Java variable-length arguments to
+ * The {@link #addRow(Object...)} method uses Java variable-length arguments to
* pass a row of values. But, when the row consists of a single array, Java
* gets confused: is that an array for variable-arguments or is it the value
* of the first argument? This method clearly states that the single value
@@ -93,7 +103,7 @@ public final class RowSetBuilder {
*/
public RowSetBuilder addSingleCol(Object value) {
- return add(new Object[] { value });
+ return addRow(new Object[] { value });
}
/**
@@ -110,10 +120,10 @@ public final class RowSetBuilder {
}
public SingleRowSet build() {
- writer.done();
+ SingleRowSet result = writer.done();
if (withSv2) {
- return rowSet.toIndirect();
+ return result.toIndirect();
}
- return rowSet;
+ return result;
}
}
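A short usage sketch of the renamed builder methods; the fluent SchemaBuilder calls are an assumption based on the test fixtures, not part of this diff:

    BatchSchema schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .build();
    SingleRowSet rowSet = new RowSetBuilder(allocator, schema)
        .addRow(10, "fred")
        .addRow(20, "wilma")
        .build();
    // For a one-column row whose value is itself an array, addSingleCol()
    // avoids the varargs ambiguity that addRow() would hit:
    //   builder.addSingleCol(new int[] {100, 200});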
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetComparison.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetComparison.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetComparison.java
index 6e72923..1cae64f 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetComparison.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetComparison.java
@@ -21,8 +21,10 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.apache.drill.exec.vector.accessor.ArrayReader;
-import org.apache.drill.exec.vector.accessor.ColumnReader;
-import org.apache.drill.test.rowSet.RowSet.RowSetReader;
+import org.apache.drill.exec.vector.accessor.ObjectReader;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.TupleReader;
import org.bouncycastle.util.Arrays;
import java.util.Comparator;
@@ -31,19 +33,48 @@ import java.util.Comparator;
* For testing, compare the contents of two row sets (record batches)
* to verify that they are identical. Supports masks to exclude certain
* columns from comparison.
+ * <p>
+ * Drill rows are analogous to JSON documents: they can have scalars,
+ * arrays and maps, with maps and lists holding maps, arrays and scalars.
+ * This class walks the row structure tree to compare each structure
+ * of two row sets checking counts, types and values to ensure that the
+ * "actual" result set (result of a test) matches the "expected" result
+ * set.
+ * <p>
+ * This class acts as an example of how to use the suite of reader
+ * abstractions.
*/
public class RowSetComparison {
+ /**
+ * Row set with the expected outcome of a test. This is the "golden"
+ * copy defined in the test itself.
+ */
private RowSet expected;
+ /**
+ * Some tests wish to ignore certain (top-level) columns. If a
+ * mask is provided, then only those columns with a <tt>true</tt>
+ * mask entry will be verified.
+ */
private boolean mask[];
+ /**
+ * Floats and doubles do not compare exactly. This delta is used
+ * by JUnit for such comparisons.
+ */
private double delta = 0.001;
+ /**
+ * Tests can skip the first n rows.
+ */
private int offset;
private int span = -1;
public RowSetComparison(RowSet expected) {
this.expected = expected;
- mask = new boolean[expected.schema().hierarchicalAccess().count()];
+
+ // TODO: The mask only works at the top level presently
+
+ mask = new boolean[expected.schema().size()];
for (int i = 0; i < mask.length; i++) {
mask[i] = true;
}
@@ -134,7 +165,8 @@ public class RowSetComparison {
for (int i = 0; i < testLength; i++) {
er.next();
ar.next();
- verifyRow(er, ar);
+ String label = Integer.toString(er.index() + 1);
+ verifyRow(label, er, ar);
}
}
@@ -167,22 +199,50 @@ public class RowSetComparison {
}
}
- private void verifyRow(RowSetReader er, RowSetReader ar) {
+ private void verifyRow(String label, TupleReader er, TupleReader ar) {
+ String prefix = label + ":";
for (int i = 0; i < mask.length; i++) {
if (! mask[i]) {
continue;
}
- ColumnReader ec = er.column(i);
- ColumnReader ac = ar.column(i);
- String label = (er.index() + 1) + ":" + i;
- assertEquals(label, ec.valueType(), ac.valueType());
- if (ec.isNull()) {
- assertTrue(label + " - column not null", ac.isNull());
- continue;
- }
- if (! ec.isNull()) {
- assertTrue(label + " - column is null", ! ac.isNull());
- }
+ verifyColumn(prefix + i, er.column(i), ar.column(i));
+ }
+ }
+
+ private void verifyColumn(String label, ObjectReader ec, ObjectReader ac) {
+ assertEquals(label, ec.type(), ac.type());
+ switch (ec.type()) {
+ case ARRAY:
+ verifyArray(label, ec.array(), ac.array());
+ break;
+ case SCALAR:
+ verifyScalar(label, ec.scalar(), ac.scalar());
+ break;
+ case TUPLE:
+ verifyTuple(label, ec.tuple(), ac.tuple());
+ break;
+ default:
+ throw new IllegalStateException( "Unexpected type: " + ec.type());
+ }
+ }
+
+ private void verifyTuple(String label, TupleReader er, TupleReader ar) {
+ assertEquals(label + " - tuple count", er.columnCount(), ar.columnCount());
+ String prefix = label + ":";
+ for (int i = 0; i < er.columnCount(); i++) {
+ verifyColumn(prefix + i, er.column(i), ar.column(i));
+ }
+ }
+
+ private void verifyScalar(String label, ScalarReader ec, ScalarReader ac) {
+ assertEquals(label + " - value type", ec.valueType(), ac.valueType());
+ if (ec.isNull()) {
+ assertTrue(label + " - column not null", ac.isNull());
+ return;
+ }
+ assertTrue(label + " - column is null", ! ac.isNull());
switch (ec.valueType()) {
case BYTES: {
byte expected[] = ec.getBytes();
@@ -209,24 +269,42 @@ public class RowSetComparison {
case PERIOD:
assertEquals(label, ec.getPeriod(), ac.getPeriod());
break;
- case ARRAY:
- verifyArray(label, ec.array(), ac.array());
- break;
default:
throw new IllegalStateException( "Unexpected type: " + ec.valueType());
- }
}
}
- private void verifyArray(String colLabel, ArrayReader ea,
+ private void verifyArray(String label, ArrayReader ea,
ArrayReader aa) {
+ assertEquals(label, ea.entryType(), aa.entryType());
+ assertEquals(label, ea.size(), aa.size());
+ switch (ea.entryType()) {
+ case ARRAY:
+ throw new UnsupportedOperationException();
+ case SCALAR:
+ verifyScalarArray(label, ea.elements(), aa.elements());
+ break;
+ case TUPLE:
+ verifyTupleArray(label, ea, aa);
+ break;
+ default:
+ throw new IllegalStateException( "Unexpected type: " + ea.entryType());
+ }
+ }
+
+ private void verifyTupleArray(String label, ArrayReader ea, ArrayReader aa) {
+ for (int i = 0; i < ea.size(); i++) {
+ verifyTuple(label + "[" + i + "]", ea.tuple(i), aa.tuple(i));
+ }
+ }
+
+ private void verifyScalarArray(String colLabel, ScalarElementReader ea,
+ ScalarElementReader aa) {
assertEquals(colLabel, ea.valueType(), aa.valueType());
assertEquals(colLabel, ea.size(), aa.size());
for (int i = 0; i < ea.size(); i++) {
String label = colLabel + "[" + i + "]";
switch (ea.valueType()) {
- case ARRAY:
- throw new IllegalStateException("Arrays of arrays not supported yet");
case BYTES: {
byte expected[] = ea.getBytes(i);
byte actual[] = aa.getBytes(i);
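Typical use, sketched from the methods exercised by the tests later in this patch (verifyAndClearAll() appears in TestResultSetLoaderProtocol):

    SingleRowSet expected = new RowSetBuilder(allocator, batchSchema)
        .addRow(10, "fred")
        .addRow(20, "wilma")
        .build();
    new RowSetComparison(expected)
        .verifyAndClearAll(actual);  // asserts equality, then frees both row sets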
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java
index 42a7e63..e730987 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetPrinter.java
@@ -20,8 +20,8 @@ package org.apache.drill.test.rowSet;
import java.io.PrintStream;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
-import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema;
-import org.apache.drill.test.rowSet.RowSet.RowSetReader;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.TupleMetadata;
/**
* Print a row set in CSV-like format. Primarily for debugging.
@@ -41,21 +41,21 @@ public class RowSetPrinter {
public void print(PrintStream out) {
SelectionVectorMode selectionMode = rowSet.indirectionType();
RowSetReader reader = rowSet.reader();
- int colCount = reader.schema().count();
- printSchema(out, selectionMode);
+ int colCount = reader.schema().size();
+ printSchema(out, selectionMode, reader);
while (reader.next()) {
printHeader(out, reader, selectionMode);
for (int i = 0; i < colCount; i++) {
if (i > 0) {
out.print(", ");
}
- out.print(reader.getAsString(i));
+ out.print(reader.column(i).getAsString());
}
out.println();
}
}
- private void printSchema(PrintStream out, SelectionVectorMode selectionMode) {
+ private void printSchema(PrintStream out, SelectionVectorMode selectionMode, RowSetReader reader) {
out.print("#");
switch (selectionMode) {
case FOUR_BYTE:
@@ -68,14 +68,24 @@ public class RowSetPrinter {
break;
}
out.print(": ");
- TupleSchema schema = rowSet.schema().hierarchicalAccess();
- for (int i = 0; i < schema.count(); i++) {
+ TupleMetadata schema = reader.schema();
+ printTupleSchema(out, schema);
+ out.println();
+ }
+
+ private void printTupleSchema(PrintStream out, TupleMetadata schema) {
+ for (int i = 0; i < schema.size(); i++) {
if (i > 0) {
out.print(", ");
}
- out.print(schema.column(i).getName());
+ ColumnMetadata colSchema = schema.metadata(i);
+ out.print(colSchema.name());
+ if (colSchema.isMap()) {
+ out.print("(");
+ printTupleSchema(out, colSchema.mapSchema());
+ out.print(")");
+ }
}
- out.println();
}
private void printHeader(PrintStream out, RowSetReader reader, SelectionVectorMode selectionMode) {
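Usage is a one-liner; the constructor argument is an assumption inferred from the rowSet field used above:

    new RowSetPrinter(rowSet).print(System.out);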
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReader.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReader.java
new file mode 100644
index 0000000..3e27529
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReader.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet;
+
+import org.apache.drill.exec.vector.accessor.TupleReader;
+
+/**
+ * Reader for all types of row sets.
+ */
+
+public interface RowSetReader extends TupleReader {
+
+ /**
+ * Total number of rows in the row set.
+ * @return total number of rows
+ */
+ int rowCount();
+
+ boolean next();
+ int index();
+ void set(int index);
+
+ /**
+ * Batch index: 0 for a single batch, or the index of the batch
+ * within the hyper-batch that holds the current row.
+ * @return index of the batch for the current row
+ */
+ int batchIndex();
+
+ /**
+ * The index of the underlying data row, which may be reached
+ * indirectly through an SV2 or SV4.
+ *
+ * @return index of the current row in the underlying vectors
+ */
+
+ int rowIndex();
+ boolean valid();
+}
\ No newline at end of file
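A hedged sketch of the two access patterns this interface supports; the column types are assumed for illustration:

    RowSetReader reader = rowSet.reader();
    while (reader.next()) {                  // sequential scan
      int a = reader.column(0).scalar().getInt();
      String b = reader.column(1).scalar().getString();
    }
    reader.set(4);                           // random access: fifth row
    if (reader.valid()) {
      // read columns as above
    }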
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReaderImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReaderImpl.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReaderImpl.java
new file mode 100644
index 0000000..2bae085
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetReaderImpl.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet;
+
+import java.util.List;
+
+import org.apache.drill.exec.physical.rowSet.model.ReaderIndex;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.reader.AbstractObjectReader;
+import org.apache.drill.exec.vector.accessor.reader.AbstractTupleReader;
+
+/**
+ * Reader implementation for a row set.
+ */
+
+public class RowSetReaderImpl extends AbstractTupleReader implements RowSetReader {
+
+ protected final ReaderIndex readerIndex;
+
+ public RowSetReaderImpl(TupleMetadata schema, ReaderIndex index, AbstractObjectReader[] readers) {
+ super(schema, readers);
+ this.readerIndex = index;
+ bindIndex(index);
+ }
+
+ public RowSetReaderImpl(TupleMetadata schema, ReaderIndex index,
+ List<AbstractObjectReader> readers) {
+ this(schema, index,
+ readers.toArray(new AbstractObjectReader[readers.size()]));
+ }
+
+ @Override
+ public boolean next() {
+ if (! readerIndex.next()) {
+ return false;
+ }
+ reposition();
+ return true;
+ }
+
+ @Override
+ public boolean valid() { return readerIndex.valid(); }
+
+ @Override
+ public int index() { return readerIndex.position(); }
+
+ @Override
+ public int rowCount() { return readerIndex.size(); }
+
+ @Override
+ public int rowIndex() { return readerIndex.vectorIndex(); }
+
+ @Override
+ public int batchIndex() { return readerIndex.batchIndex(); }
+
+ @Override
+ public void set(int index) {
+ this.readerIndex.set(index);
+ reposition();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetSchema.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetSchema.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetSchema.java
deleted file mode 100644
index 55b5f12..0000000
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetSchema.java
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.test.rowSet;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.record.BatchSchema;
-import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
-import org.apache.drill.exec.vector.accessor.TupleAccessor.TupleSchema;
-import org.apache.drill.exec.record.MaterializedField;
-
-/**
- * Row set schema presented as a number of distinct "views" for various
- * purposes:
- * <ul>
- * <li>Batch schema: the schema used by a VectorContainer.</li>
- * <li>Physical schema: the schema expressed as a hierarchy of
- * tuples with the top tuple representing the row, nested tuples
- * representing maps.</li>
- * <li>Access schema: a flattened schema with all scalar columns
- * at the top level, and with map columns pulled out into a separate
- * collection. The flattened-scalar view is the one used to write to,
- * and read from, the row set.</li>
- * </ul>
- * Allows easy creation of multiple row sets from the same schema.
- * Each schema is immutable, which is fine for tests in which we
- * want known inputs and outputs.
- */
-
-public class RowSetSchema {
-
- /**
- * Logical description of a column. A logical column is a
- * materialized field. For maps, also includes a logical schema
- * of the map.
- */
-
- public static class LogicalColumn {
- protected final String fullName;
- protected final int accessIndex;
- protected int flatIndex;
- protected final MaterializedField field;
-
- /**
- * Schema of the map. Includes only those fields directly within
- * the map; does not include fields from nested tuples.
- */
-
- protected PhysicalSchema mapSchema;
-
- public LogicalColumn(String fullName, int accessIndex, MaterializedField field) {
- this.fullName = fullName;
- this.accessIndex = accessIndex;
- this.field = field;
- }
-
- private void updateStructure(int index, PhysicalSchema children) {
- flatIndex = index;
- mapSchema = children;
- }
-
- public int accessIndex() { return accessIndex; }
- public int flatIndex() { return flatIndex; }
- public boolean isMap() { return mapSchema != null; }
- public PhysicalSchema mapSchema() { return mapSchema; }
- public MaterializedField field() { return field; }
- public String fullName() { return fullName; }
- }
-
- /**
- * Implementation of a tuple name space. Tuples allow both indexed and
- * named access to their members.
- *
- * @param <T> the type of object representing each column
- */
-
- public static class NameSpace<T> {
- private final Map<String,Integer> nameSpace = new HashMap<>();
- private final List<T> columns = new ArrayList<>();
-
- public int add(String key, T value) {
- int index = columns.size();
- nameSpace.put(key, index);
- columns.add(value);
- return index;
- }
-
- public T get(int index) {
- return columns.get(index);
- }
-
- public T get(String key) {
- int index = getIndex(key);
- if (index == -1) {
- return null;
- }
- return get(index);
- }
-
- public int getIndex(String key) {
- Integer index = nameSpace.get(key);
- if (index == null) {
- return -1;
- }
- return index;
- }
-
- public int count() { return columns.size(); }
- }
-
- /**
- * Provides a non-flattened, physical view of the schema. The top-level
- * row includes maps, maps expand to a nested tuple schema. This view
- * corresponds, more-or-less, to the physical storage of vectors in
- * a vector accessible or vector container.
- */
-
- private static class TupleSchemaImpl implements TupleSchema {
-
- private NameSpace<LogicalColumn> columns;
-
- public TupleSchemaImpl(NameSpace<LogicalColumn> ns) {
- this.columns = ns;
- }
-
- @Override
- public MaterializedField column(int index) {
- return logicalColumn(index).field();
- }
-
- public LogicalColumn logicalColumn(int index) { return columns.get(index); }
-
- @Override
- public MaterializedField column(String name) {
- LogicalColumn col = columns.get(name);
- return col == null ? null : col.field();
- }
-
- @Override
- public int columnIndex(String name) {
- return columns.getIndex(name);
- }
-
- @Override
- public int count() { return columns.count(); }
- }
-
- /**
- * Represents the flattened view of the schema used to get and set columns.
- * Represents a left-to-right, depth-first traversal of the row and map
- * columns. Holds only materialized vectors (non-maps). For completeness,
- * provides access to maps also via separate methods, but this is generally
- * of little use.
- */
-
- public static class FlattenedSchema extends TupleSchemaImpl {
- protected final TupleSchemaImpl maps;
-
- public FlattenedSchema(NameSpace<LogicalColumn> cols, NameSpace<LogicalColumn> maps) {
- super(cols);
- this.maps = new TupleSchemaImpl(maps);
- }
-
- public LogicalColumn logicalMap(int index) { return maps.logicalColumn(index); }
- public MaterializedField map(int index) { return maps.column(index); }
- public MaterializedField map(String name) { return maps.column(name); }
- public int mapIndex(String name) { return maps.columnIndex(name); }
- public int mapCount() { return maps.count(); }
- }
-
- /**
- * Physical schema of a row set showing the logical hierarchy of fields
- * with map fields as first-class fields. Map members appear as children
- * under the map, much as they appear in the physical value-vector
- * implementation.
- */
-
- public static class PhysicalSchema {
- protected final NameSpace<LogicalColumn> schema = new NameSpace<>();
-
- public LogicalColumn column(int index) {
- return schema.get(index);
- }
-
- public LogicalColumn column(String name) {
- return schema.get(name);
- }
-
- public int count() { return schema.count(); }
-
- public NameSpace<LogicalColumn> nameSpace() { return schema; }
- }
-
- private static class SchemaExpander {
- private final PhysicalSchema physicalSchema;
- private final NameSpace<LogicalColumn> cols = new NameSpace<>();
- private final NameSpace<LogicalColumn> maps = new NameSpace<>();
-
- public SchemaExpander(BatchSchema schema) {
- physicalSchema = expand("", schema);
- }
-
- private PhysicalSchema expand(String prefix, Iterable<MaterializedField> fields) {
- PhysicalSchema physical = new PhysicalSchema();
- for (MaterializedField field : fields) {
- String name = prefix + field.getName();
- int index;
- LogicalColumn colSchema = new LogicalColumn(name, physical.count(), field);
- physical.schema.add(field.getName(), colSchema);
- PhysicalSchema children = null;
- if (field.getType().getMinorType() == MinorType.MAP) {
- index = maps.add(name, colSchema);
- children = expand(name + ".", field.getChildren());
- } else {
- index = cols.add(name, colSchema);
- }
- colSchema.updateStructure(index, children);
- }
- return physical;
- }
- }
-
- private final BatchSchema batchSchema;
- private final TupleSchemaImpl accessSchema;
- private final FlattenedSchema flatSchema;
- private final PhysicalSchema physicalSchema;
-
- public RowSetSchema(BatchSchema schema) {
- batchSchema = schema;
- SchemaExpander expander = new SchemaExpander(schema);
- physicalSchema = expander.physicalSchema;
- accessSchema = new TupleSchemaImpl(physicalSchema.nameSpace());
- flatSchema = new FlattenedSchema(expander.cols, expander.maps);
- }
-
- /**
- * A hierarchical schema that includes maps, with maps expanding
- * to a nested tuple schema. Not used at present; this is intended
- * to be the bases of non-flattened accessors if we find the need.
- * @return the hierarchical access schema
- */
-
- public TupleSchema hierarchicalAccess() { return accessSchema; }
-
- /**
- * A flattened (left-to-right, depth-first traversal) of the non-map
- * columns in the row. Used to define the column indexes in the
- * get methods for row readers and the set methods for row writers.
- * @return the flattened access schema
- */
-
- public FlattenedSchema flatAccess() { return flatSchema; }
-
- /**
- * Internal physical schema in hierarchical order. Mostly used to create
- * the other schemas, but may be of use in special cases. Has the same
- * structure as the batch schema, but with additional information.
- * @return a tree-structured physical schema
- */
-
- public PhysicalSchema physical() { return physicalSchema; }
-
- /**
- * The batch schema used by the Drill runtime. Represents a tree-structured
- * list of top-level fields, including maps. Maps contain a nested schema.
- * @return the batch schema used by the Drill runtime
- */
-
- public BatchSchema batch() { return batchSchema; }
-
- /**
- * Convert this schema to a new batch schema that includes the specified
- * selection vector mode.
- * @param svMode selection vector mode for the new schema
- * @return the new batch schema
- */
-
- public BatchSchema toBatchSchema(SelectionVectorMode svMode) {
- List<MaterializedField> fields = new ArrayList<>();
- for (MaterializedField field : batchSchema) {
- fields.add(field);
- }
- return new BatchSchema(svMode, fields);
- }
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java
index 261a9c1..32b61ca 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetUtilities.java
@@ -17,12 +17,18 @@
*/
package org.apache.drill.test.rowSet;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.selection.SelectionVector2;
-import org.apache.drill.exec.vector.accessor.AccessorUtilities;
-import org.apache.drill.exec.vector.accessor.ColumnAccessor.ValueType;
-import org.apache.drill.exec.vector.accessor.ColumnWriter;
-import org.apache.drill.test.rowSet.RowSet.RowSetWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.bouncycastle.util.Arrays;
import org.joda.time.Duration;
import org.joda.time.Period;
@@ -62,11 +68,42 @@ public class RowSetUtilities {
*/
public static void setFromInt(RowSetWriter rowWriter, int index, int value) {
- ColumnWriter writer = rowWriter.column(index);
- if (writer.valueType() == ValueType.PERIOD) {
- setPeriodFromInt(writer, rowWriter.schema().column(index).getType().getMinorType(), value);
- } else {
- AccessorUtilities.setFromInt(writer, value);
+ ScalarWriter writer = rowWriter.scalar(index);
+ MaterializedField field = rowWriter.schema().column(index);
+ writer.setObject(testDataFromInt(writer.valueType(), field.getType(), value));
+ }
+
+ public static Object testDataFromInt(ValueType valueType, MajorType dataType, int value) {
+ switch (valueType) {
+ case BYTES:
+ return Integer.toHexString(value).getBytes();
+ case DOUBLE:
+ return (double) value;
+ case INTEGER:
+ switch (dataType.getMinorType()) {
+ case BIT:
+ return value & 0x01;
+ case SMALLINT:
+ return value % 32768;
+ case UINT2:
+ return value & 0xFFFF;
+ case TINYINT:
+ return value % 128;
+ case UINT1:
+ return value & 0xFF;
+ default:
+ return value;
+ }
+ case LONG:
+ return (long) value;
+ case STRING:
+ return Integer.toString(value);
+ case DECIMAL:
+ return BigDecimal.valueOf(value, dataType.getScale());
+ case PERIOD:
+ return periodFromInt(dataType.getMinorType(), value);
+ default:
+ throw new IllegalStateException("Unknown writer type: " + valueType);
}
}
@@ -81,26 +118,56 @@ public class RowSetUtilities {
* @param minorType the Drill data type
* @param value the integer value to apply
+ * @return the equivalent Joda <tt>Period</tt> value
*/
- public static void setPeriodFromInt(ColumnWriter writer, MinorType minorType,
- int value) {
+ public static Period periodFromInt(MinorType minorType, int value) {
switch (minorType) {
case INTERVAL:
- writer.setPeriod(Duration.millis(value).toPeriod());
- break;
+ return Duration.millis(value).toPeriod();
case INTERVALYEAR:
- writer.setPeriod(Period.years(value / 12).withMonths(value % 12));
- break;
+ return Period.years(value / 12).withMonths(value % 12);
case INTERVALDAY:
int sec = value % 60;
value = value / 60;
int min = value % 60;
value = value / 60;
- writer.setPeriod(Period.days(value).withMinutes(min).withSeconds(sec));
- break;
+ return Period.days(value).withMinutes(min).withSeconds(sec);
default:
throw new IllegalArgumentException("Writer is not an interval: " + minorType);
}
}
+
+ public static void assertEqualValues(ValueType type, Object expectedObj, Object actualObj) {
+ assertEqualValues(type.toString(), type, expectedObj, actualObj);
+ }
+
+ public static void assertEqualValues(String msg, ValueType type, Object expectedObj, Object actualObj) {
+ switch (type) {
+ case BYTES: {
+ byte expected[] = (byte[]) expectedObj;
+ byte actual[] = (byte[]) actualObj;
+ assertEquals(msg + " - byte lengths differ", expected.length, actual.length);
+ assertTrue(msg, Arrays.areEqual(expected, actual));
+ break;
+ }
+ case DOUBLE:
+ assertEquals(msg, (double) expectedObj, (double) actualObj, 0.0001);
+ break;
+ case INTEGER:
+ case LONG:
+ case STRING:
+ case DECIMAL:
+ assertEquals(msg, expectedObj, actualObj);
+ break;
+ case PERIOD: {
+ Period expected = (Period) expectedObj;
+ Period actual = (Period) actualObj;
+ assertEquals(msg, expected.normalizedStandard(), actual.normalizedStandard());
+ break;
+ }
+ default:
+ throw new IllegalStateException( "Unexpected type: " + type);
+ }
+ }
}
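Two brief examples using only signatures visible in this diff; the surrounding writer and loop variables are assumed context:

    // Fill column 0 of the current row with a value derived from i,
    // whatever scalar type the column happens to be:
    RowSetUtilities.setFromInt(rowWriter, 0, i);

    // Type-aware comparison; 14 months normalizes to 1 year, 2 months:
    RowSetUtilities.assertEqualValues(ValueType.PERIOD,
        RowSetUtilities.periodFromInt(MinorType.INTERVALYEAR, 14),
        actualPeriod);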
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriter.java
new file mode 100644
index 0000000..874c0e1
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriter.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet;
+
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+
+/**
+ * Interface for writing values to a row set. Only available
+ * for newly-created, single, direct row sets. Eventually, if
+ * we want to allow updating a row set, we have to create a
+ * new row set with the updated columns, then merge the new
+ * and old row sets to create a new immutable row set.
+ * <p>
+ * Typical usage:
+ * <pre><code>
+ * void writeABatch() {
+ * RowSetWriter writer = ...
+ * while (! writer.isFull()) {
+ * writer.scalar(0).setInt(10);
+ * writer.scalar(1).setString("foo");
+ * ...
+ * writer.save();
+ * }
+ * }</code></pre>
+ * The above writes until the batch is full, based on size. If values
+ * are large enough to potentially cause vector overflow, do the
+ * following instead:
+ * <pre><code>
+ * void writeABatch() {
+ * RowSetWriter writer = ...
+ * while (! writer.isFull()) {
+ * writer.scalar(0).setInt(10);
+ * try {
+ * writer.scalar(1).setString("foo");
+ * } catch (VectorOverflowException e) { break; }
+ * ...
+ * writer.save();
+ * }
+ * // Do something with the partially-written last row.
+ * }</code></pre>
+ * <p>
+ * This writer is for testing, so no provision is available to handle a
+ * partial last row. (Elsewhere in Drill there are classes that handle that case.)
+ */
+
+public interface RowSetWriter extends TupleWriter {
+
+ /**
+ * Write a row of values, given by Java objects. Object type must
+ * match expected column type. Stops writing, and returns false,
+ * if any value causes vector overflow. Value format:
+ * <ul>
+ * <li>For scalars, the value as a suitable Java type (int or
+ * Integer, say, for <tt>INTEGER</tt> values.)</li>
+ * <li>For scalar arrays, an array of a suitable Java primitive type
+ * for scalars. For example, <tt>int[]</tt> for an <tt>INTEGER</tt>
+ * column.</li>
+ * <li>For a map, an <tt>Object</tt> array with values encoded as above.
+ * (In fact, the list here is the same as the map format.)</li>
+ * <li>For a list (repeated map, list of list), an <tt>Object</tt>
+ * array with values encoded as above. (So, for a repeated map, an outer
+ * <tt>Object</tt> array encodes the array, an inner one encodes the
+ * map members.)</li>
+ * </ul>
+ *
+ * @param values variable-length argument list of column values
+ */
+
+ void setRow(Object...values);
+
+ /**
+ * Indicates whether the batch is full, meaning that the current
+ * row position is no longer valid for writing. Will be false on
+ * the first row, and on all subsequent rows, until either the
+ * maximum number of rows is written or a vector overflows; after
+ * that, it will return true. The method returns true as soon as
+ * any column writer overflows, even in the middle of a row write.
+ * That is, this writer does not automatically handle overflow
+ * rows because that added complexity is seldom needed for tests.
+ *
+ * @return true if the batch is full and no further rows can be
+ * written, false if the current row can be written
+ */
+
+ boolean isFull();
+ int rowIndex();
+
+ /**
+ * Saves the current row and moves to the next row.
+ * Done automatically if using <tt>setRow()</tt>.
+ */
+
+ void save();
+
+ /**
+ * Finish writing and finalize the row set being
+ * written.
+ * @return the completed, read-only row set without a
+ * selection vector
+ */
+
+ SingleRowSet done();
+}
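To illustrate the Object encoding rules in the setRow() javadoc, a small example for an assumed schema of (a: INT, b: VARCHAR array, c: MAP(x: INT, y: VARCHAR)):

    writer.setRow(
        10,                               // scalar column
        new String[] {"fred", "barney"},  // scalar array column
        new Object[] {20, "wilma"});      // map column: members in schema order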
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriterImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriterImpl.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriterImpl.java
new file mode 100644
index 0000000..074842d
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/RowSetWriterImpl.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet;
+
+import java.util.List;
+
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter;
+import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+
+/**
+ * Implementation of a row set writer. Only available for newly-created,
+ * empty, direct, single row sets. Rewriting is not allowed, nor is writing
+ * to a hyper row set.
+ */
+
+public class RowSetWriterImpl extends AbstractTupleWriter implements RowSetWriter {
+
+ /**
+ * Writer index that points to each row in the row set. The index starts at
+ * the 0th row and advances one row on each increment. This allows writers to
+ * start positioned at the first row. Writes happen in the current row.
+ * Calling <tt>next()</tt> advances to the next position, effectively saving
+ * the current row. The most recent row can be abandoned simply by not
+ * calling <tt>next()</tt>. This means that the number of completed rows is
+ * the same as the row index.
+ */
+
+ static class WriterIndexImpl implements ColumnWriterIndex {
+
+ public enum State { OK, VECTOR_OVERFLOW, END_OF_BATCH }
+
+ private int rowIndex = 0;
+ private State state = State.OK;
+
+ @Override
+ public final int vectorIndex() { return rowIndex; }
+
+ public final boolean next() {
+ if (++rowIndex < ValueVector.MAX_ROW_COUNT) {
+ return true;
+ }
+ // Should not call next() again once batch is full.
+ assert rowIndex == ValueVector.MAX_ROW_COUNT;
+ rowIndex = ValueVector.MAX_ROW_COUNT;
+ state = state == State.OK ? State.END_OF_BATCH : state;
+ return false;
+ }
+
+ public int size() {
+ // The index always points to the next slot past the
+ // end of valid rows.
+ return rowIndex;
+ }
+
+ public boolean valid() { return state == State.OK; }
+
+ public boolean hasOverflow() { return state == State.VECTOR_OVERFLOW; }
+
+ @Override
+ public final void nextElement() { }
+
+ @Override
+ public void rollover() {
+ throw new UnsupportedOperationException("Rollover not supported in the row set writer.");
+ }
+
+ @Override
+ public int rowStartIndex() { return rowIndex; }
+
+ @Override
+ public ColumnWriterIndex outerIndex() { return null; }
+
+ @Override
+ public String toString() {
+ return new StringBuilder()
+ .append("[")
+ .append(getClass().getSimpleName())
+ .append(" state = ")
+ .append(state)
+ .append(", rowIndex = ")
+ .append(rowIndex)
+ .append("]")
+ .toString();
+ }
+ }
+
+ private final WriterIndexImpl writerIndex;
+ private final ExtendableRowSet rowSet;
+
+ protected RowSetWriterImpl(ExtendableRowSet rowSet, TupleMetadata schema, WriterIndexImpl index, List<AbstractObjectWriter> writers) {
+ super(schema, writers);
+ this.rowSet = rowSet;
+ this.writerIndex = index;
+ bindIndex(index);
+ startWrite();
+ startRow();
+ }
+
+ @Override
+ public void setRow(Object...values) {
+ setObject(values);
+ save();
+ }
+
+ @Override
+ public int rowIndex() { return writerIndex.vectorIndex(); }
+
+ @Override
+ public void save() {
+ endArrayValue();
+ saveRow();
+
+ // For convenience, start a new row after each save.
+ // The last (unused) row is abandoned when the batch is full.
+
+ if (writerIndex.next()) {
+ startRow();
+ }
+ }
+
+ @Override
+ public boolean isFull( ) { return ! writerIndex.valid(); }
+
+ @Override
+ public SingleRowSet done() {
+ endWrite();
+ rowSet.container().setRecordCount(writerIndex.vectorIndex());
+ return rowSet;
+ }
+
+ @Override
+ public int lastWriteIndex() {
+ return writerIndex.vectorIndex();
+ }
+}
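Putting the pieces together, a sketch of the protocol the writer index enforces: writes land at the current position, save() advances, and isFull() flips once the index reaches ValueVector.MAX_ROW_COUNT or a vector overflows. The ten-row limit here is arbitrary:

    RowSetWriter writer = rowSet.writer();
    int count = 0;
    while (! writer.isFull() && count < 10) {
      writer.scalar(0).setInt(count++);
      writer.save();                       // commit the row, advance the index
    }
    SingleRowSet result = writer.done();   // records the final row count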
[10/15] drill git commit: DRILL-5657: Size-aware vector writer
structure
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java
new file mode 100644
index 0000000..ffcc84a
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderProtocol.java
@@ -0,0 +1,586 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.Arrays;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.TupleWriter.UndefinedColumnException;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+/**
+ * Tests of the overall result set loader protocol focusing on which operations
+ * are valid in each state, basics of column lookup, basics of adding columns
+ * and so on. Uses the simplest possible type: a required int.
+ * <p>
+ * Run this test first to do a sanity check of the result set loader after making
+ * changes.
+ * <p>
+ * You will find that the result set loader creates a very complex tree of
+ * objects that can be quite hard to understand and debug. Please read the
+ * material in the various subsystems to see how the classes fit together
+ * to implement Drill's rich JSON-like data model.
+ * <p>
+ * To aid in debugging, you can also dump the result set loader, and all its
+ * child objects as follows:<pre><code>
+ * ((ResultSetLoaderImpl) rsLoader).dump(new HierarchicalPrinter());
+ * </code></pre>
+ * Simply insert that line into these tests anywhere you want to visualize
+ * the structure. The object tree will show all the components and their
+ * current state.
+ */
+
+public class TestResultSetLoaderProtocol extends SubOperatorTest {
+
+ @Test
+ public void testBasics() {
+ ResultSetLoaderImpl rsLoaderImpl = new ResultSetLoaderImpl(fixture.allocator());
+ ResultSetLoader rsLoader = rsLoaderImpl;
+ assertEquals(0, rsLoader.schemaVersion());
+ assertEquals(ResultSetLoader.DEFAULT_ROW_COUNT, rsLoader.targetRowCount());
+ assertEquals(ValueVector.MAX_BUFFER_SIZE, rsLoader.targetVectorSize());
+ assertEquals(0, rsLoader.writer().rowCount());
+ assertEquals(0, rsLoader.batchCount());
+ assertEquals(0, rsLoader.totalRowCount());
+
+ // Failures due to wrong state (Start)
+
+ try {
+ rsLoader.harvest();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+
+ // Can define schema before starting the first batch.
+
+ RowSetLoader rootWriter = rsLoader.writer();
+ TupleMetadata schema = rootWriter.schema();
+ assertEquals(0, schema.size());
+
+ MaterializedField fieldA = SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED);
+ rootWriter.addColumn(fieldA);
+
+ assertEquals(1, schema.size());
+ assertSame(fieldA, schema.column(0));
+ assertSame(fieldA, schema.column("a"));
+
+ // Error to start a row before the first batch.
+
+ try {
+ rootWriter.start();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+
+ // Error to end a row before the first batch.
+
+ try {
+ rootWriter.save();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+
+ // Because writing is an inner loop, no checks are
+ // done to ensure that writing occurs only in the proper
+ // state. So, can't test setInt() in the wrong state.
+
+ rsLoader.startBatch();
+ try {
+ rsLoader.startBatch();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+ assertFalse(rootWriter.isFull());
+
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(100);
+ assertEquals(0, rootWriter.rowCount());
+ assertEquals(0, rsLoader.batchCount());
+ rootWriter.save();
+ assertEquals(1, rootWriter.rowCount());
+ assertEquals(1, rsLoader.batchCount());
+ assertEquals(1, rsLoader.totalRowCount());
+
+ // Can add a field after first row, prior rows are
+ // "back-filled".
+
+ MaterializedField fieldB = SchemaBuilder.columnSchema("b", MinorType.INT, DataMode.OPTIONAL);
+ rootWriter.addColumn(fieldB);
+
+ assertEquals(2, schema.size());
+ assertSame(fieldB, schema.column(1));
+ assertSame(fieldB, schema.column("b"));
+
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(200);
+ rootWriter.scalar(1).setInt(210);
+ rootWriter.save();
+ assertEquals(2, rootWriter.rowCount());
+ assertEquals(1, rsLoader.batchCount());
+ assertEquals(2, rsLoader.totalRowCount());
+
+ // Harvest the first batch. Version number is the number
+ // of columns added.
+
+ assertFalse(rootWriter.isFull());
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(2, rsLoader.schemaVersion());
+ assertEquals(0, rootWriter.rowCount());
+ assertEquals(1, rsLoader.batchCount());
+ assertEquals(2, rsLoader.totalRowCount());
+
+ SingleRowSet expected = fixture.rowSetBuilder(result.batchSchema())
+ .addRow(100, null)
+ .addRow(200, 210)
+ .build();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(result);
+
+ // Between batches: batch-based operations fail
+
+ try {
+ rootWriter.start();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+ try {
+ rsLoader.harvest();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+ try {
+ rootWriter.save();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+
+ // Create a second batch
+
+ rsLoader.startBatch();
+ assertEquals(0, rootWriter.rowCount());
+ assertEquals(1, rsLoader.batchCount());
+ assertEquals(2, rsLoader.totalRowCount());
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(300);
+ rootWriter.scalar(1).setInt(310);
+ rootWriter.save();
+ assertEquals(1, rootWriter.rowCount());
+ assertEquals(2, rsLoader.batchCount());
+ assertEquals(3, rsLoader.totalRowCount());
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(400);
+ rootWriter.scalar(1).setInt(410);
+ rootWriter.save();
+
+ // Harvest. Schema has not changed.
+
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(2, rsLoader.schemaVersion());
+ assertEquals(0, rootWriter.rowCount());
+ assertEquals(2, rsLoader.batchCount());
+ assertEquals(4, rsLoader.totalRowCount());
+
+ expected = fixture.rowSetBuilder(result.batchSchema())
+ .addRow(300, 310)
+ .addRow(400, 410)
+ .build();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(result);
+
+ // Next batch. Schema has changed.
+
+ rsLoader.startBatch();
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(500);
+ rootWriter.scalar(1).setInt(510);
+ rootWriter.addColumn(SchemaBuilder.columnSchema("c", MinorType.INT, DataMode.OPTIONAL));
+ rootWriter.scalar(2).setInt(520);
+ rootWriter.save();
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(600);
+ rootWriter.scalar(1).setInt(610);
+ rootWriter.scalar(2).setInt(620);
+ rootWriter.save();
+
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(3, rsLoader.schemaVersion());
+ expected = fixture.rowSetBuilder(result.batchSchema())
+ .addRow(500, 510, 520)
+ .addRow(600, 610, 620)
+ .build();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(result);
+
+ rsLoader.close();
+
+ // Key operations fail after close.
+
+ try {
+ rootWriter.start();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+ try {
+ rsLoader.writer();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+ try {
+ rsLoader.startBatch();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+ try {
+ rsLoader.harvest();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+ try {
+ rootWriter.save();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+
+ // Benign to close twice
+
+ rsLoader.close();
+ }
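
In outline, the protocol verified above reduces to the following sequence; this is a minimal sketch using only calls exercised in this test:

    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
    RowSetLoader rootWriter = rsLoader.writer();
    rootWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED));
    rsLoader.startBatch();                // begin a batch
    rootWriter.start();                   // begin a row
    rootWriter.scalar(0).setInt(100);     // write column values
    rootWriter.save();                    // commit the row
    RowSet result = fixture.wrap(rsLoader.harvest());  // pull the completed batch
    result.clear();                       // release the batch's buffers
    rsLoader.close();                     // release loader memory; benign to repeat
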
+
+ /**
+ * Schemas are case insensitive by default. Verify that
+ * the schema mechanism works, with emphasis on
+ * case-insensitive name lookup.
+ */
+
+ @Test
+ public void testCaseInsensitiveSchema() {
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
+ RowSetLoader rootWriter = rsLoader.writer();
+ TupleMetadata schema = rootWriter.schema();
+
+ // No columns defined in schema
+
+ assertNull(schema.column("a"));
+ try {
+ schema.column(0);
+ fail();
+ } catch (IndexOutOfBoundsException e) {
+ // Expected
+ }
+
+ // No columns defined in writer
+
+ try {
+ rootWriter.column("a");
+ fail();
+ } catch (UndefinedColumnException e) {
+ // Expected
+ }
+ try {
+ rootWriter.column(0);
+ fail();
+ } catch (IndexOutOfBoundsException e) {
+ // Expected
+ }
+
+ // Define a column
+
+ MaterializedField colSchema = SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED);
+ rootWriter.addColumn(colSchema);
+
+ // Can now be found, case insensitive
+
+ assertSame(colSchema, schema.column(0));
+ assertSame(colSchema, schema.column("a"));
+ assertSame(colSchema, schema.column("A"));
+ assertNotNull(rootWriter.column(0));
+ assertNotNull(rootWriter.column("a"));
+ assertNotNull(rootWriter.column("A"));
+ assertEquals(1, schema.size());
+ assertEquals(0, schema.index("a"));
+ assertEquals(0, schema.index("A"));
+
+ // Reject a duplicate name, case insensitive
+
+ try {
+ rootWriter.addColumn(colSchema);
+ fail();
+ } catch(IllegalArgumentException e) {
+ // Expected
+ }
+ try {
+ MaterializedField testCol = SchemaBuilder.columnSchema("A", MinorType.VARCHAR, DataMode.REQUIRED);
+ rootWriter.addColumn(testCol);
+ fail();
+ } catch (IllegalArgumentException e) {
+ // Expected
+ assertTrue(e.getMessage().contains("Duplicate"));
+ }
+
+ // Can still add required fields while writing the first row.
+
+ rsLoader.startBatch();
+ rootWriter.start();
+ rootWriter.scalar(0).setString("foo");
+
+ MaterializedField col2 = SchemaBuilder.columnSchema("b", MinorType.VARCHAR, DataMode.REQUIRED);
+ rootWriter.addColumn(col2);
+ assertSame(col2, schema.column(1));
+ assertSame(col2, schema.column("b"));
+ assertSame(col2, schema.column("B"));
+ assertEquals(2, schema.size());
+ assertEquals(1, schema.index("b"));
+ assertEquals(1, schema.index("B"));
+ rootWriter.scalar(1).setString("second");
+
+ // After first row, can add an optional or repeated.
+ // Also allows a required field: values will be back-filled.
+
+ rootWriter.save();
+ rootWriter.start();
+ rootWriter.scalar(0).setString("bar");
+ rootWriter.scalar(1).setString("");
+
+ MaterializedField col3 = SchemaBuilder.columnSchema("c", MinorType.VARCHAR, DataMode.REQUIRED);
+ rootWriter.addColumn(col3);
+ assertSame(col3, schema.column(2));
+ assertSame(col3, schema.column("c"));
+ assertSame(col3, schema.column("C"));
+ assertEquals(3, schema.size());
+ assertEquals(2, schema.index("c"));
+ assertEquals(2, schema.index("C"));
+ rootWriter.scalar("c").setString("c.2");
+
+ MaterializedField col4 = SchemaBuilder.columnSchema("d", MinorType.VARCHAR, DataMode.OPTIONAL);
+ rootWriter.addColumn(col4);
+ assertSame(col4, schema.column(3));
+ assertSame(col4, schema.column("d"));
+ assertSame(col4, schema.column("D"));
+ assertEquals(4, schema.size());
+ assertEquals(3, schema.index("d"));
+ assertEquals(3, schema.index("D"));
+ rootWriter.scalar("d").setString("d.2");
+
+ MaterializedField col5 = SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.REPEATED);
+ rootWriter.addColumn(col5);
+ assertSame(col5, schema.column(4));
+ assertSame(col5, schema.column("e"));
+ assertSame(col5, schema.column("E"));
+ assertEquals(5, schema.size());
+ assertEquals(4, schema.index("e"));
+ assertEquals(4, schema.index("E"));
+ rootWriter.array(4).set("e1", "e2", "e3");
+ rootWriter.save();
+
+ // Verify. No reason to expect problems, but might as well check.
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(5, rsLoader.schemaVersion());
+ SingleRowSet expected = fixture.rowSetBuilder(result.batchSchema())
+ .addRow("foo", "second", "", null, new String[] { } )
+ .addRow("bar", "", "c.2", "d.2", new String[] {"e1", "e2", "e3"} )
+ .build();
+ new RowSetComparison(expected)
+ .verifyAndClearAll(result);
+
+ // Handy way to test that close works to abort an in-flight batch
+ // and clean up.
+
+ rsLoader.close();
+ }
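
Stated as code, the contract verified above is that every spelling of a name resolves to the same column. A minimal sketch, reusing the writer and schema from the test:

    rootWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
    assert schema.index("a") == schema.index("A");   // same column index
    assert rootWriter.column("A") != null;           // lookup succeeds in either case
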
+
+ /**
+ * Provide a schema up front to the loader; schema is built before
+ * the first row.
+ * <p>
+ * Also verifies the test-time shortcut that sets an entire row of
+ * values with a single call.
+ */
+
+ @Test
+ public void testInitialSchema() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addNullable("b", MinorType.INT)
+ .add("c", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ rootWriter
+ .addRow(10, 100, "fred")
+ .addRow(20, null, "barney")
+ .addRow(30, 300, "wilma");
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+
+ RowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, 100, "fred")
+ .addRow(20, null, "barney")
+ .addRow(30, 300, "wilma")
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+ rsLoader.close();
+ }
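
For contrast with the dynamic-schema tests above: when the schema is known up front, the loader is configured once and rows can be written with the single-call shortcut. A minimal sketch under those assumptions:

    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
        .setSchema(schema)                 // schema declared before the first row
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    rsLoader.startBatch();
    rsLoader.writer().addRow(10);          // test-time shortcut: one call per row
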
+
+ /**
+ * The writer protocol allows a client to write to a row any number of times
+ * before invoking <tt>save()</tt>. In this case, each new value simply
+ * overwrites the previous value. Here, we test the most basic case: a simple,
+ * flat tuple with no arrays. We use a very large Varchar that would, if
+ * overwrite were not working, cause vector overflow.
+ * <p>
+ * The ability to overwrite rows is seldom needed except in one future use
+ * case: writing a row, then applying a filter "in-place" to discard unwanted
+ * rows, without having to send the row downstream.
+ * <p>
+ * Because of this use case, specific rules apply when discarding a row or
+ * overwriting values.
+ * <ul>
+ * <li>Values can be written once per row. Fixed-width columns actually allow
+ * multiple writes. But, because of the way variable-width columns work,
+ * multiple writes will cause undefined results.</li>
+ * <li>To overwrite a row, call <tt>start()</tt> without calling
+ * <tt>save()</tt> on the previous row. Doing so ignores data for the
+ * previous row and starts a new row in place of the old one.</li>
+ * </ul>
+ * Note that there is no explicit method to discard a row. Instead,
+ * the rule is that a row is not saved until <tt>save()</tt> is called.
+ */
+
+ @Test
+ public void testOverwriteRow() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Can't use the shortcut to populate rows when doing overwrites.
+
+ ScalarWriter aWriter = rootWriter.scalar("a");
+ ScalarWriter bWriter = rootWriter.scalar("b");
+
+ // Write 100,000 rows, overwriting 99% of them. This will cause vector
+ // overflow and data corruption if overwrite does not work; but will happily
+ // produce the correct result if everything works as it should.
+
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ rsLoader.startBatch();
+ while (count < 100_000) {
+ rootWriter.start();
+ count++;
+ aWriter.setInt(count);
+ bWriter.setBytes(value, value.length);
+ if (count % 100 == 0) {
+ rootWriter.save();
+ }
+ }
+
+ // Verify using a reader.
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(count / 100, result.rowCount());
+ RowSetReader reader = result.reader();
+ int rowId = 1;
+ while (reader.next()) {
+ assertEquals(rowId * 100, reader.scalar("a").getInt());
+ assertTrue(Arrays.equals(value, reader.scalar("b").getBytes()));
+ rowId++;
+ }
+
+ result.clear();
+ rsLoader.close();
+ }
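
The filter-in-place pattern that motivates the overwrite rules can be sketched as below, using the writers declared in the test; keepRow() and nextValue() are hypothetical stand-ins, not part of the API:

    rsLoader.startBatch();
    while (! rootWriter.isFull()) {
      rootWriter.start();             // starts, or overwrites, the pending row
      aWriter.setInt(nextValue());    // hypothetical value source
      if (keepRow()) {                // hypothetical filter decision
        rootWriter.save();            // only saved rows reach the batch
      }
    }
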
+
+ /**
+ * Test that memory is released if the loader is closed with an active
+ * batch (that is, before the batch is harvested.)
+ */
+
+ @Test
+ public void testCloseWithoutHarvest() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ rsLoader.startBatch();
+ for (int i = 0; i < 100; i++) {
+ rootWriter.start();
+ rootWriter.scalar("a").setInt(i);
+ rootWriter.scalar("b").setString("b-" + i);
+ rootWriter.save();
+ }
+
+ // Don't harvest the batch. Allocator will complain if the
+ // loader does not release memory.
+
+ rsLoader.close();
+ }
+}
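
The abort path in testCloseWithoutHarvest() deserves emphasis: an operator that fails mid-batch need only call close(). A minimal sketch:

    rsLoader.startBatch();
    rootWriter.start();
    rootWriter.scalar("a").setInt(1);
    rootWriter.save();
    rsLoader.close();   // no harvest; the loader frees the in-flight batch's vectors
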
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderTorture.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderTorture.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderTorture.java
new file mode 100644
index 0000000..33b9826
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderTorture.java
@@ -0,0 +1,453 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ArrayReader;
+import org.apache.drill.exec.vector.accessor.ArrayWriter;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.TupleReader;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.test.LogFixture;
+import org.apache.drill.test.LogFixture.LogFixtureBuilder;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+
+/**
+ * Runs a worst-case scenario test that combines aspects of all
+ * previous tests. Run this test only <i>after</i> all other tests
+ * pass. Combined conditions tested:
+ * <ul>
+ * <li>Nested maps and map arrays.</li>
+ * <li>Nullable VarChar (which requires an offset vector and a null-bit
+ * vector to be kept in sync.)</li>
+ * <li>Repeated VarChar (which requires two offset vectors to be kept in
+ * sync.)</li>
+ * <li>Null values.</li>
+ * <li>Omitted values.</li>
+ * <li>Skipped rows.</li>
+ * <li>Vector overflow deep in the structure.</li>
+ * <li>Multiple batches.</li>
+ * </ul>
+ * The proposition this test asserts: if this test passes, then most
+ * clients will also work, since they generally do not do all these
+ * things in a single query.
+ */
+
+public class TestResultSetLoaderTorture extends SubOperatorTest {
+
+ private static class TestSetup {
+ int n1Cycle = 5;
+ int n2Cycle = 7;
+ int s2Cycle = 11;
+ int m2Cycle = 13;
+ int n3Cycle = 17;
+ int s3Cycle = 19;
+ int skipCycle = 23;
+ int nullCycle = 3;
+ int m2Count = 9;
+ int s3Count = 29;
+
+ String s3Value;
+
+ public TestSetup() {
+ byte s3Bytes[] = new byte[512];
+ Arrays.fill(s3Bytes, (byte) 'X');
+ s3Value = new String(s3Bytes, Charsets.UTF_8);
+ }
+ }
+
+ // Write rows, skipping every 10th.
+ // n0 is the row id, so appears in every row.
+ // For n1, n2, n3 and s2, omit selected values and make others null.
+ // For s3, write values large enough to cause overflow; but skip some
+ // values and write 0 values for others.
+
+ private static class BatchWriter {
+
+ TestSetup setup;
+ RowSetLoader rootWriter;
+ ScalarWriter n1Writer;
+ ArrayWriter a2Writer;
+ ScalarWriter n2Writer;
+ ScalarWriter s2Writer;
+ ScalarWriter n3Writer;
+ ScalarWriter s3Writer;
+ int rowId = 0;
+ int innerCount = 0;
+ int writeRowCount = 0;
+ int startPrint = -1;
+ int endPrint = -1;
+ boolean lastRowDiscarded;
+
+ public BatchWriter(TestSetup setup, RowSetLoader rootWriter) {
+ this.setup = setup;
+ this.rootWriter = rootWriter;
+
+ TupleWriter m1Writer = rootWriter.tuple("m1");
+ n1Writer = m1Writer.scalar("n1");
+ a2Writer = m1Writer.array("m2");
+ TupleWriter m2Writer = a2Writer.tuple();
+ n2Writer = m2Writer.scalar("n2");
+ s2Writer = m2Writer.scalar("s2");
+ TupleWriter m3Writer = m2Writer.tuple("m3");
+ n3Writer = m3Writer.scalar("n3");
+ s3Writer = m3Writer.array("s3").scalar();
+ }
+
+ public void writeBatch() {
+
+ // Write until overflow
+
+ writeRowCount = rootWriter.rowCount();
+ //System.out.println("Start count: " + writeRowCount);
+ while (! rootWriter.isFull()) {
+ lastRowDiscarded = false;
+ writeRow();
+ rowId++;
+ }
+// System.out.println("End of batch: rowId: " + rowId +
+// ", count: " + writeRowCount +
+// ", writer count:" + rootWriter.rowCount());
+ }
+
+ private void writeRow() {
+ rootWriter.start();
+
+ // Outer column
+
+ rootWriter.scalar("n0").setInt(rowId);
+ print("n0", rowId);
+
+ // Map 1: non-array
+
+ setInt("n1", n1Writer, rowId, setup.n1Cycle);
+
+ // Map2: an array.
+
+ if (rowId % setup.m2Cycle != 0) {
+ writeM2Array();
+ }
+
+ // Skip some rows
+
+ if (rowId % setup.skipCycle != 0) {
+ rootWriter.save();
+ writeRowCount++;
+ } else {
+ lastRowDiscarded = true;
+// System.out.println("Skip row ID: " + rowId +
+// ", count: " + writeRowCount +
+// ", row set: " + rootWriter.rowCount());
+ }
+ if (rowId >= startPrint && rowId <= endPrint) {
+ System.out.println();
+ }
+ }
+
+ private void writeM2Array() {
+ for (int i = 0; i < setup.m2Count; i++) {
+
+ // n2: usual int
+
+ setInt("n2." + i, n2Writer, innerCount, setup.n2Cycle);
+
+ // S2: a nullable Varchar
+
+ if (innerCount % setup.s2Cycle == 0) {
+ // Skip
+ } else if (innerCount % setup.s2Cycle % setup.nullCycle == 0) {
+ s2Writer.setNull();
+ print("s2." + i, null);
+ } else {
+ s2Writer.setString("s2-" + innerCount);
+ print("s2." + i, "s2-" + innerCount);
+ }
+
+ // Map3: a non-repeated map
+
+ // n2: usual int
+
+ setInt("n3." + i, n3Writer, innerCount, setup.n3Cycle);
+
+ // s3: a repeated VarChar
+
+ if (innerCount % setup.s3Cycle != 0) {
+ for (int j = 0; j < setup.s3Count; j++) {
+ s3Writer.setString(setup.s3Value + (innerCount * setup.s3Count + j));
+ }
+ print("s3." + i, setup.s3Count + "x");
+ }
+ innerCount++;
+ a2Writer.save();
+ }
+ }
+
+ public void setInt(String label, ScalarWriter writer, int id, int cycle) {
+ int cycleIndex = id % cycle;
+ if (cycleIndex == 0) {
+ // Skip
+ } else if (cycleIndex % setup.nullCycle == 0) {
+ writer.setNull();
+ print(label, null);
+ } else {
+ writer.setInt(id * cycle);
+ print(label, id * cycle);
+ }
+ }
+
+ public void print(String label, Object value) {
+ if (rowId >= startPrint && rowId <= endPrint) {
+ System.out.print(label);
+ System.out.print(" = ");
+ System.out.print(value);
+ System.out.print(" ");
+ }
+ }
+
+ public int rowCount() {
+ return writeRowCount -
+ (lastRowDiscarded ? 0 : 1);
+ }
+ }
+
+ public static class ReadState {
+ int rowId = 0;
+ int innerCount = 0;
+ }
+
+ private static class BatchReader {
+
+ private TestSetup setup;
+ private RowSetReader rootReader;
+ ScalarReader n1Reader;
+ ArrayReader a2Reader;
+ ScalarReader n2Reader;
+ ScalarReader s2Reader;
+ ScalarReader n3Reader;
+ ScalarElementReader s3Reader;
+ ReadState readState;
+
+ public BatchReader(TestSetup setup, RowSetReader reader, ReadState readState) {
+ this.setup = setup;
+ this.rootReader = reader;
+ this.readState = readState;
+
+ TupleReader m1Reader = rootReader.tuple("m1");
+ n1Reader = m1Reader.scalar("n1");
+ a2Reader = m1Reader.array("m2");
+ TupleReader m2Reader = a2Reader.tuple();
+ n2Reader = m2Reader.scalar("n2");
+ s2Reader = m2Reader.scalar("s2");
+ TupleReader m3Reader = m2Reader.tuple("m3");
+ n3Reader = m3Reader.scalar("n3");
+ s3Reader = m3Reader.array("s3").elements();
+ }
+
+ public void verify() {
+ while (rootReader.next()) {
+// System.out.println(readState.rowId);
+ verifyRow();
+ readState.rowId++;
+ }
+ }
+
+ private void verifyRow() {
+ // Skipped original row? Bump the row id.
+
+ if (readState.rowId % setup.skipCycle == 0) {
+ if (readState.rowId % setup.m2Cycle != 0) {
+ readState.innerCount += setup.m2Count;
+ }
+ readState.rowId++;
+ }
+
+ // Outer column
+
+ assertEquals(readState.rowId, rootReader.scalar("n0").getInt());
+
+ // Map 1: non-array
+
+ checkInt(n1Reader, readState.rowId, setup.n1Cycle);
+
+ // Map2: an array.
+
+ if (readState.rowId % setup.m2Cycle == 0) {
+ assertEquals(0, a2Reader.size());
+ } else {
+ verifyM2Array();
+ }
+ }
+
+ private void verifyM2Array() {
+ for (int i = 0; i < setup.m2Count; i++) {
+ a2Reader.setPosn(i);
+
+ // n2: usual int
+
+ checkInt(n2Reader, readState.innerCount, setup.n2Cycle);
+
+ if (readState.innerCount % setup.s2Cycle == 0) {
+ // Skipped values should be null
+ assertTrue(
+ String.format("Row %d, entry %d", rootReader.rowIndex(), i),
+ s2Reader.isNull());
+ } else if (readState.innerCount % setup.s2Cycle % setup.nullCycle == 0) {
+ assertTrue(s2Reader.isNull());
+ } else {
+ assertEquals("s2-" + readState.innerCount, s2Reader.getString());
+ }
+
+ // Map3: a non-repeated map
+
+ // n2: usual int
+
+ checkInt(n3Reader, readState.innerCount, setup.n3Cycle);
+
+ // s3: a repeated VarChar
+
+ if (readState.innerCount % setup.s3Cycle == 0) {
+ assertEquals(0, s3Reader.size());
+ } else {
+ for (int j = 0; j < setup.s3Count; j++) {
+ assertEquals(setup.s3Value + (readState.innerCount * setup.s3Count + j), s3Reader.getString(j));
+ }
+ }
+ readState.innerCount++;
+ }
+ }
+
+ public void checkInt(ScalarReader reader, int id, int cycle) {
+ if (id % cycle == 0) {
+ // Skipped values should be null
+ assertTrue("id = " + id + " expected null for skipped", reader.isNull());
+ } else if (id % cycle % setup.nullCycle == 0) {
+ assertTrue(reader.isNull());
+ } else {
+ assertEquals(id * cycle, reader.getInt());
+ }
+ }
+ }
+
+ @Test
+ public void tortureTest() {
+ LogFixtureBuilder logBuilder = new LogFixtureBuilder()
+
+ // Enable to get detailed tracing when things go wrong.
+
+// .logger("org.apache.drill.exec.physical.rowSet", Level.TRACE)
+ ;
+ try (LogFixture logFixture = logBuilder.build()) {
+ doTortureTest();
+ }
+ }
+
+ private void doTortureTest() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("n0", MinorType.INT)
+ .addMap("m1")
+ .addNullable("n1", MinorType.INT)
+ .addMapArray("m2")
+ .addNullable("n2", MinorType.INT)
+ .addNullable("s2", MinorType.VARCHAR)
+ .addMap("m3")
+ .addNullable("n3", MinorType.INT)
+ .addArray("s3", MinorType.VARCHAR)
+ .buildMap()
+ .buildMap()
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ TestSetup setup = new TestSetup();
+ BatchWriter batchWriter = new BatchWriter(setup, rootWriter);
+
+ int totalRowCount = 0;
+
+ ReadState readState = new ReadState();
+ for (int batchCount = 0; batchCount < 10; batchCount++) {
+ rsLoader.startBatch();
+ batchWriter.writeBatch();
+
+ // Now the hard part. Verify the above batch.
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+// result.print();
+
+ // Should have overflowed
+
+ int savedCount = batchWriter.rowCount();
+ assertEquals(savedCount, result.rowCount());
+
+ totalRowCount += savedCount;
+ assertEquals(totalRowCount, rsLoader.totalRowCount());
+ assertEquals(batchCount + 1, rsLoader.batchCount());
+
+ BatchReader reader = new BatchReader(setup, result.reader(), readState);
+ reader.verify();
+ result.clear();
+ }
+
+ // Last batch: holds the final overflow row.
+
+ {
+ rsLoader.startBatch();
+
+ // Use this to visualize a string buffer. There is also a method
+ // to visualize offset vectors. These two are the most pesky vectors
+ // to get right.
+
+// VectorPrinter.printStrings((VarCharVector) ((NullableVarCharVector) ((AbstractScalarWriter) batchWriter.s2Writer).vector()).getValuesVector(), 0, 8);
+ RowSet result = fixture.wrap(rsLoader.harvest());
+
+ // Use this here, or earlier, when things go amiss and you need
+ // to see what the actual results might be.
+
+// result.print();
+
+ totalRowCount++;
+ assertEquals(totalRowCount, rsLoader.totalRowCount());
+
+ BatchReader reader = new BatchReader(setup, result.reader(), readState);
+ reader.verify();
+ result.clear();
+ }
+ rsLoader.close();
+ }
+}
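
The writer-navigation pattern in BatchWriter generalizes to any nested schema: the tuple() and array() calls walk the structure once, and the cached scalar writers are then reused row after row. A minimal sketch using the column names from doTortureTest():

    TupleWriter m1Writer = rootWriter.tuple("m1");   // non-array map
    ScalarWriter n1Writer = m1Writer.scalar("n1");
    ArrayWriter a2Writer = m1Writer.array("m2");     // repeated map
    TupleWriter m2Writer = a2Writer.tuple();         // writer for one array element
    ScalarWriter s3Writer = m2Writer.tuple("m3").array("s3").scalar();
    // Per element: write values via the scalar writers, then commit the element.
    a2Writer.save();
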
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetSchemaChange.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetSchemaChange.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetSchemaChange.java
new file mode 100644
index 0000000..9787189
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetSchemaChange.java
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+public class TestResultSetSchemaChange extends SubOperatorTest {
+
+ /**
+ * Test the case where the schema changes in the first batch.
+ * Schema changes before the first record are trivial and tested
+ * elsewhere. Here we write some records, then add new columns, as a
+ * JSON reader might do.
+ */
+
+ @Test
+ public void testSchemaChangeFirstBatch() {
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
+ RowSetLoader rootWriter = rsLoader.writer();
+ rootWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
+
+ // Create initial rows
+
+ rsLoader.startBatch();
+ int rowCount = 0;
+ for (int i = 0; i < 2; i++) {
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setString("a_" + rowCount);
+ rootWriter.save();
+ }
+
+ // Add a second column: nullable.
+
+ rootWriter.addColumn(SchemaBuilder.columnSchema("b", MinorType.INT, DataMode.OPTIONAL));
+ for (int i = 0; i < 2; i++) {
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setString("a_" + rowCount);
+ rootWriter.scalar(1).setInt(rowCount);
+ rootWriter.save();
+ }
+
+ // Add a third column. Use variable-width so that offset
+ // vectors must be back-filled.
+
+ rootWriter.addColumn(SchemaBuilder.columnSchema("c", MinorType.VARCHAR, DataMode.OPTIONAL));
+ for (int i = 0; i < 2; i++) {
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setString("a_" + rowCount);
+ rootWriter.scalar(1).setInt(rowCount);
+ rootWriter.scalar(2).setString("c_" + rowCount);
+ rootWriter.save();
+ }
+
+ // Fourth: a required Varchar; previous rows are back-filled with empty
+ // strings. And a required int, back-filled with zeros.
+ // May occasionally be useful, but does have to work to prevent
+ // vector corruption if some reader decides to go this route.
+
+ rootWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.VARCHAR, DataMode.REQUIRED));
+ rootWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.INT, DataMode.REQUIRED));
+ for (int i = 0; i < 2; i++) {
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setString("a_" + rowCount);
+ rootWriter.scalar(1).setInt(rowCount);
+ rootWriter.scalar(2).setString("c_" + rowCount);
+ rootWriter.scalar(3).setString("d_" + rowCount);
+ rootWriter.scalar(4).setInt(rowCount * 10);
+ rootWriter.save();
+ }
+
+ // Add an array. Now two offset vectors must be back-filled.
+
+ rootWriter.addColumn(SchemaBuilder.columnSchema("f", MinorType.VARCHAR, DataMode.REPEATED));
+ for (int i = 0; i < 2; i++) {
+ rootWriter.start();
+ rowCount++;
+ rootWriter.scalar(0).setString("a_" + rowCount);
+ rootWriter.scalar(1).setInt(rowCount);
+ rootWriter.scalar(2).setString("c_" + rowCount);
+ rootWriter.scalar(3).setString("d_" + rowCount);
+ rootWriter.scalar(4).setInt(rowCount * 10);
+ ScalarWriter arrayWriter = rootWriter.column(5).array().scalar();
+ arrayWriter.setString("f_" + rowCount + "-1");
+ arrayWriter.setString("f_" + rowCount + "-2");
+ rootWriter.save();
+ }
+
+ // Harvest the batch and verify.
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+
+ BatchSchema expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .addNullable("b", MinorType.INT)
+ .addNullable("c", MinorType.VARCHAR)
+ .add("d", MinorType.VARCHAR)
+ .add("e", MinorType.INT)
+ .addArray("f", MinorType.VARCHAR)
+ .build();
+ SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow("a_1", null, null, "", 0, new String[] {})
+ .addRow("a_2", null, null, "", 0, new String[] {})
+ .addRow("a_3", 3, null, "", 0, new String[] {})
+ .addRow("a_4", 4, null, "", 0, new String[] {})
+ .addRow("a_5", 5, "c_5", "", 0, new String[] {})
+ .addRow("a_6", 6, "c_6", "", 0, new String[] {})
+ .addRow("a_7", 7, "c_7", "d_7", 70, new String[] {})
+ .addRow("a_8", 8, "c_8", "d_8", 80, new String[] {})
+ .addRow("a_9", 9, "c_9", "d_9", 90, new String[] {"f_9-1", "f_9-2"})
+ .addRow("a_10", 10, "c_10", "d_10", 100, new String[] {"f_10-1", "f_10-2"})
+ .build();
+
+ new RowSetComparison(expected)
+ .verifyAndClearAll(actual);
+ rsLoader.close();
+ }
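
Back-filling, as exercised above, means a column added mid-batch reports null (optional), an empty string (required Varchar), or zero (required int) for rows saved before the column existed. A minimal sketch:

    rootWriter.start();
    rootWriter.scalar(0).setString("a_1");
    rootWriter.save();    // row saved with only column "a"
    rootWriter.addColumn(SchemaBuilder.columnSchema("b", MinorType.INT, DataMode.OPTIONAL));
    // In the harvested batch, row 1's "b" value is back-filled with null.
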
+
+ /**
+ * Test a schema change on the row that overflows. If the
+ * new column is added after overflow, it will appear as
+ * a schema-change in the following batch. This is fine as
+ * we are essentially time-shifting: pretending that the
+ * overflow row was written in the next batch (which, in
+ * fact, it is: that's what overflow means.)
+ */
+
+ @Test
+ public void testSchemaChangeWithOverflow() {
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+ rootWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
+
+ rsLoader.startBatch();
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rootWriter.scalar(0).setBytes(value, value.length);
+
+ // Relies on the fact that isFull() becomes true right after
+ // a vector overflows; we don't have to wait for saveRow().
+
+ if (rootWriter.isFull()) {
+ rootWriter.addColumn(SchemaBuilder.columnSchema("b", MinorType.INT, DataMode.OPTIONAL));
+ rootWriter.scalar(1).setInt(count);
+
+ // Add a Varchar to ensure its offset fiddling is done properly
+
+ rootWriter.addColumn(SchemaBuilder.columnSchema("c", MinorType.VARCHAR, DataMode.OPTIONAL));
+ rootWriter.scalar(2).setString("c-" + count);
+
+ // Allow adding a required column at this point.
+ // (Not intuitively obvious that this should work; we back-fill
+ // with zeros.)
+
+ rootWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.INT, DataMode.REQUIRED));
+ }
+ rootWriter.save();
+ count++;
+ }
+
+ // Result should include only the first column.
+
+ BatchSchema expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.VARCHAR)
+ .build();
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertTrue(result.batchSchema().isEquivalent(expectedSchema));
+ assertEquals(count - 1, result.rowCount());
+ result.clear();
+ assertEquals(1, rsLoader.schemaVersion());
+
+ // Double check: still can add a required column after
+ // starting the next batch. (No longer in overflow state.)
+
+ rsLoader.startBatch();
+ rootWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.INT, DataMode.REQUIRED));
+
+ // Next batch should start with the overflow row, including
+ // the column added at the end of the previous batch, after
+ // overflow.
+
+ result = fixture.wrap(rsLoader.harvest());
+ assertEquals(5, rsLoader.schemaVersion());
+ assertEquals(1, result.rowCount());
+ expectedSchema = new SchemaBuilder(expectedSchema)
+ .addNullable("b", MinorType.INT)
+ .addNullable("c", MinorType.VARCHAR)
+ .add("d", MinorType.INT)
+ .add("e", MinorType.INT)
+ .build();
+ assertTrue(result.batchSchema().isEquivalent(expectedSchema));
+ RowSetReader reader = result.reader();
+ reader.next();
+ assertEquals(count - 1, reader.scalar(1).getInt());
+ assertEquals("c-" + (count - 1), reader.scalar(2).getString());
+ assertEquals(0, reader.scalar("d").getInt());
+ assertEquals(0, reader.scalar("e").getInt());
+ result.clear();
+
+ rsLoader.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestTupleSchema.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestTupleSchema.java b/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestTupleSchema.java
new file mode 100644
index 0000000..45c0b55
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestTupleSchema.java
@@ -0,0 +1,509 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.record;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.TupleSchema.MapColumnMetadata;
+import org.apache.drill.exec.record.TupleSchema.PrimitiveColumnMetadata;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+/**
+ * Test the tuple and column metadata, including extended attributes.
+ */
+
+public class TestTupleSchema extends SubOperatorTest {
+
+ /**
+ * Test a fixed-width, primitive, required column. Includes basic
+ * tests common to all data types. (Basic tests are not repeated for
+ * other types.)
+ */
+
+ @Test
+ public void testRequiredFixedWidthColumn() {
+
+ MaterializedField field = SchemaBuilder.columnSchema("c", MinorType.INT, DataMode.REQUIRED );
+ ColumnMetadata col = TupleSchema.fromField(field);
+
+ // Code may depend on the specific column class
+
+ assertTrue(col instanceof PrimitiveColumnMetadata);
+
+ // Generic checks
+
+ assertEquals(ColumnMetadata.StructureType.PRIMITIVE, col.structureType());
+ assertNull(col.mapSchema());
+ assertSame(field, col.schema());
+ assertEquals(field.getName(), col.name());
+ assertEquals(field.getType(), col.majorType());
+ assertEquals(field.getType().getMinorType(), col.type());
+ assertEquals(field.getDataMode(), col.mode());
+ assertFalse(col.isNullable());
+ assertFalse(col.isArray());
+ assertFalse(col.isVariableWidth());
+ assertFalse(col.isMap());
+ assertFalse(col.isList());
+ assertTrue(col.isEquivalent(col));
+
+ ColumnMetadata col2 = TupleSchema.fromField(field);
+ assertTrue(col.isEquivalent(col2));
+
+ MaterializedField field3 = SchemaBuilder.columnSchema("d", MinorType.INT, DataMode.REQUIRED );
+ ColumnMetadata col3 = TupleSchema.fromField(field3);
+ assertFalse(col.isEquivalent(col3));
+
+ MaterializedField field4 = SchemaBuilder.columnSchema("c", MinorType.BIGINT, DataMode.REQUIRED );
+ ColumnMetadata col4 = TupleSchema.fromField(field4);
+ assertFalse(col.isEquivalent(col4));
+
+ MaterializedField field5 = SchemaBuilder.columnSchema("c", MinorType.INT, DataMode.OPTIONAL );
+ ColumnMetadata col5 = TupleSchema.fromField(field5);
+ assertFalse(col.isEquivalent(col5));
+
+ ColumnMetadata col6 = col.cloneEmpty();
+ assertTrue(col.isEquivalent(col6));
+
+ assertEquals(4, col.expectedWidth());
+ col.setExpectedWidth(10);
+ assertEquals(4, col.expectedWidth());
+
+ assertEquals(1, col.expectedElementCount());
+ col.setExpectedElementCount(2);
+ assertEquals(1, col.expectedElementCount());
+ }
+
+ @Test
+ public void testNullableFixedWidthColumn() {
+
+ MaterializedField field = SchemaBuilder.columnSchema("c", MinorType.INT, DataMode.OPTIONAL );
+ ColumnMetadata col = TupleSchema.fromField(field);
+
+ assertEquals(ColumnMetadata.StructureType.PRIMITIVE, col.structureType());
+ assertTrue(col.isNullable());
+ assertFalse(col.isArray());
+ assertFalse(col.isVariableWidth());
+ assertFalse(col.isMap());
+ assertFalse(col.isList());
+
+ assertEquals(4, col.expectedWidth());
+ col.setExpectedWidth(10);
+ assertEquals(4, col.expectedWidth());
+
+ assertEquals(1, col.expectedElementCount());
+ col.setExpectedElementCount(2);
+ assertEquals(1, col.expectedElementCount());
+ }
+
+ @Test
+ public void testRepeatedFixedWidthColumn() {
+
+ MaterializedField field = SchemaBuilder.columnSchema("c", MinorType.INT, DataMode.REPEATED );
+ ColumnMetadata col = TupleSchema.fromField(field);
+
+ assertFalse(col.isNullable());
+ assertTrue(col.isArray());
+ assertFalse(col.isVariableWidth());
+ assertFalse(col.isMap());
+ assertFalse(col.isList());
+
+ assertEquals(4, col.expectedWidth());
+ col.setExpectedWidth(10);
+ assertEquals(4, col.expectedWidth());
+
+ assertEquals(ColumnMetadata.DEFAULT_ARRAY_SIZE, col.expectedElementCount());
+
+ col.setExpectedElementCount(2);
+ assertEquals(2, col.expectedElementCount());
+
+ col.setExpectedElementCount(0);
+ assertEquals(1, col.expectedElementCount());
+ }
+
+ @Test
+ public void testRequiredVariableWidthColumn() {
+
+ MaterializedField field = SchemaBuilder.columnSchema("c", MinorType.VARCHAR, DataMode.REQUIRED );
+ ColumnMetadata col = TupleSchema.fromField(field);
+
+ assertEquals(ColumnMetadata.StructureType.PRIMITIVE, col.structureType());
+ assertNull(col.mapSchema());
+ assertFalse(col.isNullable());
+ assertFalse(col.isArray());
+ assertTrue(col.isVariableWidth());
+ assertFalse(col.isMap());
+ assertFalse(col.isList());
+
+ // A different precision is a different type.
+
+ MaterializedField field2 = new SchemaBuilder.ColumnBuilder("c", MinorType.VARCHAR)
+ .setMode(DataMode.REQUIRED)
+ .setPrecision(10)
+ .build();
+
+ ColumnMetadata col2 = TupleSchema.fromField(field2);
+ assertFalse(col.isEquivalent(col2));
+
+ assertEquals(50, col.expectedWidth());
+ col.setExpectedWidth(10);
+ assertEquals(10, col.expectedWidth());
+
+ assertEquals(1, col.expectedElementCount());
+ col.setExpectedElementCount(2);
+ assertEquals(1, col.expectedElementCount());
+
+ // If precision is provided, then that is the default width
+
+ col = TupleSchema.fromField(field2);
+ assertEquals(10, col.expectedWidth());
+ }
+
+ @Test
+ public void testNullableVariableWidthColumn() {
+
+ MaterializedField field = SchemaBuilder.columnSchema("c", MinorType.VARCHAR, DataMode.OPTIONAL );
+ ColumnMetadata col = TupleSchema.fromField(field);
+
+ assertTrue(col.isNullable());
+ assertFalse(col.isArray());
+ assertTrue(col.isVariableWidth());
+ assertFalse(col.isMap());
+ assertFalse(col.isList());
+
+ assertEquals(50, col.expectedWidth());
+ col.setExpectedWidth(10);
+ assertEquals(10, col.expectedWidth());
+
+ assertEquals(1, col.expectedElementCount());
+ col.setExpectedElementCount(2);
+ assertEquals(1, col.expectedElementCount());
+ }
+
+ @Test
+ public void testRepeatedVariableWidthColumn() {
+
+ MaterializedField field = SchemaBuilder.columnSchema("c", MinorType.VARCHAR, DataMode.REPEATED );
+ ColumnMetadata col = TupleSchema.fromField(field);
+
+ assertFalse(col.isNullable());
+ assertTrue(col.isArray());
+ assertTrue(col.isVariableWidth());
+ assertFalse(col.isMap());
+ assertFalse(col.isList());
+
+ assertEquals(50, col.expectedWidth());
+ col.setExpectedWidth(10);
+ assertEquals(10, col.expectedWidth());
+
+ assertEquals(ColumnMetadata.DEFAULT_ARRAY_SIZE, col.expectedElementCount());
+
+ col.setExpectedElementCount(2);
+ assertEquals(2, col.expectedElementCount());
+ }
+
+ /**
+ * Tests a map column. Maps can only be required or repeated, not nullable.
+ * (But, the columns in the map can be nullable.)
+ */
+
+ @Test
+ public void testMapColumn() {
+
+ MaterializedField field = SchemaBuilder.columnSchema("m", MinorType.MAP, DataMode.REQUIRED );
+ ColumnMetadata col = TupleSchema.fromField(field);
+
+ assertTrue(col instanceof MapColumnMetadata);
+ assertNotNull(col.mapSchema());
+ assertEquals(0, col.mapSchema().size());
+ assertSame(col, col.mapSchema().parent());
+
+ MapColumnMetadata mapCol = (MapColumnMetadata) col;
+ assertNull(mapCol.parentTuple());
+
+ assertEquals(ColumnMetadata.StructureType.TUPLE, col.structureType());
+ assertFalse(col.isNullable());
+ assertFalse(col.isArray());
+ assertFalse(col.isVariableWidth());
+ assertTrue(col.isMap());
+ assertFalse(col.isList());
+
+ assertEquals(0, col.expectedWidth());
+ col.setExpectedWidth(10);
+ assertEquals(0, col.expectedWidth());
+
+ assertEquals(1, col.expectedElementCount());
+ col.setExpectedElementCount(2);
+ assertEquals(1, col.expectedElementCount());
+ }
+
+ @Test
+ public void testRepeatedMapColumn() {
+
+ MaterializedField field = SchemaBuilder.columnSchema("m", MinorType.MAP, DataMode.REPEATED );
+ ColumnMetadata col = TupleSchema.fromField(field);
+
+ assertTrue(col instanceof MapColumnMetadata);
+ assertNotNull(col.mapSchema());
+ assertEquals(0, col.mapSchema().size());
+
+ assertFalse(col.isNullable());
+ assertTrue(col.isArray());
+ assertFalse(col.isVariableWidth());
+ assertTrue(col.isMap());
+ assertFalse(col.isList());
+
+ assertEquals(0, col.expectedWidth());
+ col.setExpectedWidth(10);
+ assertEquals(0, col.expectedWidth());
+
+ assertEquals(ColumnMetadata.DEFAULT_ARRAY_SIZE, col.expectedElementCount());
+
+ col.setExpectedElementCount(2);
+ assertEquals(2, col.expectedElementCount());
+ }
+
+ // List
+
+ // Repeated list
+
+ /**
+ * Test the basics of an empty root tuple (i.e. row) schema.
+ */
+
+ @Test
+ public void testEmptyRootTuple() {
+
+ TupleMetadata root = new TupleSchema();
+
+ assertEquals(0, root.size());
+ assertTrue(root.isEmpty());
+ assertEquals(-1, root.index("foo"));
+
+ try {
+ root.metadata(0);
+ fail();
+ } catch (IndexOutOfBoundsException e) {
+ // Expected
+ }
+ assertNull(root.metadata("foo"));
+
+ try {
+ root.column(0);
+ fail();
+ } catch (IndexOutOfBoundsException e) {
+ // Expected
+ }
+ assertNull(root.column("foo"));
+
+ try {
+ root.fullName(0);
+ fail();
+ } catch (IndexOutOfBoundsException e) {
+ // Expected
+ }
+
+ // The full name method does not check if the column is actually
+ // in the tuple.
+
+ MaterializedField field = SchemaBuilder.columnSchema("c", MinorType.INT, DataMode.REQUIRED );
+ ColumnMetadata col = TupleSchema.fromField(field);
+ assertEquals("c", root.fullName(col));
+
+ assertTrue(root.isEquivalent(root));
+ assertNull(root.parent());
+ assertTrue(root.toFieldList().isEmpty());
+ }
+
+ /**
+ * Test the basics of a non-empty root tuple (i.e. a row) using a pair
+ * of primitive columns.
+ */
+
+ @Test
+ public void testNonEmptyRootTuple() {
+
+ TupleMetadata root = new TupleSchema();
+
+ MaterializedField fieldA = SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED );
+ ColumnMetadata colA = root.add(fieldA);
+
+ assertEquals(1, root.size());
+ assertFalse(root.isEmpty());
+ assertEquals(0, root.index("a"));
+ assertEquals(-1, root.index("b"));
+
+ assertSame(fieldA, root.column(0));
+ assertSame(fieldA, root.column("a"));
+ assertSame(fieldA, root.column("A"));
+
+ assertSame(colA, root.metadata(0));
+ assertSame(colA, root.metadata("a"));
+
+ assertEquals("a", root.fullName(0));
+ assertEquals("a", root.fullName(colA));
+
+ try {
+ root.add(fieldA);
+ fail();
+ } catch (IllegalArgumentException e) {
+ // Expected
+ }
+
+ MaterializedField fieldB = SchemaBuilder.columnSchema("b", MinorType.VARCHAR, DataMode.OPTIONAL );
+ ColumnMetadata colB = TupleSchema.fromField(fieldB);
+ int indexB = root.addColumn(colB);
+
+ assertEquals(1, indexB);
+ assertEquals(2, root.size());
+ assertFalse(root.isEmpty());
+ assertEquals(indexB, root.index("b"));
+
+ assertSame(fieldB, root.column(1));
+ assertSame(fieldB, root.column("b"));
+
+ assertSame(colB, root.metadata(1));
+ assertSame(colB, root.metadata("b"));
+
+ assertEquals("b", root.fullName(1));
+ assertEquals("b", root.fullName(colB));
+
+ try {
+ root.add(fieldB);
+ fail();
+ } catch (IllegalArgumentException e) {
+ // Expected
+ }
+
+ List<MaterializedField> fieldList = root.toFieldList();
+ assertSame(fieldA, fieldList.get(0));
+ assertSame(fieldB, fieldList.get(1));
+
+ TupleMetadata emptyRoot = new TupleSchema();
+ assertFalse(emptyRoot.isEquivalent(root));
+
+ // Same schema: the tuples are equivalent
+
+ TupleMetadata root3 = new TupleSchema();
+ root3.add(fieldA);
+ root3.addColumn(colB);
+ assertTrue(root3.isEquivalent(root));
+ assertTrue(root.isEquivalent(root3));
+
+ // Same columns, different order. The tuples are not equivalent.
+
+ TupleMetadata root4 = new TupleSchema();
+ root4.addColumn(colB);
+ root4.add(fieldA);
+ assertFalse(root4.isEquivalent(root));
+ assertFalse(root.isEquivalent(root4));
+
+ // A tuple is equivalent to its copy.
+
+ assertTrue(root.isEquivalent(((TupleSchema) root).copy()));
+
+ // And it is equivalent to the round trip to a batch schema.
+
+ BatchSchema batchSchema = ((TupleSchema) root).toBatchSchema(SelectionVectorMode.NONE);
+ assertTrue(root.isEquivalent(TupleSchema.fromFields(batchSchema)));
+ }
+
+ /**
+ * Test a complex map schema of the form:<br>
+ * a.`b.x`.`c.y`.d<br>
+ * in which columns "a", "b.x" and "c.y" are maps, "b.x" and "c.y" are names
+ * that contain dots, and d is primitive.
+ */
+
+ @Test
+ public void testMapTuple() {
+
+ TupleMetadata root = new TupleSchema();
+
+ MaterializedField fieldA = SchemaBuilder.columnSchema("a", MinorType.MAP, DataMode.REQUIRED);
+ ColumnMetadata colA = root.add(fieldA);
+ TupleMetadata mapA = colA.mapSchema();
+
+ MaterializedField fieldB = SchemaBuilder.columnSchema("b.x", MinorType.MAP, DataMode.REQUIRED);
+ ColumnMetadata colB = mapA.add(fieldB);
+ TupleMetadata mapB = colB.mapSchema();
+
+ MaterializedField fieldC = SchemaBuilder.columnSchema("c.y", MinorType.MAP, DataMode.REQUIRED);
+ ColumnMetadata colC = mapB.add(fieldC);
+ TupleMetadata mapC = colC.mapSchema();
+
+ MaterializedField fieldD = SchemaBuilder.columnSchema("d", MinorType.VARCHAR, DataMode.REQUIRED);
+ ColumnMetadata colD = mapC.add(fieldD);
+
+ MaterializedField fieldE = SchemaBuilder.columnSchema("e", MinorType.INT, DataMode.REQUIRED);
+ ColumnMetadata colE = mapC.add(fieldE);
+
+ assertEquals(1, root.size());
+ assertEquals(1, mapA.size());
+ assertEquals(1, mapB.size());
+ assertEquals(2, mapC.size());
+
+ assertSame(colA, root.metadata("a"));
+ assertSame(colB, mapA.metadata("b.x"));
+ assertSame(colC, mapB.metadata("c.y"));
+ assertSame(colD, mapC.metadata("d"));
+ assertSame(colE, mapC.metadata("e"));
+
+ // The full name contains quoted names if they contain dots.
+ // This name is more for diagnostic than semantic purposes.
+
+ assertEquals("a", root.fullName(0));
+ assertEquals("a.`b.x`", mapA.fullName(0));
+ assertEquals("a.`b.x`.`c.y`", mapB.fullName(0));
+ assertEquals("a.`b.x`.`c.y`.d", mapC.fullName(0));
+ assertEquals("a.`b.x`.`c.y`.e", mapC.fullName(1));
+
+ assertEquals(1, colA.schema().getChildren().size());
+ assertEquals(1, colB.schema().getChildren().size());
+ assertEquals(2, colC.schema().getChildren().size());
+
+ // Yes, it is awful that MaterializedField does not provide indexed
+ // access to its children. That's one reason we have the TupleMetadata
+ // classes.
+
+ assertSame(fieldB, colA.schema().getChildren().iterator().next());
+ assertSame(fieldC, colB.schema().getChildren().iterator().next());
+ Iterator<MaterializedField> iterC = colC.schema().getChildren().iterator();
+ assertSame(fieldD, iterC.next());
+ assertSame(fieldE, iterC.next());
+
+ // Copying should be deep.
+
+ TupleMetadata root2 = ((TupleSchema) root).copy();
+ assertEquals(2, root2.metadata(0).mapSchema().metadata(0).mapSchema().metadata(0).mapSchema().size());
+ assert(root.isEquivalent(root2));
+ }
+}
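
The expectedWidth and expectedElementCount attributes verified above act as allocation hints: fixed-width and map columns ignore the setters, while variable-width and repeated columns honor them. A sketch of how a consumer might use the hints; the bytesPerRow arithmetic is illustrative, not a Drill API:

    MaterializedField field = SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REPEATED);
    ColumnMetadata col = TupleSchema.fromField(field);
    col.setExpectedWidth(20);          // average string width hint
    col.setExpectedElementCount(5);    // average array cardinality hint
    int bytesPerRow = col.expectedWidth() * col.expectedElementCount();  // 100
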
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestVectorContainer.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestVectorContainer.java b/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestVectorContainer.java
deleted file mode 100644
index b17bf18..0000000
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/record/TestVectorContainer.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.record;
-
-import static org.junit.Assert.*;
-
-import org.apache.drill.categories.VectorTest;
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.test.DrillTest;
-import org.apache.drill.test.OperatorFixture;
-import org.apache.drill.test.rowSet.RowSet;
-import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
-import org.apache.drill.test.rowSet.RowSetComparison;
-import org.apache.drill.test.rowSet.SchemaBuilder;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-@Category(VectorTest.class)
-public class TestVectorContainer extends DrillTest {
-
- // TODO: Replace the following with an extension of SubOperatorTest class
- // once that is available.
-
- protected static OperatorFixture fixture;
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- fixture = OperatorFixture.standardFixture();
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- fixture.close();
- }
-
- /**
- * Test of the ability to merge two schemas and to merge
- * two vector containers. The merge is "horizontal", like
- * a row-by-row join. Since each container is a list of
- * vectors, we just combine the two lists to create the
- * merged result.
- */
- @Test
- public void testContainerMerge() {
-
- // Simulated data from a reader
-
- BatchSchema leftSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .addNullable("b", MinorType.VARCHAR)
- .build();
- SingleRowSet left = fixture.rowSetBuilder(leftSchema)
- .add(10, "fred")
- .add(20, "barney")
- .add(30, "wilma")
- .build();
-
- // Simulated "implicit" coumns: row number and file name
-
- BatchSchema rightSchema = new SchemaBuilder()
- .add("x", MinorType.SMALLINT)
- .add("y", MinorType.VARCHAR)
- .build();
- SingleRowSet right = fixture.rowSetBuilder(rightSchema)
- .add(1, "foo.txt")
- .add(2, "bar.txt")
- .add(3, "dino.txt")
- .build();
-
- // The merge batch we expect to see
-
- BatchSchema expectedSchema = new SchemaBuilder()
- .add("a", MinorType.INT)
- .addNullable("b", MinorType.VARCHAR)
- .add("x", MinorType.SMALLINT)
- .add("y", MinorType.VARCHAR)
- .build();
- SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
- .add(10, "fred", 1, "foo.txt")
- .add(20, "barney", 2, "bar.txt")
- .add(30, "wilma", 3, "dino.txt")
- .build();
-
- // Merge containers without selection vector
-
- RowSet merged = fixture.wrap(
- left.container().merge(right.container()));
-
- RowSetComparison comparison = new RowSetComparison(expected);
- comparison.verify(merged);
-
- // Merge containers via row set facade
-
- RowSet mergedRs = left.merge(right);
- comparison.verifyAndClearAll(mergedRs);
-
- // Add a selection vector. Merging is forbidden, in the present code,
- // for batches that have a selection vector.
-
- SingleRowSet leftIndirect = left.toIndirect();
- try {
- leftIndirect.merge(right);
- fail();
- } catch (IllegalArgumentException e) {
- // Expected
- }
- leftIndirect.clear();
- right.clear();
- }
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestValueVector.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestValueVector.java b/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestValueVector.java
index 0f8f766..621d288 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestValueVector.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/record/vector/TestValueVector.java
@@ -251,6 +251,7 @@ public class TestValueVector extends ExecTest {
final DrillBuf newBuf = allocator.buffer(size);
final DrillBuf writeBuf = newBuf;
for(final DrillBuf buffer : buffers) {
+ @SuppressWarnings("resource")
final DrillBuf readBuf = (DrillBuf) buffer.slice();
final int nBytes = readBuf.readableBytes();
final byte[] bytes = new byte[nBytes];
@@ -266,6 +267,7 @@ public class TestValueVector extends ExecTest {
final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedIntHolder.TYPE);
// Create a new value vector.
+ @SuppressWarnings("resource")
final RepeatedIntVector vector1 = new RepeatedIntVector(field, allocator);
// Populate the vector.
@@ -321,6 +323,7 @@ the interface to load has changed
final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE);
// Create a new value vector for 1024 variable length strings.
+ @SuppressWarnings("resource")
final VarCharVector vector1 = new VarCharVector(field, allocator);
final VarCharVector.Mutator mutator = vector1.getMutator();
vector1.allocateNew(1024 * 10, 1024);
@@ -337,7 +340,9 @@ the interface to load has changed
// Combine the backing buffers so we can load them into a new vector.
final DrillBuf[] buffers1 = vector1.getBuffers(false);
+ @SuppressWarnings("resource")
final DrillBuf buffer1 = combineBuffers(allocator, buffers1);
+ @SuppressWarnings("resource")
final VarCharVector vector2 = new VarCharVector(field, allocator);
vector2.load(vector1.getMetadata(), buffer1);
@@ -360,6 +365,7 @@ the interface to load has changed
final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);
// Create a new value vector for 1024 nullable variable length strings.
+ @SuppressWarnings("resource")
final NullableVarCharVector vector1 = new NullableVarCharVector(field, allocator);
final NullableVarCharVector.Mutator mutator = vector1.getMutator();
vector1.allocateNew(1024 * 10, 1024);
@@ -394,7 +400,9 @@ the interface to load has changed
// Combine into a single buffer so we can load it into a new vector.
final DrillBuf[] buffers1 = vector1.getBuffers(false);
+ @SuppressWarnings("resource")
final DrillBuf buffer1 = combineBuffers(allocator, buffers1);
+ @SuppressWarnings("resource")
final NullableVarCharVector vector2 = new NullableVarCharVector(field, allocator);
vector2.load(vector1.getMetadata(), buffer1);
@@ -673,6 +681,7 @@ the interface to load has changed
}
for (int i = 0; i < valueVectors.length; i++) {
+ @SuppressWarnings("resource")
final ValueVector vv = valueVectors[i];
final int vvCapacity = vv.getValueCapacity();
@@ -718,6 +727,7 @@ the interface to load has changed
*
* @param test test function to execute
*/
+ @SuppressWarnings("resource")
private void testVectors(VectorVerifier test) throws Exception {
final MaterializedField[] fields = {
MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE),
@@ -777,6 +787,7 @@ the interface to load has changed
@Test
public void testVectorCanLoadEmptyBuffer() throws Exception {
+ @SuppressWarnings("resource")
final DrillBuf empty = allocator.getEmpty();
testVectors(new VectorVerifier() {
@@ -798,6 +809,7 @@ the interface to load has changed
});
}
+ @SuppressWarnings("resource")
@Test
public void testListVectorShouldNotThrowOversizedAllocationException() throws Exception {
final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH,
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
index a8eef3c..5af0306 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestInfoSchema.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsv.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsv.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsv.java
index c792233..5ce8e3f 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsv.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsv.java
@@ -103,7 +103,7 @@ public class TestCsv extends ClusterTest {
.add("c", MinorType.VARCHAR)
.build();
RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .add("10", "foo", "bar")
+ .addRow("10", "foo", "bar")
.build();
new RowSetComparison(expected)
.verifyAndClearAll(actual);
@@ -129,7 +129,7 @@ public class TestCsv extends ClusterTest {
.add("c_2_2", MinorType.VARCHAR)
.build();
RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .add("10", "foo", "bar", "fourth", "fifth", "sixth")
+ .addRow("10", "foo", "bar", "fourth", "fifth", "sixth")
.build();
new RowSetComparison(expected)
.verifyAndClearAll(actual);
@@ -151,7 +151,7 @@ public class TestCsv extends ClusterTest {
assertEquals(expectedSchema, actual.batchSchema());
RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
- .add("10", "foo", "bar")
+ .addRow("10", "foo", "bar")
.build();
new RowSetComparison(expected)
.verifyAndClearAll(actual);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java b/exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java
index 8366b7a..69667a8 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/ExampleTest.java
@@ -123,8 +123,8 @@ public class ExampleTest {
.build();
final RowSet rowSet = new RowSetBuilder(allocator, schema)
- .add("1", "kiwi")
- .add("2", "watermelon")
+ .addRow("1", "kiwi")
+ .addRow("2", "watermelon")
.build();
new JsonFileBuilder(rowSet).build(tableFile);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/OperatorFixture.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/OperatorFixture.java b/exec/java-exec/src/test/java/org/apache/drill/test/OperatorFixture.java
index c03f0b7..a1b8af5 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/OperatorFixture.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/OperatorFixture.java
@@ -42,8 +42,12 @@ import org.apache.drill.exec.ops.OperatorStatReceiver;
import org.apache.drill.exec.ops.OperatorStats;
import org.apache.drill.exec.physical.base.PhysicalOperator;
import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.record.TupleSchema;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.record.selection.SelectionVector2;
import org.apache.drill.exec.server.options.OptionSet;
import org.apache.drill.exec.server.options.SystemOptionManager;
import org.apache.drill.exec.testing.ExecutionControls;
@@ -290,21 +294,29 @@ public class OperatorFixture extends BaseFixture implements AutoCloseable {
}
public RowSetBuilder rowSetBuilder(BatchSchema schema) {
+ return rowSetBuilder(TupleSchema.fromFields(schema));
+ }
+
+ public RowSetBuilder rowSetBuilder(TupleMetadata schema) {
return new RowSetBuilder(allocator, schema);
}
public ExtendableRowSet rowSet(BatchSchema schema) {
- return new DirectRowSet(allocator, schema);
+ return DirectRowSet.fromSchema(allocator, schema);
+ }
+
+ public ExtendableRowSet rowSet(TupleMetadata schema) {
+ return DirectRowSet.fromSchema(allocator, schema);
}
public RowSet wrap(VectorContainer container) {
switch (container.getSchema().getSelectionVectorMode()) {
case FOUR_BYTE:
- return new HyperRowSetImpl(allocator(), container, container.getSelectionVector4());
+ return new HyperRowSetImpl(container, container.getSelectionVector4());
case NONE:
- return new DirectRowSet(allocator(), container);
+ return DirectRowSet.fromContainer(container);
case TWO_BYTE:
- return new IndirectRowSet(allocator(), container);
+ return IndirectRowSet.fromSv2(container, container.getSelectionVector2());
default:
throw new IllegalStateException( "Unexpected selection mode" );
}
@@ -342,4 +354,14 @@ public class OperatorFixture extends BaseFixture implements AutoCloseable {
public OperatorContext operatorContext(PhysicalOperator config) {
return new TestOperatorContext(context, allocator(), config, stats);
}
+
+ public RowSet wrap(VectorContainer container, SelectionVector2 sv2) {
+ if (sv2 == null) {
+ assert container.getSchema().getSelectionVectorMode() == SelectionVectorMode.NONE;
+ return DirectRowSet.fromContainer(container);
+ } else {
+ assert container.getSchema().getSelectionVectorMode() == SelectionVectorMode.TWO_BYTE;
+ return IndirectRowSet.fromSv2(container, sv2);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java
index 58f888d..2d1aa9b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/QueryBuilder.java
@@ -55,7 +55,7 @@ import org.apache.drill.test.BufferingQueryEventListener.QueryEvent;
import org.apache.drill.test.ClientFixture.StatementParser;
import org.apache.drill.test.rowSet.DirectRowSet;
import org.apache.drill.test.rowSet.RowSet;
-import org.apache.drill.test.rowSet.RowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetReader;
import com.google.common.base.Preconditions;
@@ -338,7 +338,7 @@ public class QueryBuilder {
dataBatch.release();
VectorContainer container = loader.getContainer();
container.setRecordCount(loader.getRecordCount());
- return new DirectRowSet(client.allocator(), container);
+ return DirectRowSet.fromContainer(container);
} catch (SchemaChangeException e) {
throw new IllegalStateException(e);
}
@@ -364,7 +364,7 @@ public class QueryBuilder {
}
RowSetReader reader = rowSet.reader();
reader.next();
- long value = reader.column(0).getLong();
+ long value = reader.scalar(0).getLong();
rowSet.clear();
return value;
}
@@ -385,7 +385,7 @@ public class QueryBuilder {
}
RowSetReader reader = rowSet.reader();
reader.next();
- int value = reader.column(0).getInt();
+ int value = reader.scalar(0).getInt();
rowSet.clear();
return value;
}
@@ -407,10 +407,10 @@ public class QueryBuilder {
RowSetReader reader = rowSet.reader();
reader.next();
String value;
- if (reader.column(0).isNull()) {
+ if (reader.scalar(0).isNull()) {
value = null;
} else {
- value = reader.column(0).getString();
+ value = reader.scalar(0).getString();
}
rowSet.clear();
return value;
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/QueryRowSetIterator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/QueryRowSetIterator.java b/exec/java-exec/src/test/java/org/apache/drill/test/QueryRowSetIterator.java
index c329690..c1b9253 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/QueryRowSetIterator.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/QueryRowSetIterator.java
@@ -93,7 +93,7 @@ public class QueryRowSetIterator implements Iterator<DirectRowSet>, Iterable<Dir
batch = null;
VectorContainer container = loader.getContainer();
container.setRecordCount(loader.getRecordCount());
- return new DirectRowSet(allocator, container);
+ return DirectRowSet.fromContainer(container);
} catch (SchemaChangeException e) {
throw new IllegalStateException(e);
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractRowSet.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractRowSet.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractRowSet.java
index 6400a5b..d128e4f 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractRowSet.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/AbstractRowSet.java
@@ -19,12 +19,10 @@ package org.apache.drill.test.rowSet;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.TupleMetadata;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.vector.SchemaChangeCallBack;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnAccessor.RowIndex;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader;
-import org.apache.drill.exec.vector.accessor.impl.TupleReaderImpl;
/**
* Basic implementation of a row set for both the single and multiple
@@ -33,119 +31,36 @@ import org.apache.drill.exec.vector.accessor.impl.TupleReaderImpl;
public abstract class AbstractRowSet implements RowSet {
- /**
- * Row set index base class used when indexing rows within a row
- * set for a row set reader. Keeps track of the current position,
- * which starts before the first row, meaning that the client
- * must call <tt>next()</tt> to advance to the first row.
- */
-
- public static abstract class RowSetIndex implements RowIndex {
- protected int rowIndex = -1;
-
- public int position() { return rowIndex; }
- public abstract boolean next();
- public abstract int size();
- public abstract boolean valid();
- public void set(int index) { rowIndex = index; }
- }
-
- /**
- * Bounded (read-only) version of the row set index. When reading,
- * the row count is fixed, and set here.
- */
-
- public static abstract class BoundedRowIndex extends RowSetIndex {
-
- protected final int rowCount;
-
- public BoundedRowIndex(int rowCount) {
- this.rowCount = rowCount;
- }
-
- @Override
- public boolean next() {
- if (++rowIndex < rowCount ) {
- return true;
- } else {
- rowIndex--;
- return false;
- }
- }
-
- @Override
- public int size() { return rowCount; }
-
- @Override
- public boolean valid() { return rowIndex < rowCount; }
- }
-
- /**
- * Reader implementation for a row set.
- */
-
- public class RowSetReaderImpl extends TupleReaderImpl implements RowSetReader {
-
- protected final RowSetIndex index;
-
- public RowSetReaderImpl(TupleSchema schema, RowSetIndex index, AbstractColumnReader[] readers) {
- super(schema, readers);
- this.index = index;
- }
-
- @Override
- public boolean next() { return index.next(); }
-
- @Override
- public boolean valid() { return index.valid(); }
-
- @Override
- public int index() { return index.position(); }
-
- @Override
- public int size() { return index.size(); }
-
- @Override
- public int rowIndex() { return index.index(); }
-
- @Override
- public int batchIndex() { return index.batch(); }
-
- @Override
- public void set(int index) { this.index.set(index); }
- }
-
- protected final BufferAllocator allocator;
- protected final RowSetSchema schema;
- protected final VectorContainer container;
protected SchemaChangeCallBack callBack = new SchemaChangeCallBack();
+ protected VectorContainer container;
+ protected TupleMetadata schema;
- public AbstractRowSet(BufferAllocator allocator, BatchSchema schema, VectorContainer container) {
- this.allocator = allocator;
- this.schema = new RowSetSchema(schema);
+ public AbstractRowSet(VectorContainer container, TupleMetadata schema) {
this.container = container;
+ this.schema = schema;
}
@Override
- public VectorAccessible vectorAccessible() { return container; }
+ public VectorAccessible vectorAccessible() { return container(); }
@Override
public VectorContainer container() { return container; }
@Override
- public int rowCount() { return container.getRecordCount(); }
+ public int rowCount() { return container().getRecordCount(); }
@Override
public void clear() {
+ VectorContainer container = container();
container.zeroVectors();
container.setRecordCount(0);
}
@Override
- public RowSetSchema schema() { return schema; }
+ public TupleMetadata schema() { return schema; }
@Override
- public BufferAllocator allocator() { return allocator; }
+ public BufferAllocator allocator() { return container.getAllocator(); }
@Override
public void print() {
@@ -158,7 +73,5 @@ public abstract class AbstractRowSet implements RowSet {
}
@Override
- public BatchSchema batchSchema() {
- return container.getSchema();
- }
+ public BatchSchema batchSchema() { return container().getSchema(); }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriterIndex.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriterIndex.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriterIndex.java
new file mode 100644
index 0000000..7e225c9
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ColumnWriterIndex.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+/**
+ * A Drill record batch consists of a variety of vectors, including maps and lists.
+ * Each vector is written independently. A client (such as a record reader)
+ * may skip some values in each row if no values appear for those columns.
+ * <p>
+ * This index provides a single view of the "current row" or "current array index"
+ * across a set of vectors. Each writer consults this index to determine:
+ * <ul>
+ * <li>The position to which to write a value.</li>
+ * <li>Whether the write position is beyond the "last write" position, which
+ * would require filling in any "missing" values.</li>
+ * </ul>
+ */
+
+public interface ColumnWriterIndex {
+
+ /**
+ * Index of the first entry for the current row
+ * @return index of the first entry for the current row
+ */
+
+ int rowStartIndex();
+
+ /**
+ * Current row or array index.
+ * @return row or array index
+ */
+
+ int vectorIndex();
+
+ /**
+ * Increments the index for array elements. For arrays, writing
+ * (or saving) one value automatically moves the index to the
+ * next value. Ignored for non-element indexes.
+ */
+
+ void nextElement();
+
+ /**
+ * When handling overflow, the index must be reset so that the current row
+ * starts at the start of the vector. Relative offsets must be preserved.
+ * (That is, if the current write position for an array is four greater than
+ * the start, then that offset must now be reset to four from the start of
+ * the vector.)
+ */
+
+ void rollover();
+
+ /**
+ * If this index represents a repeat level, return the index of the
+ * next higher repeat level.
+ *
+ * @return the outer repeat level index, if any
+ */
+
+ ColumnWriterIndex outerIndex();
+}
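For illustration only, a minimal sketch of how a writer implementation might
consult this index when writing a value. The class and its two abstract
helpers are hypothetical, not part of this patch:

import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;

public abstract class ExampleIntWriter {
  private ColumnWriterIndex vectorIndex;  // bound by the writer framework
  private int lastWriteIndex = -1;        // highest position written so far

  public void bindIndex(ColumnWriterIndex index) {
    vectorIndex = index;
  }

  public void setInt(int value) {
    int writeIndex = vectorIndex.vectorIndex();
    // Back-fill any positions skipped since the last write, as when a
    // column has no value in some rows.
    while (lastWriteIndex + 1 < writeIndex) {
      writeZeroAt(++lastWriteIndex);
    }
    writeValueAt(writeIndex, value);
    lastWriteIndex = writeIndex;
  }

  protected abstract void writeZeroAt(int index);
  protected abstract void writeValueAt(int index, int value);
}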
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java
new file mode 100644
index 0000000..9c53e58
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectReader.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+/**
+ * Defines a reader to get values from value vectors using
+ * a simple, uniform interface modeled after a JSON object.
+ * Every column value is an object of one of three types:
+ * scalar, array or tuple. Methods exist to "cast" this object
+ * to the proper type. This model allows a very simple representation:
+ * tuples (rows, maps) consist of objects. Arrays are lists of
+ * objects.
+ * <p>
+ * {@see ObjectWriter}
+ */
+
+public interface ObjectReader {
+
+ /**
+ * The type of this reader.
+ *
+ * @return type of reader
+ */
+
+ ObjectType type();
+ ScalarReader scalar();
+ ScalarElementReader elements();
+ TupleReader tuple();
+ ArrayReader array();
+
+ /**
+ * Return the value of the underlying data as a Java object.
+ * Primarily for testing.
+ * @return Java object that represents the underlying value
+ */
+
+ Object getObject();
+
+ /**
+ * Return the entire object as a string. Primarily for debugging.
+ * @return string representation of the object
+ */
+
+ String getAsString();
+}
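A usage sketch, assuming a TupleReader for a row containing a hypothetical
column name; it uses only methods defined on the interfaces in this patch:

import org.apache.drill.exec.vector.accessor.ObjectReader;
import org.apache.drill.exec.vector.accessor.ObjectType;
import org.apache.drill.exec.vector.accessor.TupleReader;

public class ReaderExample {
  public static void printColumn(TupleReader row, String colName) {
    ObjectReader col = row.column(colName);
    if (col.type() == ObjectType.SCALAR) {
      // "Cast" the object to its scalar form to read the value.
      System.out.println(col.scalar().getAsString());
    } else {
      // Tuples and arrays render themselves recursively.
      System.out.println(col.getAsString());
    }
  }
}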
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectType.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectType.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectType.java
new file mode 100644
index 0000000..e07ea75
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectType.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+/**
+ * Type of writer. Follows the JSON-style model, with the
+ * most abstract object types being a scalar (primitive),
+ * tuple (map or row) or an array (repeated type.)
+ */
+
+public enum ObjectType {
+ SCALAR, TUPLE, ARRAY
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectWriter.java
new file mode 100644
index 0000000..a49b0d8
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ObjectWriter.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;
+import org.apache.drill.exec.vector.accessor.TupleWriter.TupleWriterListener;
+
+/**
+ * Represents a column within a tuple. A column can be an array, a scalar or a
+ * tuple. Each has an associated column metadata (schema) and a writer. The
+ * writer is one of three kinds, depending on the kind of the column. If the
+ * column is a map, then the column also has an associated tuple loader to
+ * define and write to the tuple.
+ * <p>
+ * This interface defines a writer to set values for value vectors using a
+ * simple, uniform interface modeled after a JSON object. Every column value is
+ * an object of one of three types: scalar, array or tuple. Methods exist to
+ * "cast" this object to the proper type. This model allows a very simple
+ * representation: tuples (rows, maps) consist of objects. Arrays are lists of
+ * objects.
+ * <p>
+ * Every column resides at an index, is defined by a schema, is backed by a
+ * value vector, and is written to by a writer. Each column also tracks the
+ * schema version in which it was added to detect schema evolution. Each column
+ * has an optional overflow vector that holds overflow record values when a
+ * batch becomes full.
+ * <p>
+ * {@see ObjectReader}
+ */
+
+public interface ObjectWriter {
+
+ /**
+ * Returns the schema of the column associated with this writer.
+ *
+ * @return schema for this writer's column
+ */
+
+ ColumnMetadata schema();
+
+ /**
+ * Bind a listener to the underlying scalar column, or array of scalar
+ * columns. Not valid if the underlying writer is a map or array of maps.
+ *
+ * @param listener
+ * the column listener to bind
+ */
+
+ void bindListener(ColumnWriterListener listener);
+
+ /**
+ * Bind a listener to the underlying map or map array column. Not valid if the
+ * underlying writer is a scalar or scalar array.
+ *
+ * @param listener
+ * the tuple listener to bind
+ */
+
+ void bindListener(TupleWriterListener listener);
+
+ /**
+ * Return the object (structure) type of this writer.
+ *
+ * @return type indicating if this is a scalar, tuple or array
+ */
+
+ ObjectType type();
+
+ ScalarWriter scalar();
+
+ TupleWriter tuple();
+
+ ArrayWriter array();
+
+ /**
+ * For debugging, set the object to the proper form of Java object as defined
+ * by the underlying writer type.
+ *
+ * @param value
+ * Java object value to write
+ */
+
+ void set(Object value);
+}
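A sketch of writing one row through the object-writer tree, assuming a
TupleWriter for a row with an INT column "id" and a map column "name"
(both names hypothetical):

import org.apache.drill.exec.vector.accessor.ObjectWriter;
import org.apache.drill.exec.vector.accessor.TupleWriter;

public class WriterExample {
  public static void writeRow(TupleWriter row) {
    // column() returns the generic object; scalar()/tuple()/array()
    // "cast" it to the kind-specific writer.
    ObjectWriter idCol = row.column("id");
    idCol.scalar().setInt(10);
    TupleWriter name = row.column("name").tuple();
    name.scalar("first").setString("fred");
    name.scalar("last").setString("flintstone");
  }
}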
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java
new file mode 100644
index 0000000..d1f31a8
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarElementReader.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+import java.math.BigDecimal;
+
+import org.joda.time.Period;
+
+/**
+ * Interface to access the values of an array column. In general, each
+ * vector implements just one of the get methods. Check the vector type
+ * to know which method to use. In test code, though, the type is
+ * generally known to the test writer.
+ * <p>
+ * Arrays allow random access to the values within the array. The index
+ * passed to each method is the index into the array for the current
+ * row and column. (This means that arrays are three dimensional:
+ * the usual (row, column) dimensions plus an array index dimension:
+ * (row, column, array index).)
+ * <p>
+ * Note that the <tt>isNull()</tt> method is provided for completeness,
+ * but no Drill array allows null values at present.
+ * <p>
+ * {@see ScalarWriter}
+ */
+
+public interface ScalarElementReader {
+ /**
+ * Describe the type of the value. This is a compression of the
+ * value vector type: it describes which method will return the
+ * vector value.
+ * @return the value type which indicates which get method
+ * is valid for the column
+ */
+
+ ValueType valueType();
+ int size();
+
+ boolean isNull(int index);
+ int getInt(int index);
+ long getLong(int index);
+ double getDouble(int index);
+ String getString(int index);
+ byte[] getBytes(int index);
+ BigDecimal getDecimal(int index);
+ Period getPeriod(int index);
+
+ Object getObject(int index);
+ String getAsString(int index);
+}
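A sketch of reading all elements of a repeated VARCHAR column, given a
ScalarElementReader obtained from a tuple reader (the column itself is
hypothetical):

import org.apache.drill.exec.vector.accessor.ScalarElementReader;

public class ElementReaderExample {
  public static String joinElements(ScalarElementReader elements) {
    StringBuilder buf = new StringBuilder();
    for (int i = 0; i < elements.size(); i++) {
      if (i > 0) { buf.append(", "); }
      // The index is the (row, column, array index) third dimension.
      buf.append(elements.getString(i));
    }
    return buf.toString();
  }
}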
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java
new file mode 100644
index 0000000..e1c26bf
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarReader.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+import java.math.BigDecimal;
+
+import org.joda.time.Period;
+
+/**
+ * Defines a reader to obtain values from value vectors using
+ * a simple, uniform interface. Vector values are mapped to
+ * their "natural" representations: the representation closest
+ * to the actual vector value. For date and time values, this
+ * generally means a numeric value. Applications can then map
+ * this value to Java objects as desired. Decimal types all
+ * map to BigDecimal as that is the only way in Java to
+ * represent large decimal values.
+ * <p>
+ * In general, a column maps to just one value. However, derived
+ * classes may choose to provide type conversions if convenient.
+ * An exception is thrown if a call is made to a method that
+ * is not supported by the column type.
+ * <p>
+ * Values of scalars are provided directly, using the get method
+ * for the target type. Maps and arrays are structured types and
+ * require another level of reader abstraction to access each value
+ * in the structure.
+ * <p>
+ * {@see ScalarWriter}
+ */
+
+public interface ScalarReader {
+ /**
+ * Describe the type of the value. This is a compression of the
+ * value vector type: it describes which method will return the
+ * vector value.
+ * @return the value type which indicates which get method
+ * is valid for the column
+ */
+
+ ValueType valueType();
+
+ /**
+ * Report if the column is null. Non-nullable columns always
+ * return <tt>false</tt>.
+ * @return true if the column value is null, false if the
+ * value is set
+ */
+ boolean isNull();
+ int getInt();
+ long getLong();
+ double getDouble();
+ String getString();
+ byte[] getBytes();
+ BigDecimal getDecimal();
+ Period getPeriod();
+
+ Object getObject();
+ String getAsString();
+}
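A sketch of dispatching on valueType() to choose the correct get method for
an arbitrary scalar column:

import org.apache.drill.exec.vector.accessor.ScalarReader;

public class ScalarReaderExample {
  public static Object readValue(ScalarReader reader) {
    if (reader.isNull()) {
      return null;
    }
    switch (reader.valueType()) {
    case INTEGER: return reader.getInt();
    case LONG:    return reader.getLong();
    case DOUBLE:  return reader.getDouble();
    case STRING:  return reader.getString();
    case BYTES:   return reader.getBytes();
    case DECIMAL: return reader.getDecimal();
    case PERIOD:  return reader.getPeriod();
    default:      return reader.getObject();
    }
  }
}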
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarWriter.java
index 5cbe80a..776dc9c 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ScalarWriter.java
@@ -22,16 +22,81 @@ import java.math.BigDecimal;
import org.joda.time.Period;
/**
- * Methods common to the {@link ColumnWriter} and
- * {@link ArrayWriter} interfaces.
+ * Represents a scalar value: a required column, a nullable column,
+ * or one element within an array of scalars.
+ * <p>
+ * Vector values are mapped to
+ * their "natural" representations: the representation closest
+ * to the actual vector value. For date and time values, this
+ * generally means a numeric value. Applications can then map
+ * this value to Java objects as desired. Decimal types all
+ * map to BigDecimal as that is the only way in Java to
+ * represent large decimal values.
+ * <p>
+ * In general, a column maps to just one value. However, derived
+ * classes may choose to provide type conversions if convenient.
+ * An exception is thrown if a call is made to a method that
+ * is not supported by the column type.
+ * <p>
+ * {@see ScalarReader}
+ * {@see ScalarElementReader}
*/
public interface ScalarWriter {
+
+ /**
+ * Listener (callback) for vector overflow events. To be optionally
+ * implemented and bound by the client code of the writer. If no
+ * listener is bound, and a vector overflows, then an exception is
+ * thrown.
+ */
+
+ public interface ColumnWriterListener {
+
+ /**
+ * Alert the listener that a vector has overflowed. Upon return,
+ * all writers must have a new set of buffers available, ready
+ * to accept the in-flight value that triggered the overflow.
+ *
+ * @param writer the writer that triggered the overflow
+ */
+
+ void overflowed(ScalarWriter writer);
+
+ /**
+ * Called when a writer wants to expand its vector. Allows the listener
+ * to either allow the growth, or trigger an overflow to limit
+ * batch size.
+ *
+ * @param writer the writer that wishes to grow its vector
+ * @param delta the amount by which the vector is to grow
+ * @return true if the vector can be grown, false if the writer
+ * should instead trigger an overflow by calling
+ * <tt>overflowed()</tt>
+ */
+
+ boolean canExpand(ScalarWriter writer, int delta);
+ }
+
+ void bindListener(ColumnWriterListener listener);
+
+ /**
+ * Describe the type of the value. This is a compression of the
+ * value vector type: it describes which method will return the
+ * vector value.
+ * @return the value type which indicates which get method
+ * is valid for the column
+ */
+
+ ValueType valueType();
+ void setNull();
void setInt(int value);
void setLong(long value);
void setDouble(double value);
void setString(String value);
- void setBytes(byte[] value);
+ void setBytes(byte[] value, int len);
void setDecimal(BigDecimal value);
void setPeriod(Period value);
+
+ void setObject(Object value);
}
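A sketch of a ColumnWriterListener that caps vector growth at a fixed byte
budget. A production listener would also move the writers to fresh buffers
on overflow; the accounting here is simplified and the class is hypothetical:

import org.apache.drill.exec.vector.accessor.ScalarWriter;
import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;

public class BudgetListener implements ColumnWriterListener {
  private final int maxBytes;
  private int grantedBytes;

  public BudgetListener(int maxBytes) { this.maxBytes = maxBytes; }

  @Override
  public boolean canExpand(ScalarWriter writer, int delta) {
    // Deny growth past the budget; the writer then calls overflowed().
    if (grantedBytes + delta > maxBytes) {
      return false;
    }
    grantedBytes += delta;
    return true;
  }

  @Override
  public void overflowed(ScalarWriter writer) {
    // A real listener would switch to new buffers here so the in-flight
    // value that triggered the overflow can be rewritten.
    throw new IllegalStateException("Batch budget exceeded: " + maxBytes);
  }
}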
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleAccessor.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleAccessor.java
deleted file mode 100644
index ea9b869..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleAccessor.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor;
-
-import org.apache.drill.exec.record.MaterializedField;
-
-/**
- * Provides access to a "tuple". In Drill, both rows and maps are
- * tuples: both are an ordered collection of values, defined by a
- * schema. Each tuple has a schema that defines the column ordering
- * for indexed access. Each tuple also provides methods to get column
- * accessors by name or index.
- */
-
-public interface TupleAccessor {
-
- /**
- * Flattened view of the schema as needed for row-based access of scalar
- * members. The scalar view presents scalar fields: those that can be set
- * or retrieved. A separate map view presents map vectors. The scalar
- * view is the one used by row set readers and writers. Column indexes
- * are into the flattened view, with maps removed and map members flattened
- * into the top-level name space with compound names.
- */
-
- public interface TupleSchema {
- /**
- * Return a column schema given an indexed into the flattened row structure.
- *
- * @param index index of the row in the flattened structure
- * @return schema of the column
- */
-
- MaterializedField column(int index);
-
- /**
- * Returns {@code MaterializedField} instance from schema using the name specified in param.
- *
- * @param name name of the column in the schema
- * @return {@code MaterializedField} instance
- */
- MaterializedField column(String name);
-
- /**
- * Returns index of the column in the schema with name specified in param.
- *
- * @param name name of the column in the schema
- * @return index of the column in the schema
- */
- int columnIndex(String name);
-
- int count();
- }
-
- TupleSchema schema();
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java
index acca767..908d6a0 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleReader.java
@@ -17,24 +17,38 @@
*/
package org.apache.drill.exec.vector.accessor;
+import org.apache.drill.exec.record.TupleMetadata;
+
/**
* Interface for reading from tuples (rows or maps). Provides
* a column reader for each column that can be obtained either
* by name or column index (as defined in the tuple schema.)
* Also provides two generic methods to get the value as a
* Java object or as a string.
+ * <p>
+ * {@see TupleWriter}
*/
-public interface TupleReader extends TupleAccessor {
- ColumnReader column(int colIndex);
+public interface TupleReader {
+ TupleMetadata schema();
+ int columnCount();
+
+ ObjectReader column(int colIndex);
+ ObjectReader column(String colName);
+
+ // Convenience methods
+
+ ObjectType type(int colIndex);
+ ObjectType type(String colName);
+ ScalarReader scalar(int colIndex);
+ ScalarReader scalar(String colName);
+ TupleReader tuple(int colIndex);
+ TupleReader tuple(String colName);
+ ArrayReader array(int colIndex);
+ ArrayReader array(String colName);
+ ScalarElementReader elements(int colIndex);
+ ScalarElementReader elements(String colName);
- /**
- * Returns column reader for the column with name specified in param.
- *
- * @param colName name of the column in the schema
- * @return column reader
- */
- ColumnReader column(String colName);
- Object get(int colIndex);
- String getAsString(int colIndex);
+ Object getObject();
+ String getAsString();
}
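A sketch that dumps a row using only the methods defined above:

import org.apache.drill.exec.vector.accessor.TupleReader;

public class RowDumpExample {
  public static String dumpRow(TupleReader row) {
    StringBuilder buf = new StringBuilder("(");
    for (int i = 0; i < row.columnCount(); i++) {
      if (i > 0) { buf.append(", "); }
      buf.append(row.column(i).getAsString());
    }
    return buf.append(")").toString();
  }
}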
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleWriter.java
index 563734e..056c9b3 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleWriter.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/TupleWriter.java
@@ -17,25 +17,153 @@
*/
package org.apache.drill.exec.vector.accessor;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+
/**
- * Interface for writing to rows via a column writer.
- * Column writers can be obtained by name or index. Column
- * indexes are defined by the tuple schema. Also provides
- * a convenience method to set the column value from a Java
- * object. The caller is responsible for providing the
- * correct object type for each column. (The object type
- * must match the column accessor type.)
+ * Writer for a tuple. A tuple is composed of columns with a fixed order and
+ * unique names: either can be used to reference columns. Columns are scalar
+ * (simple values), tuples (i.e. maps), or arrays (of scalars, tuples or
+ * arrays.) The row itself is just the top-level (anonymous) tuple. Generally,
+ * implementers of this interface provide additional services on the
+ * implementation of the top-level tuple (often called a "row writer.") Columns
+ * are accessible via the associated column writer by name or index. Column
+ * indexes are defined by the tuple schema.
+ * <p>
+ * Consumers of this interface can define the schema up front, or can define the
+ * schema as the write progresses. To avoid redundant checks to see if a column
+ * is already defined, consumers can simply ask for a column by name. The
+ * <tt>column()</tt> (and related) methods will throw an (unchecked)
+ * {@link UndefinedColumnException} if the column is undefined. The
+ * consumer can catch the exception, define the column, and fetch the column
+ * writer again. New columns may be added via this interface at any time; the
+ * new column takes the next available index.
+ * <p>
+ * Also provides a convenience method to set the column value from a Java
+ * object. The caller is responsible for providing the correct object type for
+ * each column. (The object type must match the column accessor type.)
+ * <p>
+ * Convenience methods allow getting a column as a scalar, tuple or array. These
+ * methods throw an exception if the column is not of the requested type.
+ *
+ * @see {@link SingleMapWriter}, the class which this class replaces
*/
-public interface TupleWriter extends TupleAccessor {
- ColumnWriter column(int colIndex);
+public interface TupleWriter {
+
+ /**
+ * Listener (callback) to handle requests to add a new column to a tuple (row
+ * or map). Implemented and bound by the client code that creates or uses the
+ * tuple writer. If no listener is bound, then an attempt to add a column
+ * throws an exception.
+ */
+
+ public interface TupleWriterListener {
+ ObjectWriter addColumn(TupleWriter tuple, ColumnMetadata column);
+
+ ObjectWriter addColumn(TupleWriter tuple, MaterializedField field);
+ }
+
+ /**
+ * Unchecked exception thrown when attempting to access a column writer by
+ * name for an undefined column. Clients that use a fixed schema can simply
+ * omit catch blocks for the exception since it is unchecked and won't be
+ * thrown if the schema can't evolve. Clients that can discover new columns
+ * should catch the exception and define the column (using an implementation
+ * that allows dynamic schema definition.)
+ */
+
+ @SuppressWarnings("serial")
+ public static class UndefinedColumnException extends RuntimeException {
+ public UndefinedColumnException(String colName) {
+ super("Undefined column: " + colName);
+ }
+ }
+
+ void bindListener(TupleWriterListener listener);
/**
- * Returns column writer for the column with name specified in param.
+ * Add a column to the tuple (row or map) that backs this writer. Support for
+ * this operation depends on whether the client code has registered a listener
+ * to implement the addition. Throws an exception if no listener is
+ * implemented, or if the add request is otherwise invalid (duplicate name,
+ * etc.)
*
- * @param colName name of the column in the schema
- * @return column writer
+ * @param column
+ * the metadata for the column to add
+ * @return the index of the newly added column which can be used to access the
+ * newly added writer
*/
- ColumnWriter column(String colName);
+
+ int addColumn(ColumnMetadata column);
+
+ int addColumn(MaterializedField schema);
+
+ TupleMetadata schema();
+
+ int size();
+
+ // Return the column as a generic object
+
+ ObjectWriter column(int colIndex);
+
+ ObjectWriter column(String colName);
+
+ // Convenience methods
+
+ ScalarWriter scalar(int colIndex);
+
+ ScalarWriter scalar(String colName);
+
+ TupleWriter tuple(int colIndex);
+
+ TupleWriter tuple(String colName);
+
+ ArrayWriter array(int colIndex);
+
+ ArrayWriter array(String colName);
+
+ ObjectType type(int colIndex);
+
+ ObjectType type(String colName);
+
+ /**
+ * Write a value to the given column, automatically calling the proper
+ * <tt>set<i>Type</i></tt> method for the data. While this method is
+ * convenient for testing, it incurs quite a bit of type-checking overhead and
+ * is not suitable for production code.
+ *
+ * @param colIndex
+ * the index of the column to set
+ * @param value
+ * the value to set. The type of the object must be compatible with
+ * the type of the target column
+ */
+
void set(int colIndex, Object value);
+
+ /**
+ * Write a row or map of values, given by Java objects. Object type must match
+ * expected column type.
+ * <p>
+ * Note that a single-column tuple is ambiguous if that column is an array. To
+ * avoid ambiguity, use <tt>set(0, value)</tt> in this case.
+ *
+ * @param values
+ * variable-length argument list of column values
+ */
+
+ void setTuple(Object... values);
+
+ /**
+ * Set the tuple from an array of objects. Primarily for use in test tools.
+ *
+ * @param value
+ * the object to set, which must be a generic <tt>Object</tt> array
+ */
+
+ void setObject(Object value);
}
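A sketch of the discover-then-define protocol described above. The column
schema is passed in ready-made; building the MaterializedField is omitted:

import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.accessor.ScalarWriter;
import org.apache.drill.exec.vector.accessor.TupleWriter;
import org.apache.drill.exec.vector.accessor.TupleWriter.UndefinedColumnException;

public class DynamicColumnExample {
  public static void writeDiscovered(TupleWriter row, String colName,
      String value, MaterializedField colSchema) {
    ScalarWriter col;
    try {
      col = row.scalar(colName);
    } catch (UndefinedColumnException e) {
      // Column not yet defined: add it, then fetch the writer again.
      row.addColumn(colSchema);
      col = row.scalar(colName);
    }
    col.setString(value);
  }
}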
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java
new file mode 100644
index 0000000..e6687dc
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/ValueType.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor;
+
+/**
+ * Represents the primitive types supported to read and write data
+ * from value vectors. Vectors support many data widths. For simplicity
+ * (and because of no difference in performance), the get/set methods
+ * use a reduced set of types. In general, each reader and writer
+ * supports just one type, though some may provide more than one
+ * (such as access to bytes for a <tt>STRING</tt> value.)
+ */
+
+public enum ValueType {
+ INTEGER, LONG, DOUBLE, STRING, BYTES, DECIMAL, PERIOD
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayReader.java
deleted file mode 100644
index deea7f8..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayReader.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.impl;
-
-import java.math.BigDecimal;
-
-import org.apache.drill.exec.record.MaterializedField;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.ArrayReader;
-import org.apache.drill.exec.vector.accessor.TupleReader;
-import org.apache.drill.exec.vector.accessor.impl.AbstractColumnReader.VectorAccessor;
-import org.joda.time.Period;
-
-/**
- * Reader for an array-valued column. This reader provides access to specific
- * array members via an array index. This is an abstract base class;
- * subclasses are generated for each repeated value vector type.
- */
-
-public abstract class AbstractArrayReader extends AbstractColumnAccessor implements ArrayReader {
-
- /**
- * Column reader that provides access to an array column by returning a
- * separate reader specifically for that array. That is, reading a column
- * is a two-part process:<pre><code>
- * tupleReader.column("arrayCol").array().getInt(2);</code></pre>
- * This pattern is used to avoid overloading the column reader with
- * both scalar and array access. Also, this pattern mimics the way
- * that nested tuples (Drill maps) are handled.
- */
-
- public static class ArrayColumnReader extends AbstractColumnReader {
-
- private final AbstractArrayReader arrayReader;
-
- public ArrayColumnReader(AbstractArrayReader arrayReader) {
- this.arrayReader = arrayReader;
- }
-
- @Override
- public ValueType valueType() {
- return ValueType.ARRAY;
- }
-
- @Override
- public void bind(RowIndex rowIndex, ValueVector vector) {
- arrayReader.bind(rowIndex, vector);
- vectorIndex = rowIndex;
- }
-
- @Override
- public ArrayReader array() {
- return arrayReader;
- }
- }
-
- protected VectorAccessor vectorAccessor;
-
- public void bind(RowIndex rowIndex, MaterializedField field, VectorAccessor va) {
- bind(rowIndex);
- vectorAccessor = va;
- }
-
- @Override
- public boolean isNull(int index) {
- return false;
- }
-
- @Override
- public int getInt(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long getLong(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public double getDouble(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public String getString(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public byte[] getBytes(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public BigDecimal getDecimal(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public Period getPeriod(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public TupleReader map(int index) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public ArrayReader array(int index) {
- throw new UnsupportedOperationException();
- }
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayWriter.java
deleted file mode 100644
index d1d1263..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractArrayWriter.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.impl;
-
-import java.math.BigDecimal;
-
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.ArrayWriter;
-import org.apache.drill.exec.vector.complex.BaseRepeatedValueVector;
-import org.joda.time.Period;
-
-/**
- * Writer for an array-valued column. This writer appends values: once a value
- * is written, it cannot be changed. As a result, writer methods have no item index;
- * each set advances the array to the next position. This is an abstract base class;
- * subclasses are generated for each repeated value vector type.
- */
-
-public abstract class AbstractArrayWriter extends AbstractColumnAccessor implements ArrayWriter {
-
- /**
- * Column writer that provides access to an array column by returning a
- * separate writer specifically for that array. That is, writing an array
- * is a two-part process:<pre><code>
- * tupleWriter.column("arrayCol").array().setInt(2);</code></pre>
- * This pattern is used to avoid overloading the column reader with
- * both scalar and array access. Also, this pattern mimics the way
- * that nested tuples (Drill maps) are handled.
- */
-
- public static class ArrayColumnWriter extends AbstractColumnWriter {
-
- private final AbstractArrayWriter arrayWriter;
-
- public ArrayColumnWriter(AbstractArrayWriter arrayWriter) {
- this.arrayWriter = arrayWriter;
- }
-
- @Override
- public ValueType valueType() {
- return ValueType.ARRAY;
- }
-
- @Override
- public void bind(RowIndex rowIndex, ValueVector vector) {
- arrayWriter.bind(rowIndex, vector);
- vectorIndex = rowIndex;
- }
-
- @Override
- public ArrayWriter array() {
- return arrayWriter;
- }
-
- /**
- * Arrays require a start step for each row, regardless of
- * whether any values are written for that row.
- */
-
- public void start() {
- arrayWriter.mutator().startNewValue(vectorIndex.index());
- }
- }
-
- protected abstract BaseRepeatedValueVector.BaseRepeatedMutator mutator();
-
- @Override
- public int size() {
- return mutator().getInnerValueCountAt(vectorIndex.index());
- }
-
- @Override
- public boolean valid() {
- // Not implemented yet
- return true;
- }
-
- @Override
- public void setInt(int value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setLong(long value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setDouble(double value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setString(String value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setBytes(byte[] value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setDecimal(BigDecimal value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setPeriod(Period value) {
- throw new UnsupportedOperationException();
- }
-}
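
For illustration, a minimal sketch of the two-part pattern described in the
Javadoc above, assuming a tupleWriter of this (now removed) API already bound
to vectors and a row index; the column name here is hypothetical:

    // Hypothetical usage: write a repeated-int column via the two-part pattern.
    ArrayWriter arrayCol = tupleWriter.column("arrayCol").array();
    arrayCol.setInt(1);  // appends at array position 0
    arrayCol.setInt(2);  // appends at array position 1; each set advances

Because the writer only appends, there is no item index; the array position
advances on every set call.
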
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnAccessor.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnAccessor.java
deleted file mode 100644
index 5b751c5..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnAccessor.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.impl;
-
-import org.apache.drill.exec.vector.ValueVector;
-
-/**
- * Abstract base class for column readers and writers that
- * implements the mechanism for binding accessors to a row
- * index. The row index is implicit: index a row, then
- * column accessors pull out columns from that row.
- */
-
-public abstract class AbstractColumnAccessor {
-
- public interface RowIndex {
- int batch();
- int index();
- }
-
- protected RowIndex vectorIndex;
-
- protected void bind(RowIndex rowIndex) {
- this.vectorIndex = rowIndex;
- }
-
- public abstract void bind(RowIndex rowIndex, ValueVector vector);
-}
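
As a rough sketch of the binding mechanism, a trivial RowIndex for a single
batch might just wrap a mutable position (this class is an assumption for
illustration, not part of the patch):

    // Hypothetical single-batch row index: batch() is fixed, index() advances.
    class TestRowIndex implements AbstractColumnAccessor.RowIndex {
      int position;
      @Override public int batch() { return 0; }        // no SV4 indirection
      @Override public int index() { return position; }
    }

An accessor bound to this index reads or writes whichever row the position
currently selects.
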
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnReader.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnReader.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnReader.java
deleted file mode 100644
index b88b08b..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnReader.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.impl;
-
-import java.math.BigDecimal;
-
-import org.apache.drill.exec.record.MaterializedField;
-import org.apache.drill.exec.vector.ValueVector;
-import org.apache.drill.exec.vector.accessor.ArrayReader;
-import org.apache.drill.exec.vector.accessor.ColumnReader;
-import org.apache.drill.exec.vector.accessor.TupleReader;
-import org.joda.time.Period;
-
-/**
- * Column reader implementation that acts as the basis for the
- * generated, vector-specific implementations. All set methods
- * throw an exception; subclasses simply override the supported
- * method(s).
- */
-
-public abstract class AbstractColumnReader extends AbstractColumnAccessor implements ColumnReader {
-
- public interface VectorAccessor {
- ValueVector vector();
- }
-
- protected VectorAccessor vectorAccessor;
-
- public void bind(RowIndex rowIndex, MaterializedField field, VectorAccessor va) {
- bind(rowIndex);
- vectorAccessor = va;
- }
-
- @Override
- public Object getObject() {
- switch (valueType()) {
- case ARRAY:
- // TODO: build an array. Just a bit tedious...
- throw new UnsupportedOperationException();
- case BYTES:
- return getBytes();
- case DECIMAL:
- return getDecimal();
- case DOUBLE:
- return getDouble();
- case INTEGER:
- return getInt();
- case LONG:
- return getLong();
- case MAP:
- // TODO: build a map. Just a bit tedious...
- throw new UnsupportedOperationException();
- case PERIOD:
- return getPeriod();
- case STRING:
- return getString();
- default:
- throw new IllegalStateException("Unexpected type: " + valueType());
- }
- }
-
- @Override
- public boolean isNull() {
- return false;
- }
-
- @Override
- public int getInt() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public long getLong() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public double getDouble() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public String getString() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public byte[] getBytes() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public BigDecimal getDecimal() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public Period getPeriod() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public TupleReader map() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public ArrayReader array() {
- throw new UnsupportedOperationException();
- }
-}
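
A generated subclass overrides only the getter its vector type supports;
everything else keeps throwing. As a hedged sketch of the removed API (the
real generated classes live in ColumnAccessors), assuming Drill's IntVector:

    // Sketch of a generated reader for a required INT vector.
    public class IntReaderSketch extends AbstractColumnReader {
      private IntVector.Accessor accessor;
      @Override public ValueType valueType() { return ValueType.INTEGER; }
      @Override public void bind(RowIndex rowIndex, ValueVector vector) {
        bind(rowIndex);
        accessor = ((IntVector) vector).getAccessor();
      }
      @Override public int getInt() {
        return accessor.get(vectorIndex.index());
      }
    }
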
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnWriter.java
deleted file mode 100644
index 5071e03..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractColumnWriter.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.impl;
-
-import java.math.BigDecimal;
-
-import org.apache.drill.exec.vector.accessor.ArrayWriter;
-import org.apache.drill.exec.vector.accessor.ColumnWriter;
-import org.apache.drill.exec.vector.accessor.TupleWriter;
-import org.joda.time.Period;
-
-/**
- * Column writer implementation that acts as the basis for the
- * generated, vector-specific implementations. All set methods
- * throw an exception; subclasses simply override the supported
- * method(s).
- */
-
-public abstract class AbstractColumnWriter extends AbstractColumnAccessor implements ColumnWriter {
-
- public void start() { }
-
- @Override
- public void setNull() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setInt(int value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setLong(long value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setDouble(double value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setString(String value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setBytes(byte[] value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setDecimal(BigDecimal value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setPeriod(Period value) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public TupleWriter map() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public ArrayWriter array() {
- throw new UnsupportedOperationException();
- }
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractTupleAccessor.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractTupleAccessor.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractTupleAccessor.java
deleted file mode 100644
index 98ea6ac..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AbstractTupleAccessor.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.impl;
-
-import org.apache.drill.exec.vector.accessor.TupleAccessor;
-
-/**
- * Common base class for tuple readers and writers.
- */
-
-public abstract class AbstractTupleAccessor implements TupleAccessor {
-
- protected final TupleSchema schema;
-
- public AbstractTupleAccessor(TupleSchema schema) {
- this.schema = schema;
- }
-
- @Override
- public TupleSchema schema() {
- return schema;
- }
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AccessorUtilities.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AccessorUtilities.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AccessorUtilities.java
new file mode 100644
index 0000000..4bf8f98
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/AccessorUtilities.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.impl;
+
+public class AccessorUtilities {
+
+ private AccessorUtilities() { }
+
+ public static int sv4Batch(int sv4Index) {
+ return sv4Index >>> 16;
+ }
+
+ public static int sv4Index(int sv4Index) {
+ return sv4Index & 0xFFFF;
+ }
+
+ public static String bytesToString(byte[] value) {
+ StringBuilder buf = new StringBuilder()
+ .append("[");
+ int len = Math.min(value.length, 20);
+ for (int i = 0; i < len; i++) {
+ if (i > 0) {
+ buf.append(", ");
+ }
+ String str = Integer.toHexString(value[i] & 0xFF);
+ if (str.length() < 2) {
+ buf.append("0");
+ }
+ buf.append(str);
+ }
+ if (value.length > len) {
+ buf.append("...");
+ }
+ buf.append("]");
+ return buf.toString();
+ }
+
+}
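
The two sv4 helpers decode a four-byte selection-vector entry: the upper 16
bits select a batch, the lower 16 bits a record within that batch. A quick
worked example:

    // Entry for batch 3, record 10.
    int sv4Entry = (3 << 16) | 10;
    assert AccessorUtilities.sv4Batch(sv4Entry) == 3;
    assert AccessorUtilities.sv4Index(sv4Entry) == 10;
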
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/ColumnAccessorFactory.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/ColumnAccessorFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/ColumnAccessorFactory.java
deleted file mode 100644
index 019d3be..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/ColumnAccessorFactory.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.impl;
-
-import org.apache.drill.common.types.TypeProtos.DataMode;
-import org.apache.drill.common.types.TypeProtos.MajorType;
-import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.vector.accessor.ColumnAccessors;
-import org.apache.drill.exec.vector.accessor.impl.AbstractArrayReader.ArrayColumnReader;
-import org.apache.drill.exec.vector.accessor.impl.AbstractArrayWriter.ArrayColumnWriter;
-
-/**
- * Gather generated accessor classes into a set of class
- * tables to allow rapid run-time creation of accessors.
- * The caller is responsible for binding the accessor to
- * a vector and a row index.
- */
-
-public class ColumnAccessorFactory {
-
- private static Class<? extends AbstractColumnWriter> columnWriters[][] = buildColumnWriters();
- private static Class<? extends AbstractColumnReader> columnReaders[][] = buildColumnReaders();
- private static Class<? extends AbstractArrayWriter> arrayWriters[] = buildArrayWriters();
- private static Class<? extends AbstractArrayReader> arrayReaders[] = buildArrayReaders();
-
- @SuppressWarnings("unchecked")
- private static Class<? extends AbstractColumnWriter>[][] buildColumnWriters() {
- int typeCount = MinorType.values().length;
- int modeCount = DataMode.values().length;
- Class<? extends AbstractColumnWriter> writers[][] = new Class[typeCount][];
- for (int i = 0; i < typeCount; i++) {
- writers[i] = new Class[modeCount];
- }
-
- ColumnAccessors.defineWriters(writers);
- return writers;
- }
-
- @SuppressWarnings("unchecked")
- private static Class<? extends AbstractColumnReader>[][] buildColumnReaders() {
- int typeCount = MinorType.values().length;
- int modeCount = DataMode.values().length;
- Class<? extends AbstractColumnReader> readers[][] = new Class[typeCount][];
- for (int i = 0; i < typeCount; i++) {
- readers[i] = new Class[modeCount];
- }
-
- ColumnAccessors.defineReaders(readers);
- return readers;
- }
-
- @SuppressWarnings("unchecked")
- private static Class<? extends AbstractArrayWriter>[] buildArrayWriters() {
- int typeCount = MinorType.values().length;
- Class<? extends AbstractArrayWriter> writers[] = new Class[typeCount];
- ColumnAccessors.defineArrayWriters(writers);
- return writers;
- }
-
- @SuppressWarnings("unchecked")
- private static Class<? extends AbstractArrayReader>[] buildArrayReaders() {
- int typeCount = MinorType.values().length;
- Class<? extends AbstractArrayReader> readers[] = new Class[typeCount];
- ColumnAccessors.defineArrayReaders(readers);
- return readers;
- }
-
- public static AbstractColumnWriter newWriter(MajorType type) {
- try {
- if (type.getMode() == DataMode.REPEATED) {
- Class<? extends AbstractArrayWriter> writerClass = arrayWriters[type.getMinorType().ordinal()];
- if (writerClass == null) {
- throw new UnsupportedOperationException();
- }
- return new ArrayColumnWriter(writerClass.newInstance());
- } else {
- Class<? extends AbstractColumnWriter> writerClass = columnWriters[type.getMinorType().ordinal()][type.getMode().ordinal()];
- if (writerClass == null) {
- throw new UnsupportedOperationException();
- }
- return writerClass.newInstance();
- }
- } catch (InstantiationException | IllegalAccessException e) {
- throw new IllegalStateException(e);
- }
- }
-
- public static AbstractColumnReader newReader(MajorType type) {
- try {
- if (type.getMode() == DataMode.REPEATED) {
- Class<? extends AbstractArrayReader> readerClass = arrayReaders[type.getMinorType().ordinal()];
- if (readerClass == null) {
- throw new UnsupportedOperationException();
- }
- return new ArrayColumnReader(readerClass.newInstance());
- } else {
- Class<? extends AbstractColumnReader> readerClass = columnReaders[type.getMinorType().ordinal()][type.getMode().ordinal()];
- if (readerClass == null) {
- throw new UnsupportedOperationException();
- }
- return readerClass.newInstance();
- }
- } catch (InstantiationException | IllegalAccessException e) {
- throw new IllegalStateException(e);
- }
- }
-}
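
Sketched usage of this (removed) factory: build a MajorType, look up a
writer, then bind it. Types.required() is assumed here as the usual Drill
helper for constructing the type; rowIndex and vector come from the caller:

    // Hypothetical: create and bind a writer for a required INT column.
    MajorType type = Types.required(MinorType.INT);
    AbstractColumnWriter writer = ColumnAccessorFactory.newWriter(type);
    writer.bind(rowIndex, vector);
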
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/HierarchicalFormatter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/HierarchicalFormatter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/HierarchicalFormatter.java
new file mode 100644
index 0000000..35bd445
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/HierarchicalFormatter.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.impl;
+
+/**
+ * Interface for dumping object state in a hierarchical fashion during
+ * debugging.
+ */
+
+public interface HierarchicalFormatter {
+
+ HierarchicalFormatter startObject(Object obj);
+ void extend();
+ HierarchicalFormatter attribute(String label);
+ HierarchicalFormatter attribute(String label, Object value);
+ HierarchicalFormatter attributeIdentity(String label, Object value);
+ HierarchicalFormatter attributeArray(String label);
+ HierarchicalFormatter element(int index, Object value);
+ HierarchicalFormatter elementIdentity(int index, Object value);
+ HierarchicalFormatter element(int index);
+ HierarchicalFormatter endArray();
+ HierarchicalFormatter endObject();
+}
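
The intended usage pattern, sketched: each class in an accessor hierarchy
exposes a dump method that describes itself and recurses into children. The
dump method and field names below are assumptions for illustration:

    // Hypothetical dump method on a writer-like class.
    public void dump(HierarchicalFormatter format) {
      format.startObject(this)
            .attribute("lastWriteIndex", lastWriteIndex)
            .attributeIdentity("vector", vector)
            .endObject();
    }
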
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/HierarchicalPrinter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/HierarchicalPrinter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/HierarchicalPrinter.java
new file mode 100644
index 0000000..0c37d23
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/HierarchicalPrinter.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.impl;
+
+import java.io.PrintStream;
+import java.util.ArrayDeque;
+import java.util.Deque;
+
+/**
+ * Prints a complex object structure in a quasi-JSON format for use
+ * in debugging. Generally only used in an ad-hoc fashion during
+ * debugging sessions; never in production.
+ */
+
+public class HierarchicalPrinter implements HierarchicalFormatter {
+
+ public enum State { OBJECT, OBJECT_ATTRIB, ARRAY, OBJECT_ELEMENT }
+
+ private static class ObjState {
+ private State state = State.OBJECT;
+ private String attrib;
+ private int index = -1;
+ private int extensions;
+
+ public ObjState(int extensions) {
+ this.extensions = extensions;
+ }
+ }
+
+ private final PrintStream out;
+ private Deque<ObjState> stack = new ArrayDeque<ObjState>();
+ private int pendingExtensions = 0;
+ private ObjState curObject;
+ private int level;
+
+ public HierarchicalPrinter() {
+ out = System.out;
+ }
+
+ @Override
+ public void extend() {
+ pendingExtensions++;
+ }
+
+ @Override
+ public HierarchicalFormatter startObject(Object obj) {
+ if (curObject != null) {
+ stack.push(curObject);
+ switch (curObject.state) {
+ case OBJECT_ATTRIB:
+ startAttrib(curObject.attrib);
+ curObject.attrib = null;
+ curObject.state = State.OBJECT;
+ break;
+ case OBJECT:
+ startAttrib("missing-attrib");
+ curObject.state = State.OBJECT;
+ break;
+ case OBJECT_ELEMENT:
+ startElement(curObject.index);
+ curObject.state = State.ARRAY;
+ curObject.index = -1;
+ break;
+ default:
+ assert false;
+ }
+ }
+
+ printObjIdentity(obj);
+ out.println(" {");
+ level++;
+ curObject = new ObjState(pendingExtensions);
+ pendingExtensions = 0;
+ return this;
+ }
+
+ private void printObjIdentity(Object value) {
+ out.print(value.getClass().getSimpleName());
+ out.print( " (");
+ out.print(System.identityHashCode(value) % 1000);
+ out.print(")");
+ }
+
+ private void startElement(int index) {
+ indent();
+ out.print("[");
+ out.print(index);
+ out.print("] = ");
+ }
+
+ private void startAttrib(String label) {
+ indent();
+ out.print(label);
+ out.print(" = ");
+ }
+
+ @Override
+ public HierarchicalFormatter attribute(String label) {
+ curObject.attrib = label;
+ curObject.state = State.OBJECT_ATTRIB;
+ return this;
+ }
+
+ @Override
+ public HierarchicalFormatter attribute(String label, Object value) {
+ attribPrefix();
+ startAttrib(label);
+ printValue(value);
+ out.println();
+ return this;
+ }
+
+ private void attribPrefix() {
+ switch (curObject.state) {
+ case OBJECT_ATTRIB:
+ startAttrib(curObject.attrib);
+ out.println("<Unknown> {}");
+ break;
+ case OBJECT:
+ break;
+ default:
+ assert false;
+ }
+ }
+
+ @Override
+ public HierarchicalFormatter attributeIdentity(String label, Object obj) {
+ attribPrefix();
+ startAttrib(label);
+ objIdentity(obj);
+ out.println();
+ return this;
+ }
+
+ private void objIdentity(Object obj) {
+ if (obj == null) {
+ out.print("null");
+ } else {
+ printObjIdentity(obj);
+ }
+ }
+
+ private void printValue(Object value) {
+ if (value == null) {
+ out.print("null");
+ } else if (value instanceof String) {
+ out.print("\"");
+ out.print(value);
+ out.print("\"");
+ } else {
+ out.print(value.toString());
+ }
+ }
+
+ @Override
+ public HierarchicalFormatter endObject() {
+ if (level == 0) {
+ out.println( "} // Mismatch!");
+ return this;
+ }
+ if (curObject.extensions == 0) {
+ level--;
+ indent();
+ out.println("}");
+ if (level == 0) {
+ curObject = null;
+ } else {
+ curObject = stack.pop();
+ }
+ } else {
+ curObject.extensions--;
+ }
+ return this;
+ }
+
+ private void indent() {
+ for (int i = 0; i < level; i++) {
+ out.print(" ");
+ }
+ }
+
+ @Override
+ public HierarchicalFormatter attributeArray(String label) {
+ startAttrib(label);
+ out.println("[");
+ level++;
+ curObject.state = State.ARRAY;
+ return this;
+ }
+
+ @Override
+ public HierarchicalFormatter element(int index, Object value) {
+ startElement(index);
+ printValue(value);
+ out.println();
+ return this;
+ }
+
+ @Override
+ public HierarchicalFormatter element(int index) {
+ curObject.index = index;
+ curObject.state = State.OBJECT_ELEMENT;
+ return this;
+ }
+
+ @Override
+ public HierarchicalFormatter elementIdentity(int index, Object obj) {
+ startElement(index);
+ objIdentity(obj);
+ out.println();
+ return this;
+ }
+
+ @Override
+ public HierarchicalFormatter endArray() {
+ level--;
+ indent();
+ out.println("]");
+ curObject.state = State.OBJECT;
+ return this;
+ }
+
+}
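
For example, an ad-hoc debugging session might print a small object tree
like this (the object names are hypothetical; output is quasi-JSON on
stdout):

    HierarchicalFormatter format = new HierarchicalPrinter();
    format.startObject(writer)
          .attribute("index", 5)
          .attributeArray("children");
    format.element(0, "first");
    format.endArray()
          .endObject();
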
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleReaderImpl.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleReaderImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleReaderImpl.java
deleted file mode 100644
index 97a6e3c..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleReaderImpl.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.impl;
-
-import org.apache.drill.exec.vector.accessor.ArrayReader;
-import org.apache.drill.exec.vector.accessor.ColumnReader;
-import org.apache.drill.exec.vector.accessor.TupleReader;
-
-/**
- * Reader for a tuple (a row or a map.) Provides access to each
- * column using either a name or a numeric index.
- */
-
-public class TupleReaderImpl extends AbstractTupleAccessor implements TupleReader {
-
- private final AbstractColumnReader readers[];
-
- public TupleReaderImpl(TupleSchema schema, AbstractColumnReader readers[]) {
- super(schema);
- this.readers = readers;
- }
-
- @Override
- public ColumnReader column(int colIndex) {
- return readers[colIndex];
- }
-
- @Override
- public ColumnReader column(String colName) {
- int index = schema.columnIndex(colName);
- if (index == -1) {
- return null; }
- return readers[index];
- }
-
- @Override
- public Object get(int colIndex) {
- ColumnReader colReader = column(colIndex);
- if (colReader.isNull()) {
- return null; }
- switch (colReader.valueType()) {
- case BYTES:
- return colReader.getBytes();
- case DOUBLE:
- return colReader.getDouble();
- case INTEGER:
- return colReader.getInt();
- case LONG:
- return colReader.getLong();
- case STRING:
- return colReader.getString();
- default:
- throw new IllegalArgumentException("Unsupported type " + colReader.valueType());
- }
- }
-
- @Override
- public String getAsString(int colIndex) {
- ColumnReader colReader = column(colIndex);
- if (colReader.isNull()) {
- return "null";
- }
- switch (colReader.valueType()) {
- case BYTES:
- return bytesToString(colReader.getBytes());
- case DOUBLE:
- return Double.toString(colReader.getDouble());
- case INTEGER:
- return Integer.toString(colReader.getInt());
- case LONG:
- return Long.toString(colReader.getLong());
- case STRING:
- return "\"" + colReader.getString() + "\"";
- case DECIMAL:
- return colReader.getDecimal().toPlainString();
- case ARRAY:
- return getArrayAsString(colReader.array());
- default:
- throw new IllegalArgumentException("Unsupported type " + colReader.valueType());
- }
- }
-
- private String bytesToString(byte[] value) {
- StringBuilder buf = new StringBuilder()
- .append("[");
- int len = Math.min(value.length, 20);
- for (int i = 0; i < len; i++) {
- if (i > 0) {
- buf.append(", ");
- }
- buf.append((int) value[i]);
- }
- if (value.length > len) {
- buf.append("...");
- }
- buf.append("]");
- return buf.toString();
- }
-
- private String getArrayAsString(ArrayReader array) {
- StringBuilder buf = new StringBuilder();
- buf.append("[");
- for (int i = 0; i < array.size(); i++) {
- if (i > 0) {
- buf.append( ", " );
- }
- switch (array.valueType()) {
- case BYTES:
- buf.append(bytesToString(array.getBytes(i)));
- break;
- case DOUBLE:
- buf.append(Double.toString(array.getDouble(i)));
- break;
- case INTEGER:
- buf.append(Integer.toString(array.getInt(i)));
- break;
- case LONG:
- buf.append(Long.toString(array.getLong(i)));
- break;
- case STRING:
- buf.append("\"" + array.getString(i) + "\"");
- break;
- case DECIMAL:
- buf.append(array.getDecimal(i).toPlainString());
- break;
- case MAP:
- case ARRAY:
- throw new UnsupportedOperationException("Unsupported type " + array.valueType());
- default:
- throw new IllegalArgumentException("Unexpected type " + array.valueType());
- }
- }
- buf.append("]");
- return buf.toString();
- }
-}
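
Sketched usage of this (removed) tuple reader, assuming a schema and an
array of bound column readers built elsewhere (for example, via
ColumnAccessorFactory); the column name is hypothetical:

    // Hypothetical: read one row's columns by name and by index.
    TupleReaderImpl row = new TupleReaderImpl(schema, readers);
    int amount = row.column("amount").getInt();  // typed access by name
    String debug = row.getAsString(0);           // debug-friendly string form
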
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleWriterImpl.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleWriterImpl.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleWriterImpl.java
deleted file mode 100644
index 015b099..0000000
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/impl/TupleWriterImpl.java
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.vector.accessor.impl;
-
-import java.math.BigDecimal;
-
-import org.apache.drill.exec.vector.accessor.AccessorUtilities;
-import org.apache.drill.exec.vector.accessor.ArrayWriter;
-import org.apache.drill.exec.vector.accessor.ColumnAccessor.ValueType;
-import org.apache.drill.exec.vector.accessor.ColumnWriter;
-import org.apache.drill.exec.vector.accessor.TupleWriter;
-import org.joda.time.Period;
-
-/**
- * Implementation for a writer for a tuple (a row or a map.) Provides access to each
- * column using either a name or a numeric index.
- */
-
-public class TupleWriterImpl extends AbstractTupleAccessor implements TupleWriter {
-
- private final AbstractColumnWriter writers[];
-
- public TupleWriterImpl(TupleSchema schema, AbstractColumnWriter writers[]) {
- super(schema);
- this.writers = writers;
- }
-
- public void start() {
- for (int i = 0; i < writers.length; i++) {
- writers[i].start();
- }
- }
-
- @Override
- public ColumnWriter column(int colIndex) {
- return writers[colIndex];
- }
-
- @Override
- public ColumnWriter column(String colName) {
- int index = schema.columnIndex(colName);
- if (index == -1) {
- return null; }
- return writers[index];
- }
-
- @Override
- public void set(int colIndex, Object value) {
- ColumnWriter colWriter = column(colIndex);
- if (value == null) {
- // Arrays have no null concept, just an empty array.
- if (colWriter.valueType() != ValueType.ARRAY) {
- colWriter.setNull();
- }
- } else if (value instanceof Integer) {
- colWriter.setInt((Integer) value);
- } else if (value instanceof Long) {
- colWriter.setLong((Long) value);
- } else if (value instanceof String) {
- colWriter.setString((String) value);
- } else if (value instanceof BigDecimal) {
- colWriter.setDecimal((BigDecimal) value);
- } else if (value instanceof Period) {
- colWriter.setPeriod((Period) value);
- } else if (value instanceof byte[]) {
- colWriter.setBytes((byte[]) value);
- } else if (value instanceof Byte) {
- colWriter.setInt((Byte) value);
- } else if (value instanceof Short) {
- colWriter.setInt((Short) value);
- } else if (value instanceof Double) {
- colWriter.setDouble((Double) value);
- } else if (value instanceof Float) {
- colWriter.setDouble((Float) value);
- } else if (value.getClass().getName().startsWith("[")) {
- setArray(colIndex, value);
- } else {
- throw new IllegalArgumentException("Unsupported type " +
- value.getClass().getSimpleName() + " for column " + colIndex);
- }
- }
-
- public void setArray(int colIndex, Object value) {
- if (value == null) {
- // Assume null means a 0-element array since Drill does
- // not support null for the whole array.
-
- return;
- }
- String objClass = value.getClass().getName();
- if (!objClass.startsWith("[")) {
- throw new IllegalArgumentException("Argument is not an array");
- }
-
- ColumnWriter colWriter = column(colIndex);
- if (colWriter.valueType() != ValueType.ARRAY) {
- throw new IllegalArgumentException("Column is not an array");
- }
-
- ArrayWriter arrayWriter = colWriter.array();
-
- // Figure out type
-
- char second = objClass.charAt( 1 );
- switch ( second ) {
- case 'B':
- AccessorUtilities.setByteArray(arrayWriter, (byte[]) value );
- break;
- case 'S':
- AccessorUtilities.setShortArray(arrayWriter, (short[]) value );
- break;
- case 'I':
- AccessorUtilities.setIntArray(arrayWriter, (int[]) value );
- break;
- case 'J':
- AccessorUtilities.setLongArray(arrayWriter, (long[]) value );
- break;
- case 'F':
- AccessorUtilities.setFloatArray(arrayWriter, (float[]) value );
- break;
- case 'D':
- AccessorUtilities.setDoubleArray(arrayWriter, (double[]) value );
- break;
- case 'Z':
- AccessorUtilities.setBooleanArray(arrayWriter, (boolean[]) value );
- break;
- case 'L':
- int posn = objClass.indexOf(';');
-
- // If the array is of type Object, then we have no type info.
-
- String memberClassName = objClass.substring( 2, posn );
- if (memberClassName.equals(String.class.getName())) {
- AccessorUtilities.setStringArray(arrayWriter, (String[]) value );
- } else if (memberClassName.equals(Period.class.getName())) {
- AccessorUtilities.setPeriodArray(arrayWriter, (Period[]) value );
- } else if (memberClassName.equals(BigDecimal.class.getName())) {
- AccessorUtilities.setBigDecimalArray(arrayWriter, (BigDecimal[]) value );
- } else {
- throw new IllegalArgumentException( "Unknown Java array type: " + memberClassName );
- }
- break;
- default:
- throw new IllegalArgumentException( "Unknown Java array type: " + second );
- }
- }
-}
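
The setArray() dispatch above keys off the JVM's encoding of array class
names: getClass().getName() on an array yields '[' plus a type code. A few
illustrative cases:

    // JVM array class-name encodings used by setArray().
    assert new int[0].getClass().getName().equals("[I");   // 'I' = int[]
    assert new long[0].getClass().getName().equals("[J");  // 'J' = long[]
    assert new String[0].getClass().getName()
        .equals("[Ljava.lang.String;");                    // 'L...;' = object[]
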
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestOffsetVectorWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestOffsetVectorWriter.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestOffsetVectorWriter.java
new file mode 100644
index 0000000..82d4d08
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestOffsetVectorWriter.java
@@ -0,0 +1,425 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.exec.vector.accessor.writer.OffsetVectorWriter;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.apache.drill.test.rowSet.test.TestFixedWidthWriter.TestIndex;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import io.netty.buffer.DrillBuf;
+
+/**
+ * The offset vector writer follows the same API as the other writers,
+ * but with a twist: offsets are written one slot past the position
+ * used by the other vectors. That is, when writing row 5, the offset
+ * vector writer writes to position 6, so that the end offset of row 5
+ * doubles as the start offset of row 6. (This wastes space: the offset
+ * vector needs one more entry than the other vectors, so a
+ * power-of-two record count forces the offset vector to twice the
+ * size of the others.)
+ */
+
+public class TestOffsetVectorWriter extends SubOperatorTest {
+
+ /**
+ * Party on enough memory that the uninitialized nature of
+ * vectors under the new writers will cause tests to fail if
+ * the writers don't correctly fill in all values.
+ */
+
+ @BeforeClass
+ public static void setup() {
+ DrillBuf bufs[] = new DrillBuf[100];
+ for (int i = 0; i < bufs.length; i++) {
+ bufs[i] = fixture.allocator().buffer(ValueVector.MAX_BUFFER_SIZE);
+ for (int j = 0; j < ValueVector.MAX_BUFFER_SIZE; j++) {
+ bufs[i].setByte(j, (byte) (j & 0x7f));
+ }
+ }
+ for (int i = 0; i < bufs.length; i++) {
+ bufs[i].close();
+ }
+ }
+
+ /**
+ * Basic test to write a contiguous set of offsets, enough to cause
+ * the vector to double in size twice, then read back the values.
+ */
+
+ @Test
+ public void testWrite() {
+ try (UInt4Vector vector = allocVector(1000)) {
+
+ TestIndex index = new TestIndex();
+ OffsetVectorWriter writer = makeWriter(vector, index);
+
+ // Start write sets initial position to 0.
+
+ writer.startWrite();
+ assertEquals(0, vector.getAccessor().get(0));
+
+ // Pretend to write offsets for values of width 10. We write
+ // the end position of each field.
+ // Write enough that the vector is resized.
+
+ long origAddr = vector.getBuffer().addr();
+ for (int i = 0; i < 3000; i++) {
+ index.index = i;
+ writer.startRow();
+ assertEquals(i * 10, writer.nextOffset());
+ writer.setNextOffset((i+1) * 10);
+ assertEquals((i+1) * 10, writer.nextOffset());
+ writer.saveRow();
+ }
+ writer.endWrite();
+
+ // Should have been reallocated.
+
+ assertNotEquals(origAddr, vector.getBuffer().addr());
+
+ // Verify values
+
+ for (int i = 0; i < 3001; i++) {
+ assertEquals(i * 10, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ @Test
+ public void testRestartRow() {
+ try (UInt4Vector vector = allocVector(1000)) {
+
+ TestIndex index = new TestIndex();
+ OffsetVectorWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Write rows, rewriting every other row.
+
+ writer.startRow();
+ index.index = 0;
+ for (int i = 0; i < 50; i++) {
+ if (i % 2 == 0) {
+ assertEquals(i == 0 ? 0 : (i - 1) * 10, writer.nextOffset());
+ writer.setNextOffset((i + 1) * 10);
+ writer.saveRow();
+ writer.startRow();
+ index.index++;
+ } else {
+ writer.setNextOffset((i + 1) * 10);
+ writer.restartRow();
+ }
+ }
+ writer.endWrite();
+
+ // Verify values
+
+ assertEquals(0, vector.getAccessor().get(0));
+ for (int i = 1; i < 25; i++) {
+ assertEquals((2 * i - 1) * 10, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+
+ /**
+ * Offset vectors have specific behavior when back-filling missing values:
+ * the last offset must be carried forward into the missing slots. The
+ * slots cannot be zero-filled, or entries will end up with a negative
+ * length.
+ */
+
+ @Test
+ public void testFillEmpties() {
+ try (UInt4Vector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ OffsetVectorWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Pretend to write offsets for values of width 10, but
+ // skip four out of five values, forcing backfill.
+ // The loop will cause the vector to double in size.
+ // The number of values is odd, forcing the writer to
+ // back-fill at the end as well as between values.
+
+ long origAddr = vector.getBuffer().addr();
+ for (int i = 5; i < 3001; i += 5) {
+ index.index = i;
+ writer.startRow();
+ int startOffset = writer.nextOffset();
+ assertEquals((i/5 - 1) * 10, startOffset);
+ writer.setNextOffset(startOffset + 10);
+ writer.saveRow();
+ }
+ index.index = 3003;
+ writer.endWrite();
+
+ // Should have been reallocated.
+
+ assertNotEquals(origAddr, vector.getBuffer().addr());
+
+ // Verify values
+
+ for (int i = 0; i < 3004; i++) {
+ assertEquals(((i-1)/5) * 10, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ /**
+ * The rollover method is used during vector overflow.
+ */
+
+ @Test
+ public void testRollover() {
+ try (UInt4Vector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ OffsetVectorWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Simulate doing an overflow of ten values.
+
+ for (int i = 0; i < 10; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setNextOffset((i+1) * 10);
+ writer.saveRow();
+ }
+
+ // Overflow occurs after writing the 11th row
+
+ index.index = 10;
+ writer.startRow();
+ writer.setNextOffset(110);
+
+ // Overflow occurs
+
+ writer.preRollover();
+
+ // Simulate rollover
+
+ for (int i = 0; i < 15; i++) {
+ vector.getMutator().set(i, 0xdeadbeef);
+ }
+
+ // Simulate shifting the last value down (which changes
+ // the offset.)
+
+ vector.getMutator().set(1, 10);
+
+ // Post rollover, slot 0 should be initialized
+
+ writer.postRollover();
+ index.index = 0;
+ writer.saveRow();
+
+ // Simulate resuming with a few more values.
+
+ for (int i = 1; i < 5; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setNextOffset((i + 1) * 10);
+ writer.saveRow();
+ }
+ writer.endWrite();
+
+ // Verify the results
+
+ for (int i = 0; i < 6; i++) {
+ assertEquals(i * 10, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ /**
+ * Simulate the case in which the tail end of an overflow
+ * batch has empties. <tt>preRollover()</tt> should back-fill
+ * them with the next offset prior to rollover.
+ */
+
+ @Test
+ public void testRolloverWithEmpties() {
+ try (UInt4Vector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ OffsetVectorWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Simulate doing an overflow of 15 values,
+ // of which 5 are empty.
+
+ for (int i = 0; i < 10; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setNextOffset((i+1) * 10);
+ writer.saveRow();
+ }
+
+ for (int i = 10; i < 15; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.saveRow();
+ }
+
+ // Overflow occurs before writing the 16th row
+
+ index.index = 15;
+ writer.startRow();
+
+ // Overflow occurs. This should fill empty offsets.
+
+ writer.preRollover();
+
+ // Verify the first "batch" results
+
+ for (int i = 0; i < 11; i++) {
+ assertEquals(i * 10, vector.getAccessor().get(i));
+ }
+ for (int i = 11; i < 16; i++) {
+ assertEquals("i = " + i, 100, vector.getAccessor().get(i));
+ }
+
+ // Simulate rollover
+
+ for (int i = 0; i < 20; i++) {
+ vector.getMutator().set(i, 0xdeadbeef);
+ }
+
+ // Post rollover, slot 0 should be initialized.
+ // After a rollover, this row must set the value
+ // for the new row 0 (which was presumably set/filled
+ // after the overflow.)
+
+ writer.postRollover();
+ index.index = 0;
+ writer.setNextOffset(0);
+ writer.saveRow();
+
+ // Skip more values.
+
+ for (int i = 1; i < 5; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.saveRow();
+ }
+
+ // Simulate resuming with a few more values.
+
+ for (int i = 5; i < 10; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setNextOffset((i - 4) * 10);
+ writer.saveRow();
+ }
+ writer.endWrite();
+
+ // Verify the results
+
+ for (int i = 0; i < 6; i++) {
+ assertEquals(0, vector.getAccessor().get(i));
+ }
+ for (int i = 6; i < 11; i++) {
+ assertEquals((i - 5) * 10, vector.getAccessor().get(i));
+ }
+ }
+ }
+
+ /**
+ * Test resize monitoring. Add a listener to an offsets writer,
+ * capture each resize, and refuse a resize when the number
+ * of ints exceeds 8K values. This will trigger an overflow,
+ * which will throw an exception which we then check for.
+ */
+
+ @Test
+ public void testSizeLimit() {
+ try (UInt4Vector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ OffsetVectorWriter writer = makeWriter(vector, index);
+ writer.bindListener(new ColumnWriterListener() {
+ int totalAlloc = 4096;
+
+ @Override
+ public void overflowed(ScalarWriter writer) {
+ throw new IllegalStateException("overflow called");
+ }
+
+ @Override
+ public boolean canExpand(ScalarWriter writer, int delta) {
+// System.out.println("Delta: " + delta);
+ totalAlloc += delta;
+ return totalAlloc < 16_384 * 4;
+ }
+ });
+ writer.startWrite();
+ try {
+ for (int i = 0; ; i++ ) {
+ index.index = i;
+ writer.startRow();
+ writer.setNextOffset(i);
+ writer.saveRow();
+ }
+ }
+ catch(IllegalStateException e) {
+ assertTrue(e.getMessage().contains("overflow called"));
+ }
+
+ // Should have failed on 8191, which doubled the vector
+ // to 16K, which was rejected. Note the 8191 value:
+ // offsets are one ahead of the index.
+
+ assertEquals(8191, index.index);
+ }
+ }
+
+ private UInt4Vector allocVector(int size) {
+ MaterializedField field = SchemaBuilder.columnSchema("x", MinorType.UINT4,
+ DataMode.REQUIRED);
+ UInt4Vector vector = new UInt4Vector(field, fixture.allocator());
+ vector.allocateNew(size);
+
+ // Party on the bytes of the vector so we start dirty
+
+ for (int i = 0; i < size; i++) {
+ vector.getMutator().set(i, 0xdeadbeef);
+ }
+ assertNotEquals(0, vector.getAccessor().get(0));
+ return vector;
+ }
+
+ private OffsetVectorWriter makeWriter(UInt4Vector vector, TestIndex index) {
+ OffsetVectorWriter writer = new OffsetVectorWriter(vector);
+ writer.bindIndex(index);
+
+ assertEquals(ValueType.INTEGER, writer.valueType());
+ return writer;
+ }
+}
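
A small worked example of the one-ahead convention these tests exercise: for
n rows the offset vector holds n + 1 entries, and row i's data occupies
[offsets[i], offsets[i+1]):

    // Offsets after writing three rows, each 10 bytes wide.
    int[] offsets = {0, 10, 20, 30};        // 3 rows, 4 offset slots
    int row = 2;
    int start = offsets[row];               // 20
    int length = offsets[row + 1] - start;  // 10
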
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java
new file mode 100644
index 0000000..939377a
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestScalarAccessors.java
@@ -0,0 +1,1266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import static org.junit.Assert.*;
+
+import java.math.BigDecimal;
+import java.util.Arrays;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.joda.time.Period;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.junit.Test;
+
+/**
+ * Verify that simple scalar (non-repeated) column readers
+ * and writers work as expected. The focus is on the generated
+ * and type-specific functions for each type.
+ */
+
+// The following types are not fully supported in Drill
+// TODO: Var16Char
+// TODO: Bit
+// TODO: Decimal28Sparse
+// TODO: Decimal38Sparse
+
+public class TestScalarAccessors extends SubOperatorTest {
+
+ @Test
+ public void testTinyIntRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.TINYINT)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(0)
+ .addRow(Byte.MAX_VALUE)
+ .addRow(Byte.MIN_VALUE)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.INTEGER, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(0, colReader.getInt());
+
+ assertTrue(reader.next());
+ assertEquals(Byte.MAX_VALUE, colReader.getInt());
+ assertEquals((int) Byte.MAX_VALUE, colReader.getObject());
+ assertEquals(Byte.toString(Byte.MAX_VALUE), colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(Byte.MIN_VALUE, colReader.getInt());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ private void nullableIntTester(MinorType type) {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addNullable("col", type)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(10)
+ .addSingleCol(null)
+ .addRow(30)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(10, colReader.getInt());
+
+ assertTrue(reader.next());
+ assertTrue(colReader.isNull());
+ assertNull(colReader.getObject());
+ assertEquals("null", colReader.getAsString());
+ // Data value is undefined, may be garbage
+
+ assertTrue(reader.next());
+ assertEquals(30, colReader.getInt());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableTinyInt() {
+ nullableIntTester(MinorType.TINYINT);
+ }
+
+ private void intArrayTester(MinorType type) {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addArray("col", type)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addSingleCol(new int[] {})
+ .addSingleCol(new int[] {0, 20, 30})
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarElementReader colReader = reader.elements(0);
+ assertEquals(ValueType.INTEGER, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertEquals(0, colReader.size());
+
+ assertTrue(reader.next());
+ assertEquals(3, colReader.size());
+ assertEquals(0, colReader.getInt(0));
+ assertEquals(20, colReader.getInt(1));
+ assertEquals(30, colReader.getInt(2));
+ assertEquals(0, colReader.getObject(0));
+ assertEquals(20, colReader.getObject(1));
+ assertEquals(30, colReader.getObject(2));
+ assertEquals("0", colReader.getAsString(0));
+ assertEquals("20", colReader.getAsString(1));
+ assertEquals("30", colReader.getAsString(2));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testTinyIntArray() {
+ intArrayTester(MinorType.TINYINT);
+ }
+
+ @Test
+ public void testSmallIntRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.SMALLINT)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(0)
+ .addRow(Short.MAX_VALUE)
+ .addRow(Short.MIN_VALUE)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.INTEGER, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(0, colReader.getInt());
+
+ assertTrue(reader.next());
+ assertEquals(Short.MAX_VALUE, colReader.getInt());
+ assertEquals((int) Short.MAX_VALUE, colReader.getObject());
+ assertEquals(Short.toString(Short.MAX_VALUE), colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(Short.MIN_VALUE, colReader.getInt());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableSmallInt() {
+ nullableIntTester(MinorType.SMALLINT);
+ }
+
+ @Test
+ public void testSmallArray() {
+ intArrayTester(MinorType.SMALLINT);
+ }
+
+ @Test
+ public void testIntRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.INT)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(0)
+ .addRow(Integer.MAX_VALUE)
+ .addRow(Integer.MIN_VALUE)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.INTEGER, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(0, colReader.getInt());
+
+ assertTrue(reader.next());
+ assertEquals(Integer.MAX_VALUE, colReader.getInt());
+ assertEquals(Integer.MAX_VALUE, colReader.getObject());
+ assertEquals(Integer.toString(Integer.MAX_VALUE), colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(Integer.MIN_VALUE, colReader.getInt());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableInt() {
+ nullableIntTester(MinorType.INT);
+ }
+
+ @Test
+ public void testIntArray() {
+ intArrayTester(MinorType.INT);
+ }
+
+ private void longRWTester(MinorType type) {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", type)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(0L)
+ .addRow(Long.MAX_VALUE)
+ .addRow(Long.MIN_VALUE)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.LONG, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(0, colReader.getLong());
+
+ assertTrue(reader.next());
+ assertEquals(Long.MAX_VALUE, colReader.getLong());
+ assertEquals(Long.MAX_VALUE, colReader.getObject());
+ assertEquals(Long.toString(Long.MAX_VALUE), colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(Long.MIN_VALUE, colReader.getLong());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testLongRW() {
+ longRWTester(MinorType.BIGINT);
+ }
+
+ private void nullableLongTester(MinorType type) {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addNullable("col", type)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(10L)
+ .addSingleCol(null)
+ .addRow(30L)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(10, colReader.getLong());
+
+ assertTrue(reader.next());
+ assertTrue(colReader.isNull());
+ assertNull(colReader.getObject());
+ assertEquals("null", colReader.getAsString());
+ // Data value is undefined, may be garbage
+
+ assertTrue(reader.next());
+ assertEquals(30, colReader.getLong());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableLong() {
+ nullableLongTester(MinorType.BIGINT);
+ }
+
+ private void longArrayTester(MinorType type) {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addArray("col", type)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addSingleCol(new long[] {})
+ .addSingleCol(new long[] {0, 20, 30})
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarElementReader colReader = reader.elements(0);
+ assertEquals(ValueType.LONG, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertEquals(0, colReader.size());
+
+ assertTrue(reader.next());
+ assertEquals(3, colReader.size());
+ assertEquals(0, colReader.getLong(0));
+ assertEquals(20, colReader.getLong(1));
+ assertEquals(30, colReader.getLong(2));
+ assertEquals(0L, colReader.getObject(0));
+ assertEquals(20L, colReader.getObject(1));
+ assertEquals(30L, colReader.getObject(2));
+ assertEquals("0", colReader.getAsString(0));
+ assertEquals("20", colReader.getAsString(1));
+ assertEquals("30", colReader.getAsString(2));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testLongArray() {
+ longArrayTester(MinorType.BIGINT);
+ }
+
+ @Test
+ public void testFloatRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.FLOAT4)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(0F)
+ .addRow(Float.MAX_VALUE)
+ .addRow(Float.MIN_VALUE)
+ .addRow(100F)
+ .build();
+ assertEquals(4, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.DOUBLE, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(0, colReader.getDouble(), 0.000001);
+
+ assertTrue(reader.next());
+ assertEquals(Float.MAX_VALUE, colReader.getDouble(), 0.000001);
+ assertEquals((double) Float.MAX_VALUE, (double) colReader.getObject(), 0.000001);
+
+ assertTrue(reader.next());
+ assertEquals(Float.MIN_VALUE, colReader.getDouble(), 0.000001);
+
+ assertTrue(reader.next());
+ assertEquals(100, colReader.getDouble(), 0.000001);
+ assertEquals("100.0", colReader.getAsString());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ private void nullableDoubleTester(MinorType type) {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addNullable("col", type)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(10F)
+ .addSingleCol(null)
+ .addRow(30F)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(10, colReader.getDouble(), 0.000001);
+
+ assertTrue(reader.next());
+ assertTrue(colReader.isNull());
+ assertNull(colReader.getObject());
+ assertEquals("null", colReader.getAsString());
+ // Data value is undefined, may be garbage
+
+ assertTrue(reader.next());
+ assertEquals(30, colReader.getDouble(), 0.000001);
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableFloat() {
+ nullableDoubleTester(MinorType.FLOAT4);
+ }
+
+ private void doubleArrayTester(MinorType type) {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addArray("col", type)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addSingleCol(new double[] {})
+ .addSingleCol(new double[] {0, 20.5, 30.0})
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarElementReader colReader = reader.elements(0);
+ assertEquals(ValueType.DOUBLE, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertEquals(0, colReader.size());
+
+ assertTrue(reader.next());
+ assertEquals(3, colReader.size());
+ assertEquals(0, colReader.getDouble(0), 0.00001);
+ assertEquals(20.5, colReader.getDouble(1), 0.00001);
+ assertEquals(30.0, colReader.getDouble(2), 0.00001);
+ assertEquals(0, (double) colReader.getObject(0), 0.00001);
+ assertEquals(20.5, (double) colReader.getObject(1), 0.00001);
+ assertEquals(30.0, (double) colReader.getObject(2), 0.00001);
+ assertEquals("0.0", colReader.getAsString(0));
+ assertEquals("20.5", colReader.getAsString(1));
+ assertEquals("30.0", colReader.getAsString(2));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testFloatArray() {
+ doubleArrayTester(MinorType.FLOAT4);
+ }
+
+ @Test
+ public void testDoubleRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.FLOAT8)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(0D)
+ .addRow(Double.MAX_VALUE)
+ .addRow(Double.MIN_VALUE)
+ .addRow(100D)
+ .build();
+ assertEquals(4, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.DOUBLE, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(0, colReader.getDouble(), 0.000001);
+
+ assertTrue(reader.next());
+ assertEquals(Double.MAX_VALUE, colReader.getDouble(), 0.000001);
+ assertEquals(Double.MAX_VALUE, (double) colReader.getObject(), 0.000001);
+
+ assertTrue(reader.next());
+ assertEquals(Double.MIN_VALUE, colReader.getDouble(), 0.000001);
+
+ assertTrue(reader.next());
+ assertEquals(100, colReader.getDouble(), 0.000001);
+ assertEquals("100.0", colReader.getAsString());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableDouble() {
+ nullableDoubleTester(MinorType.FLOAT8);
+ }
+
+ @Test
+ public void testDoubleArray() {
+ doubleArrayTester(MinorType.FLOAT8);
+ }
+
+ @Test
+ public void testStringRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.VARCHAR)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow("")
+ .addRow("abcd")
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.STRING, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals("", colReader.getString());
+
+ assertTrue(reader.next());
+ assertEquals("abcd", colReader.getString());
+ assertEquals("abcd", colReader.getObject());
+ assertEquals("\"abcd\"", colReader.getAsString());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableString() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addNullable("col", MinorType.VARCHAR)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow("")
+ .addSingleCol(null)
+ .addRow("abcd")
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals("", colReader.getString());
+
+ assertTrue(reader.next());
+ assertTrue(colReader.isNull());
+ assertNull(colReader.getObject());
+ assertEquals("null", colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals("abcd", colReader.getString());
+ assertEquals("abcd", colReader.getObject());
+ assertEquals("\"abcd\"", colReader.getAsString());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testStringArray() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addArray("col", MinorType.VARCHAR)
+ .build();
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addSingleCol(new String[] {})
+ .addSingleCol(new String[] {"fred", "", "wilma"})
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarElementReader colReader = reader.elements(0);
+ assertEquals(ValueType.STRING, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertEquals(0, colReader.size());
+
+ assertTrue(reader.next());
+ assertEquals(3, colReader.size());
+ assertEquals("fred", colReader.getString(0));
+ assertEquals("", colReader.getString(1));
+ assertEquals("wilma", colReader.getString(2));
+ assertEquals("fred", colReader.getObject(0));
+ assertEquals("", colReader.getObject(1));
+ assertEquals("wilma", colReader.getObject(2));
+ assertEquals("\"fred\"", colReader.getAsString(0));
+ assertEquals("\"\"", colReader.getAsString(1));
+ assertEquals("\"wilma\"", colReader.getAsString(2));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testIntervalYearRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.INTERVALYEAR)
+ .build();
+
+ Period p1 = Period.years(0);
+ Period p2 = Period.years(2).plusMonths(3);
+ Period p3 = Period.years(1234).plusMonths(11);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(p1)
+ .addRow(p2)
+ .addRow(p3)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.PERIOD, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(p1, colReader.getPeriod());
+
+ assertTrue(reader.next());
+ assertEquals(p2, colReader.getPeriod());
+ assertEquals(p2, colReader.getObject());
+ assertEquals(p2.toString(), colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(p3, colReader.getPeriod());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableIntervalYear() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addNullable("col", MinorType.INTERVALYEAR)
+ .build();
+
+ Period p1 = Period.years(0);
+ Period p2 = Period.years(2).plusMonths(3);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(p1)
+ .addSingleCol(null)
+ .addRow(p2)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.PERIOD, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(p1, colReader.getPeriod());
+
+ assertTrue(reader.next());
+ assertTrue(colReader.isNull());
+ assertNull(colReader.getPeriod());
+ assertNull(colReader.getObject());
+ assertEquals("null", colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(p2, colReader.getPeriod());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testIntervalYearArray() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addArray("col", MinorType.INTERVALYEAR)
+ .build();
+
+ Period p1 = Period.years(0);
+ Period p2 = Period.years(2).plusMonths(3);
+ Period p3 = Period.years(1234).plusMonths(11);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addSingleCol(new Period[] {})
+ .addSingleCol(new Period[] {p1, p2, p3})
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarElementReader colReader = reader.elements(0);
+ assertEquals(ValueType.PERIOD, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertEquals(0, colReader.size());
+
+ assertTrue(reader.next());
+ assertEquals(3, colReader.size());
+ assertEquals(p1, colReader.getPeriod(0));
+ assertEquals(p2, colReader.getPeriod(1));
+ assertEquals(p3, colReader.getPeriod(2));
+ assertEquals(p2, colReader.getObject(1));
+ assertEquals(p2.toString(), colReader.getAsString(1));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testIntervalDayRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.INTERVALDAY)
+ .build();
+
+ Period p1 = Period.days(0);
+ Period p2 = Period.days(3).plusHours(4).plusMinutes(5).plusSeconds(23);
+ Period p3 = Period.days(999).plusHours(23).plusMinutes(59).plusSeconds(59);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(p1)
+ .addRow(p2)
+ .addRow(p3)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.PERIOD, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ // The normalizedStandard() call is a hack. See DRILL-5689.
+ assertEquals(p1, colReader.getPeriod().normalizedStandard());
+
+ assertTrue(reader.next());
+ assertEquals(p2, colReader.getPeriod().normalizedStandard());
+ assertEquals(p2, ((Period) colReader.getObject()).normalizedStandard());
+ assertEquals(p2.toString(), colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(p3.normalizedStandard(), colReader.getPeriod().normalizedStandard());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableIntervalDay() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addNullable("col", MinorType.INTERVALDAY)
+ .build();
+
+ Period p1 = Period.years(0);
+ Period p2 = Period.days(3).plusHours(4).plusMinutes(5).plusSeconds(23);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(p1)
+ .addSingleCol(null)
+ .addRow(p2)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.PERIOD, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(p1, colReader.getPeriod().normalizedStandard());
+
+ assertTrue(reader.next());
+ assertTrue(colReader.isNull());
+ assertNull(colReader.getPeriod());
+ assertNull(colReader.getObject());
+ assertEquals("null", colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(p2, colReader.getPeriod().normalizedStandard());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testIntervalDayArray() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addArray("col", MinorType.INTERVALDAY)
+ .build();
+
+ Period p1 = Period.days(0);
+ Period p2 = Period.days(3).plusHours(4).plusMinutes(5).plusSeconds(23);
+ Period p3 = Period.days(999).plusHours(23).plusMinutes(59).plusSeconds(59);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addSingleCol(new Period[] {})
+ .addSingleCol(new Period[] {p1, p2, p3})
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarElementReader colReader = reader.elements(0);
+ assertEquals(ValueType.PERIOD, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertEquals(0, colReader.size());
+
+ assertTrue(reader.next());
+ assertEquals(3, colReader.size());
+ assertEquals(p1, colReader.getPeriod(0).normalizedStandard());
+ assertEquals(p2, colReader.getPeriod(1).normalizedStandard());
+ assertEquals(p3.normalizedStandard(), colReader.getPeriod(2).normalizedStandard());
+ assertEquals(p2, ((Period) colReader.getObject(1)).normalizedStandard());
+ assertEquals(p2.toString(), colReader.getAsString(1));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testIntervalRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.INTERVAL)
+ .build();
+
+ Period p1 = Period.days(0);
+ Period p2 = Period.years(7).plusMonths(8)
+ .plusDays(3).plusHours(4)
+ .plusMinutes(5).plusSeconds(23);
+ Period p3 = Period.years(9999).plusMonths(11)
+ .plusDays(365).plusHours(23)
+ .plusMinutes(59).plusSeconds(59);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(p1)
+ .addRow(p2)
+ .addRow(p3)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.PERIOD, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ // The normalizedStandard() call is a hack. See DRILL-5689.
+ assertEquals(p1, colReader.getPeriod().normalizedStandard());
+
+ assertTrue(reader.next());
+ assertEquals(p2, colReader.getPeriod().normalizedStandard());
+ assertEquals(p2, ((Period) colReader.getObject()).normalizedStandard());
+ assertEquals(p2.toString(), colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(p3.normalizedStandard(), colReader.getPeriod().normalizedStandard());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableInterval() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addNullable("col", MinorType.INTERVAL)
+ .build();
+
+ Period p1 = Period.years(0);
+ Period p2 = Period.years(7).plusMonths(8)
+ .plusDays(3).plusHours(4)
+ .plusMinutes(5).plusSeconds(23);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(p1)
+ .addSingleCol(null)
+ .addRow(p2)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.PERIOD, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(p1, colReader.getPeriod().normalizedStandard());
+
+ assertTrue(reader.next());
+ assertTrue(colReader.isNull());
+ assertNull(colReader.getPeriod());
+ assertNull(colReader.getObject());
+ assertEquals("null", colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(p2, colReader.getPeriod().normalizedStandard());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testIntervalArray() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addArray("col", MinorType.INTERVAL)
+ .build();
+
+ Period p1 = Period.days(0);
+ Period p2 = Period.years(7).plusMonths(8)
+ .plusDays(3).plusHours(4)
+ .plusMinutes(5).plusSeconds(23);
+ Period p3 = Period.years(9999).plusMonths(11)
+ .plusDays(365).plusHours(23)
+ .plusMinutes(59).plusSeconds(59);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addSingleCol(new Period[] {})
+ .addSingleCol(new Period[] {p1, p2, p3})
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarElementReader colReader = reader.elements(0);
+ assertEquals(ValueType.PERIOD, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertEquals(0, colReader.size());
+
+ assertTrue(reader.next());
+ assertEquals(3, colReader.size());
+ assertEquals(p1, colReader.getPeriod(0).normalizedStandard());
+ assertEquals(p2, colReader.getPeriod(1).normalizedStandard());
+ assertEquals(p3.normalizedStandard(), colReader.getPeriod(2).normalizedStandard());
+ assertEquals(p2, ((Period) colReader.getObject(1)).normalizedStandard());
+ assertEquals(p2.toString(), colReader.getAsString(1));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testDecimal9RW() {
+ MajorType type = MajorType.newBuilder()
+ .setMinorType(MinorType.DECIMAL9)
+ .setScale(3)
+ .setPrecision(9)
+ .setMode(DataMode.REQUIRED)
+ .build();
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", type)
+ .build();
+
+ BigDecimal v1 = BigDecimal.ZERO;
+ BigDecimal v2 = BigDecimal.valueOf(123_456_789, 3);
+ BigDecimal v3 = BigDecimal.valueOf(999_999_999, 3);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(v1)
+ .addRow(v2)
+ .addRow(v3)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.DECIMAL, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(0, v1.compareTo(colReader.getDecimal()));
+
+ assertTrue(reader.next());
+ assertEquals(0, v2.compareTo(colReader.getDecimal()));
+ assertEquals(0, v2.compareTo((BigDecimal) colReader.getObject()));
+ assertEquals(v2.toString(), colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(0, v3.compareTo(colReader.getDecimal()));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ private void nullableDecimalTester(MinorType type, int precision) {
+ MajorType majorType = MajorType.newBuilder()
+ .setMinorType(type)
+ .setScale(3)
+ .setPrecision(precision)
+ .setMode(DataMode.OPTIONAL)
+ .build();
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", majorType)
+ .build();
+
+ BigDecimal v1 = BigDecimal.ZERO;
+ BigDecimal v2 = BigDecimal.valueOf(123_456_789, 3);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(v1)
+ .addSingleCol(null)
+ .addRow(v2)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.DECIMAL, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(0, v1.compareTo(colReader.getDecimal()));
+
+ assertTrue(reader.next());
+ assertTrue(colReader.isNull());
+ assertNull(colReader.getObject());
+ assertEquals("null", colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(0, v2.compareTo(colReader.getDecimal()));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableDecimal9() {
+ nullableDecimalTester(MinorType.DECIMAL9, 9);
+ }
+
+ private void decimalArrayTester(MinorType type, int precision) {
+ MajorType majorType = MajorType.newBuilder()
+ .setMinorType(type)
+ .setScale(3)
+ .setPrecision(precision)
+ .setMode(DataMode.REPEATED)
+ .build();
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", majorType)
+ .build();
+
+ BigDecimal v1 = BigDecimal.ZERO;
+ BigDecimal v2 = BigDecimal.valueOf(123_456_789, 3);
+ BigDecimal v3 = BigDecimal.TEN;
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addSingleCol(new BigDecimal[] {})
+ .addSingleCol(new BigDecimal[] {v1, v2, v3})
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarElementReader colReader = reader.elements(0);
+ assertEquals(ValueType.DECIMAL, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertEquals(0, colReader.size());
+
+ assertTrue(reader.next());
+ assertEquals(3, colReader.size());
+ assertEquals(0, v1.compareTo(colReader.getDecimal(0)));
+ assertEquals(0, v2.compareTo(colReader.getDecimal(1)));
+ assertEquals(0, v3.compareTo(colReader.getDecimal(2)));
+ assertEquals(0, v2.compareTo((BigDecimal) colReader.getObject(1)));
+ assertEquals(v2.toString(), colReader.getAsString(1));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testDecimal9Array() {
+ decimalArrayTester(MinorType.DECIMAL9, 9);
+ }
+
+ @Test
+ public void testDecimal18RW() {
+ MajorType type = MajorType.newBuilder()
+ .setMinorType(MinorType.DECIMAL18)
+ .setScale(3)
+ .setPrecision(18)
+ .setMode(DataMode.REQUIRED)
+ .build();
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", type)
+ .build();
+
+ BigDecimal v1 = BigDecimal.ZERO;
+ BigDecimal v2 = BigDecimal.valueOf(123_456_789_123_456_789L, 3);
+ BigDecimal v3 = BigDecimal.valueOf(999_999_999_999_999_999L, 3);
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(v1)
+ .addRow(v2)
+ .addRow(v3)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.DECIMAL, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertEquals(0, v1.compareTo(colReader.getDecimal()));
+
+ assertTrue(reader.next());
+ assertEquals(0, v2.compareTo(colReader.getDecimal()));
+ assertEquals(0, v2.compareTo((BigDecimal) colReader.getObject()));
+ assertEquals(v2.toString(), colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertEquals(0, v3.compareTo(colReader.getDecimal()));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableDecimal18() {
+ nullableDecimalTester(MinorType.DECIMAL18, 18);
+ }
+
+ @Test
+ public void testDecimal18Array() {
+ decimalArrayTester(MinorType.DECIMAL18, 18);
+ }
+
+ // From the perspective of the vector, a date vector is just a long.
+
+ @Test
+ public void testDateRW() {
+ longRWTester(MinorType.DATE);
+ }
+
+ @Test
+ public void testNullableDate() {
+ nullableLongTester(MinorType.DATE);
+ }
+
+ @Test
+ public void testDateArray() {
+ longArrayTester(MinorType.DATE);
+ }
+
+ // From the perspective of the vector, a timestamp vector is just a long.
+
+ @Test
+ public void testTimestampRW() {
+ longRWTester(MinorType.TIMESTAMP);
+ }
+
+ @Test
+ public void testNullableTimestamp() {
+ nullableLongTester(MinorType.TIMESTAMP);
+ }
+
+ @Test
+ public void testTimestampArray() {
+ longArrayTester(MinorType.TIMESTAMP);
+ }
+
+ @Test
+ public void testVarBinaryRW() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .add("col", MinorType.VARBINARY)
+ .build();
+
+ byte v1[] = new byte[] {};
+ byte v2[] = new byte[] { (byte) 0x00, (byte) 0x7f, (byte) 0x80, (byte) 0xFF};
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(v1)
+ .addRow(v2)
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.BYTES, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertTrue(Arrays.equals(v1, colReader.getBytes()));
+
+ assertTrue(reader.next());
+ assertTrue(Arrays.equals(v2, colReader.getBytes()));
+ assertTrue(Arrays.equals(v2, (byte[]) colReader.getObject()));
+ assertEquals("[00, 7f, 80, ff]", colReader.getAsString());
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testNullableVarBinary() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addNullable("col", MinorType.VARBINARY)
+ .build();
+
+ byte v1[] = new byte[] {};
+ byte v2[] = new byte[] { (byte) 0x00, (byte) 0x7f, (byte) 0x80, (byte) 0xFF};
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addRow(v1)
+ .addSingleCol(null)
+ .addRow(v2)
+ .build();
+ assertEquals(3, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarReader colReader = reader.scalar(0);
+ assertEquals(ValueType.BYTES, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertFalse(colReader.isNull());
+ assertTrue(Arrays.equals(v1, colReader.getBytes()));
+
+ assertTrue(reader.next());
+ assertTrue(colReader.isNull());
+ assertNull(colReader.getObject());
+ assertEquals("null", colReader.getAsString());
+
+ assertTrue(reader.next());
+ assertTrue(Arrays.equals(v2, colReader.getBytes()));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+
+ @Test
+ public void testVarBinaryArray() {
+ BatchSchema batchSchema = new SchemaBuilder()
+ .addArray("col", MinorType.VARBINARY)
+ .build();
+
+ byte v1[] = new byte[] {};
+ byte v2[] = new byte[] { (byte) 0x00, (byte) 0x7f, (byte) 0x80, (byte) 0xFF};
+ byte v3[] = new byte[] { (byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xAF};
+
+ SingleRowSet rs = fixture.rowSetBuilder(batchSchema)
+ .addSingleCol(new byte[][] {})
+ .addSingleCol(new byte[][] {v1, v2, v3})
+ .build();
+ assertEquals(2, rs.rowCount());
+
+ RowSetReader reader = rs.reader();
+ ScalarElementReader colReader = reader.elements(0);
+ assertEquals(ValueType.BYTES, colReader.valueType());
+
+ assertTrue(reader.next());
+ assertEquals(0, colReader.size());
+
+ assertTrue(reader.next());
+ assertEquals(3, colReader.size());
+ assertTrue(Arrays.equals(v1, colReader.getBytes(0)));
+ assertTrue(Arrays.equals(v2, colReader.getBytes(1)));
+ assertTrue(Arrays.equals(v3, colReader.getBytes(2)));
+ assertTrue(Arrays.equals(v2, (byte[]) colReader.getObject(1)));
+ assertEquals("[00, 7f, 80, ff]", colReader.getAsString(1));
+
+ assertFalse(reader.next());
+ rs.clear();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestVariableWidthWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestVariableWidthWriter.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestVariableWidthWriter.java
new file mode 100644
index 0000000..103b212
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/TestVariableWidthWriter.java
@@ -0,0 +1,418 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.vector.VarCharVector;
+import org.apache.drill.exec.vector.accessor.ColumnAccessors.VarCharColumnWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.apache.drill.test.rowSet.test.TestFixedWidthWriter.TestIndex;
+import org.bouncycastle.util.Arrays;
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+
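+/**
+ * Exercises the variable-width (VarChar) column writer directly:
+ * basic writes with vector doubling, row restart, back-filling of
+ * empty values, overflow rollover, null skipping, and size-limit
+ * monitoring through a writer listener.
+ */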
+public class TestVariableWidthWriter extends SubOperatorTest {
+
+ /**
+ * Basic test to write a contiguous set of values, enough to cause
+ * the vector to double in size twice, then read back the values.
+ */
+
+ @Test
+ public void testWrite() {
+ try (VarCharVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ VarCharColumnWriter writer = makeWriter(vector, index);
+
+ writer.startWrite();
+
+ // Write strings.
+ // Write enough that the vector is resized.
+
+ long origAddr = vector.getBuffer().addr();
+ String base = "sample-value";
+ for (int i = 0; i < 3000; i++) {
+ index.index = i;
+ writer.setString(base + i);
+ }
+ writer.endWrite();
+
+ // Should have been reallocated.
+
+ assertNotEquals(origAddr, vector.getBuffer().addr());
+
+ // Verify values
+
+ for (int i = 0; i < 3000; i++) {
+ assertEquals(base + i, stringAt(vector, i));
+ }
+ }
+ }
+
+ @Test
+ public void testRestartRow() {
+ try (VarCharVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ VarCharColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Write rows, rewriting every other row.
+
+ String base = "sample-value";
+ writer.startRow();
+ index.index = 0;
+ for (int i = 0; i < 50; i++) {
+ writer.setString(base + i);
+ if (i % 2 == 0) {
+ writer.saveRow();
+ writer.startRow();
+ index.index++;
+ } else {
+ writer.restartRow();
+ }
+ }
+ writer.endWrite();
+
+ // Verify values
+
+ for (int i = 0; i < 25; i++) {
+ assertEquals(base + (2 * i), stringAt(vector, i));
+ }
+ }
+ }
+
+ /**
+ * Filling empties in a variable-width vector means carrying forward
+ * offsets (as tested elsewhere), leaving zero-length values.
+ */
+
+ @Test
+ public void testFillEmpties() {
+ try (VarCharVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ VarCharColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Write values, skipping four out of five positions,
+ // forcing backfill.
+ // The number of values is odd, forcing the writer to
+ // back-fill at the end as well as between values.
+
+ String base = "sample-value";
+ for (int i = 0; i < 501; i += 5) {
+ index.index = i;
+ writer.startRow();
+ writer.setString(base + i);
+ writer.saveRow();
+ }
+ // At end, vector index defined to point one past the
+ // last row. That is, the vector index gives the row count.
+
+ index.index = 504;
+ writer.endWrite();
+
+ // Verify values
+
+ for (int i = 0; i < 504; i++) {
+ assertEquals("Mismatch on " + i,
+ (i%5) == 0 ? base + i : "", stringAt(vector, i));
+ }
+ }
+ }
+
+ /**
+ * The rollover method is used during vector overflow.
+ */
+
+ @Test
+ public void testRollover() {
+ try (VarCharVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ VarCharColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Simulate doing an overflow of ten values.
+
+ String base = "sample-value";
+ for (int i = 0; i < 10; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setString(base + i);
+ writer.saveRow();
+ }
+
+ // Overflow occurs after writing the value for the 11th row,
+ // but before the row is saved.
+
+ index.index = 10;
+ writer.startRow();
+ String overflowValue = base + 10;
+ writer.setString(overflowValue);
+
+ // Overflow occurs
+
+ writer.preRollover();
+
+ // Simulate rollover
+
+ byte dummy[] = new byte[] { (byte) 0x55 };
+ for (int i = 0; i < 500; i++) {
+ vector.getMutator().setSafe(i, dummy);
+ }
+ for (int i = 1; i < 15; i++) {
+ vector.getOffsetVector().getMutator().set(i, 0xdeadbeef);
+ }
+ vector.getMutator().setSafe(0, overflowValue.getBytes(Charsets.UTF_8));
+
+ writer.postRollover();
+ index.index = 0;
+ writer.saveRow();
+
+ // Simulate resuming with a few more values.
+
+ for (int i = 1; i < 5; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setString(base + (i + 10));
+ writer.saveRow();
+ }
+ writer.endWrite();
+
+ // Verify the results
+
+ for (int i = 0; i < 5; i++) {
+ assertEquals(base + (10 + i), stringAt(vector, i));
+ }
+ }
+ }
+
+ /**
+ * Simulate the case in which the tail end of an overflow
+ * batch has empties. <tt>preRollover()</tt> should back-fill
+ * them with the next offset prior to rollover.
+ */
+
+ @Test
+ public void testRolloverWithEmpties() {
+ try (VarCharVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ VarCharColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Simulate doing an overflow of 15 values,
+ // of which 5 are empty.
+
+ String base = "sample-value";
+ for (int i = 0; i < 10; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setString(base + i);
+ writer.saveRow();
+ }
+
+ for (int i = 10; i < 15; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.saveRow();
+ }
+
+ // Overflow occurs before writing the 16th row
+
+ index.index = 15;
+ writer.startRow();
+
+ // Overflow occurs. This should fill empty offsets.
+
+ writer.preRollover();
+
+ // Verify the first "batch" results
+
+ for (int i = 0; i < 10; i++) {
+ assertEquals(base + i, stringAt(vector, i));
+ }
+ for (int i = 10; i < 15; i++) {
+ assertEquals("", stringAt(vector, i));
+ }
+
+ // Simulate rollover
+
+ byte dummy[] = new byte[] { (byte) 0x55 };
+ for (int i = 0; i < 500; i++) {
+ vector.getMutator().setSafe(i, dummy);
+ }
+ for (int i = 1; i < 15; i++) {
+ vector.getOffsetVector().getMutator().set(i, 0xdeadbeef);
+ }
+ vector.getMutator().setSafe(0, new byte[] {});
+
+ writer.postRollover();
+ index.index = 0;
+ writer.saveRow();
+
+ // Skip more values.
+
+ for (int i = 1; i < 5; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.saveRow();
+ }
+
+ // Simulate resuming with a few more values.
+
+ for (int i = 5; i < 10; i++) {
+ index.index = i;
+ writer.startRow();
+ writer.setString(base + (i + 20));
+ writer.saveRow();
+ }
+ writer.endWrite();
+
+ // Verify the results
+
+ for (int i = 0; i < 5; i++) {
+ assertEquals("", stringAt(vector, i));
+ }
+ for (int i = 5; i < 10; i++) {
+ assertEquals(base + (i + 20), stringAt(vector, i));
+ }
+ }
+ }
+
+
+ /**
+ * Test the case in which a scalar vector is used in conjunction
+ * with a nullable bits vector. The nullable vector will call the
+ * <tt>skipNulls()</tt> method to avoid writing values for null
+ * entries. For variable-width, there is no difference between
+ * filling empties and skipping nulls: both result in zero-sized
+ * entries.
+ */
+
+ @Test
+ public void testSkipNulls() {
+ try (VarCharVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ VarCharColumnWriter writer = makeWriter(vector, index);
+ writer.startWrite();
+
+ // Write values, skipping four out of five positions,
+ // skipping nulls.
+ // The number of values is odd, forcing the writer to
+ // skip nulls at the end as well as between values.
+
+ String base = "sample-value";
+ for (int i = 0; i < 3000; i += 5) {
+ index.index = i;
+ writer.startRow();
+ writer.skipNulls();
+ writer.setString(base + i);
+ writer.saveRow();
+ }
+ index.index = 3003;
+ writer.startRow();
+ writer.skipNulls();
+ writer.saveRow();
+ writer.endWrite();
+
+ // Verify values. Skipping nulls should back-fill
+ // offsets, resulting in zero-length strings.
+
+ for (int i = 0; i < 3000; i++) {
+ assertEquals("Mismatch at " + i,
+ (i%5) == 0 ? base + i : "", stringAt(vector, i));
+ }
+ }
+ }
+
+ /**
+ * Test resize monitoring. Add a listener to a VarChar writer,
+ * capture each resize, and refuse a resize once the total allocated
+ * size of the vector exceeds 1 MB. This triggers an overflow, which
+ * throws an exception that we then check for.
+ */
+
+ @Test
+ public void testSizeLimit() {
+ try (VarCharVector vector = allocVector(1000)) {
+ TestIndex index = new TestIndex();
+ VarCharColumnWriter writer = makeWriter(vector, index);
+ writer.bindListener(new ColumnWriterListener() {
+ // The vector was allocated assuming 10 bytes per value, so the
+ // initial allocation is 10 * 1000 = 10,000 bytes, rounded up to 16K.
+ int totalAlloc = 16384;
+
+ @Override
+ public void overflowed(ScalarWriter writer) {
+ throw new IllegalStateException("overflow called");
+ }
+
+ @Override
+ public boolean canExpand(ScalarWriter writer, int delta) {
+ System.out.println("Delta: " + delta);
+ totalAlloc += delta;
+ return totalAlloc < 1024 * 1024;
+ }
+ });
+ writer.startWrite();
+
+ byte value[] = new byte[423];
+ Arrays.fill(value, (byte) 'X');
+ try {
+ for (int i = 0; ; i++ ) {
+ index.index = i;
+ writer.startRow();
+ writer.setBytes(value, value.length);
+ writer.saveRow();
+ }
+ }
+ catch(IllegalStateException e) {
+ assertTrue(e.getMessage().contains("overflow called"));
+ }
+ }
+ }
+
+ private String stringAt(VarCharVector vector, int i) {
+ return new String(vector.getAccessor().get(i), Charsets.UTF_8);
+ }
+
+ private VarCharVector allocVector(int size) {
+ MaterializedField field =
+ SchemaBuilder.columnSchema("x", MinorType.VARCHAR, DataMode.REQUIRED);
+ VarCharVector vector = new VarCharVector(field, fixture.allocator());
+ vector.allocateNew(size * 10, size);
+ return vector;
+ }
+
+ private VarCharColumnWriter makeWriter(VarCharVector vector, TestIndex index) {
+ VarCharColumnWriter writer = new VarCharColumnWriter(vector);
+ writer.bindIndex(index);
+
+ assertEquals(ValueType.STRING, writer.valueType());
+ return writer;
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/VectorPrinter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/VectorPrinter.java b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/VectorPrinter.java
new file mode 100644
index 0000000..2056220
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/rowSet/test/VectorPrinter.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test.rowSet.test;
+
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.VarCharVector;
+
+import com.google.common.base.Charsets;
+
+/**
+ * Handy tool to visualize string and offset vectors for
+ * debugging.
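+ * <p>
+ * A minimal usage sketch (assuming {@code vector} is a populated
+ * {@link VarCharVector} holding ten values):
+ * <pre><code>
+ * VectorPrinter.printStrings(vector, 0, 10);
+ * </code></pre>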
+ */
+
+public class VectorPrinter {
+
+ public static void printOffsets(UInt4Vector vector, int start, int length) {
+ header(vector, start, length);
+ for (int i = start, j = 0; j < length; i++, j++) {
+ if (j > 0) {
+ System.out.print(" ");
+ }
+ System.out.print(vector.getAccessor().get(i));
+ }
+ System.out.print("], addr = ");
+ System.out.println(vector.getBuffer().addr());
+ }
+
+ public static void printStrings(VarCharVector vector, int start, int length) {
+ printOffsets(vector.getOffsetVector(), start, length + 1);
+ header(vector, start, length);
+ System.out.println();
+ for (int i = start, j = 0; j < length; i++, j++) {
+ System.out.print(" ");
+ System.out.print(i);
+ System.out.print(": \"");
+ System.out.print(stringAt(vector, i));
+ System.out.println("\"");
+ }
+ System.out.println("]");
+ }
+
+ public static void header(ValueVector vector, int start, int length) {
+ System.out.print(vector.getClass());
+ System.out.print(": (");
+ System.out.print(start);
+ System.out.print(" - ");
+ System.out.print(start + length - 1);
+ System.out.print("): [");
+ }
+
+ public static String stringAt(VarCharVector vector, int i) {
+ return new String(vector.getAccessor().get(i), Charsets.UTF_8);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/vector/TestFillEmpties.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/vector/TestFillEmpties.java b/exec/java-exec/src/test/java/org/apache/drill/vector/TestFillEmpties.java
index 4da526e..f3390d3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/vector/TestFillEmpties.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/vector/TestFillEmpties.java
@@ -19,59 +19,30 @@
package org.apache.drill.vector;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.fail;
import org.apache.drill.categories.VectorTest;
import org.apache.drill.common.types.TypeProtos.DataMode;
-import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.TypeProtos.MinorType;
-import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.vector.BaseDataValueVector;
import org.apache.drill.exec.vector.IntVector;
import org.apache.drill.exec.vector.NullableVarCharVector;
import org.apache.drill.exec.vector.RepeatedVarCharVector;
import org.apache.drill.exec.vector.UInt4Vector;
import org.apache.drill.exec.vector.VarCharVector;
-import org.apache.drill.exec.vector.VectorOverflowException;
-import org.apache.drill.test.DrillTest;
-import org.apache.drill.test.OperatorFixture;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.SchemaBuilder;
import org.junit.Test;
import io.netty.buffer.DrillBuf;
import org.junit.experimental.categories.Category;
@Category(VectorTest.class)
-public class TestFillEmpties extends DrillTest {
-
- public static OperatorFixture fixture;
-
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- fixture = OperatorFixture.builder().build();
- }
-
- @AfterClass
- public static void tearDownAfterClass() throws Exception {
- fixture.close();
- }
-
- // To be replaced by a test method in a separate commit.
-
- public static MaterializedField makeField(String name, MinorType dataType, DataMode mode) {
- MajorType type = MajorType.newBuilder()
- .setMinorType(dataType)
- .setMode(mode)
- .build();
-
- return MaterializedField.create(name, type);
- }
+public class TestFillEmpties extends SubOperatorTest {
@Test
public void testNullableVarChar() {
@SuppressWarnings("resource")
- NullableVarCharVector vector = new NullableVarCharVector(makeField("a", MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator());
+ NullableVarCharVector vector = new NullableVarCharVector(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.OPTIONAL), fixture.allocator());
vector.allocateNew( );
// Create "foo", null, "bar", but omit the null.
@@ -91,7 +62,7 @@ public class TestFillEmpties extends DrillTest {
@Test
public void testVarChar() {
@SuppressWarnings("resource")
- VarCharVector vector = new VarCharVector(makeField("a", MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator());
+ VarCharVector vector = new VarCharVector(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED), fixture.allocator());
vector.allocateNew( );
// Create "foo", null, "bar", but omit the null.
@@ -103,11 +74,7 @@ public class TestFillEmpties extends DrillTest {
// Work around: test fails without this. But, only the new column writers
// call this method.
- try {
- mutator.fillEmptiesBounded(0, 2);
- } catch (VectorOverflowException e) {
- fail();
- }
+ mutator.fillEmpties(0, 2);
value = makeValue("bar");
mutator.setSafe(2, value, 0, value.length);
@@ -119,7 +86,7 @@ public class TestFillEmpties extends DrillTest {
@Test
public void testInt() {
@SuppressWarnings("resource")
- IntVector vector = new IntVector(makeField("a", MinorType.INT, DataMode.REQUIRED), fixture.allocator());
+ IntVector vector = new IntVector(SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED), fixture.allocator());
vector.allocateNew( );
// Create 1, 0, 2, but omit the 0.
@@ -136,7 +103,7 @@ public class TestFillEmpties extends DrillTest {
@Test
public void testRepeatedVarChar() {
@SuppressWarnings("resource")
- RepeatedVarCharVector vector = new RepeatedVarCharVector(makeField("a", MinorType.VARCHAR, DataMode.REPEATED), fixture.allocator());
+ RepeatedVarCharVector vector = new RepeatedVarCharVector(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REPEATED), fixture.allocator());
vector.allocateNew( );
// Create "foo", null, "bar", but omit the null.
@@ -151,11 +118,7 @@ public class TestFillEmpties extends DrillTest {
// Work around: test fails without this. But, only the new column writers
// call this method.
- try {
- mutator.fillEmptiesBounded(0, 2);
- } catch (VectorOverflowException e) {
- fail();
- }
+ mutator.fillEmpties(0, 2);
mutator.startNewValue(2);
value = makeValue( "c" );
mutator.addSafe(2, value, 0, value.length);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java
new file mode 100644
index 0000000..27a88f0
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/TupleSchema.java
@@ -0,0 +1,534 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.record;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.expr.TypeHelper;
+import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
+
+/**
+ * Defines the schema of a tuple: either the top-level row or a nested
+ * "map" (really structure). A schema is a collection of columns (backed
+ * by vectors in the loader itself.) Columns are accessible by name or
+ * index. New columns may be added at any time; the new column takes the
+ * next available index.
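+ * <p>
+ * A minimal usage sketch (assuming a {@link MaterializedField} named
+ * {@code field} is in scope):
+ * <pre><code>
+ * TupleSchema schema = new TupleSchema();
+ * schema.add(field);  // the new column takes the next available index
+ * </code></pre>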
+ */
+
+public class TupleSchema implements TupleMetadata {
+
+ /**
+ * Abstract definition of column metadata. Allows applications to create
+ * specialized forms of a column metadata object by extending from this
+ * abstract class.
+ * <p>
+ * Note that, by design, primitive columns do not have a link to their
+ * tuple parent, or their index within that parent. This allows the same
+ * metadata to be shared between two views of a tuple, perhaps physical
+ * and projected views. This restriction does not apply to map columns,
+ * since maps (and the row itself) will, by definition, differ between
+ * the two views.
+ */
+
+ public static abstract class AbstractColumnMetadata implements ColumnMetadata {
+
+ protected MaterializedField schema;
+ protected boolean projected = true;
+
+ /**
+ * Predicted number of elements per array entry. The default comes
+ * from the value of 10 that is often hard-coded elsewhere.
+ */
+
+ protected int expectedElementCount = 1;
+
+ public AbstractColumnMetadata(MaterializedField schema) {
+ this.schema = schema;
+ if (isArray()) {
+ expectedElementCount = DEFAULT_ARRAY_SIZE;
+ }
+ }
+
+ public AbstractColumnMetadata(AbstractColumnMetadata from) {
+ schema = from.schema;
+ expectedElementCount = from.expectedElementCount;
+ }
+
+ protected void bind(TupleSchema parentTuple) { }
+
+ @Override
+ public MaterializedField schema() { return schema; }
+
+ public void replaceField(MaterializedField field) {
+ this.schema = field;
+ }
+ @Override
+ public String name() { return schema().getName(); }
+
+ @Override
+ public MajorType majorType() { return schema().getType(); }
+
+ @Override
+ public MinorType type() { return schema().getType().getMinorType(); }
+
+ @Override
+ public DataMode mode() { return schema().getDataMode(); }
+
+ @Override
+ public boolean isNullable() { return mode() == DataMode.OPTIONAL; }
+
+ @Override
+ public boolean isArray() { return mode() == DataMode.REPEATED; }
+
+ @Override
+ public boolean isList() { return false; }
+
+ @Override
+ public boolean isVariableWidth() {
+ MinorType type = type();
+ return type == MinorType.VARCHAR || type == MinorType.VAR16CHAR || type == MinorType.VARBINARY;
+ }
+
+ @Override
+ public boolean isEquivalent(ColumnMetadata other) {
+ return schema().isEquivalent(other.schema());
+ }
+
+ @Override
+ public int expectedWidth() { return 0; }
+
+ @Override
+ public void setExpectedWidth(int width) { }
+
+ @Override
+ public void setExpectedElementCount(int childCount) {
+ // The allocation utilities don't like an array size of zero, so use
+ // 1 as the minimum. Clamping here avoids trivial failures if the
+ // caller passes a bad value.
+
+ if (isArray()) {
+ expectedElementCount = Math.max(1, childCount);
+ }
+ }
+
+ @Override
+ public int expectedElementCount() { return expectedElementCount; }
+
+ @Override
+ public void setProjected(boolean projected) {
+ this.projected = projected;
+ }
+
+ @Override
+ public boolean isProjected() { return projected; }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder()
+ .append("[")
+ .append(getClass().getSimpleName())
+ .append(" ")
+ .append(schema().toString())
+ .append(",")
+ .append(projected ? "" : "not ")
+ .append("projected");
+ if (isArray()) {
+ buf.append(", cardinality: ")
+ .append(expectedElementCount);
+ }
+ return buf
+ .append("]")
+ .toString();
+ }
+
+ public abstract AbstractColumnMetadata copy();
+ }
+
+ /**
+ * Primitive (non-map) column. Describes non-nullable, nullable and
+ * array types (which differ only in mode, but not in metadata structure.)
+ */
+
+ public static class PrimitiveColumnMetadata extends AbstractColumnMetadata {
+
+ protected int expectedWidth;
+
+ public PrimitiveColumnMetadata(MaterializedField schema) {
+ super(schema);
+ expectedWidth = TypeHelper.getSize(majorType());
+ if (isVariableWidth()) {
+
+ // The above getSize() method uses the deprecated getWidth()
+ // method to get the expected VarChar size. If zero (which
+ // it will be), try the revised precision field.
+
+ int precision = majorType().getPrecision();
+ if (precision > 0) {
+ expectedWidth = precision;
+ } else {
+ // TypeHelper includes the offset vector width
+
+ expectedWidth = expectedWidth - 4;
+ }
+ }
+ }
+
+ public PrimitiveColumnMetadata(PrimitiveColumnMetadata from) {
+ super(from);
+ expectedWidth = from.expectedWidth;
+ }
+
+ @Override
+ public AbstractColumnMetadata copy() {
+ return new PrimitiveColumnMetadata(this);
+ }
+
+ @Override
+ public ColumnMetadata.StructureType structureType() { return ColumnMetadata.StructureType.PRIMITIVE; }
+
+ @Override
+ public TupleMetadata mapSchema() { return null; }
+
+ @Override
+ public boolean isMap() { return false; }
+
+ @Override
+ public int expectedWidth() { return expectedWidth; }
+
+ @Override
+ public void setExpectedWidth(int width) {
+ // The allocation utilities don't like a width of zero, so use
+ // 1 as the minimum. The value is adjusted silently to avoid failing
+ // on a trivial caller error.
+
+ if (isVariableWidth()) {
+ expectedWidth = Math.max(1, width);
+ }
+ }
+
+ @Override
+ public ColumnMetadata cloneEmpty() {
+ return new PrimitiveColumnMetadata(this);
+ }
+ }
+
+ /**
+ * Describes a map and repeated map. Both are tuples that have a tuple
+ * schema as part of the column definition.
+ */
+
+ public static class MapColumnMetadata extends AbstractColumnMetadata {
+ private TupleMetadata parentTuple;
+ private final TupleSchema mapSchema;
+
+ /**
+ * Build a new map column from the field provided
+ *
+ * @param schema materialized field description of the map
+ */
+
+ public MapColumnMetadata(MaterializedField schema) {
+ this(schema, null);
+ }
+
+ /**
+ * Build map column metadata by cloning the type information (but not
+ * the children) of the materialized field provided, using the given
+ * map schema, if any.
+ *
+ * @param schema the materialized field that describes the map column
+ * @param mapSchema the tuple schema of the map's members, or null to
+ * start with an empty schema
+ */
+
+ private MapColumnMetadata(MaterializedField schema, TupleSchema mapSchema) {
+ super(schema);
+ if (mapSchema == null) {
+ this.mapSchema = new TupleSchema();
+ } else {
+ this.mapSchema = mapSchema;
+ }
+ this.mapSchema.bind(this);
+ }
+
+ @Override
+ public AbstractColumnMetadata copy() {
+ return new MapColumnMetadata(schema, (TupleSchema) mapSchema.copy());
+ }
+
+ @Override
+ protected void bind(TupleSchema parentTuple) {
+ this.parentTuple = parentTuple;
+ }
+
+ @Override
+ public ColumnMetadata.StructureType structureType() { return ColumnMetadata.StructureType.TUPLE; }
+
+ @Override
+ public TupleMetadata mapSchema() { return mapSchema; }
+
+ @Override
+ public int expectedWidth() { return 0; }
+
+ @Override
+ public boolean isMap() { return true; }
+
+ public TupleMetadata parentTuple() { return parentTuple; }
+
+ public TupleSchema mapSchemaImpl() { return mapSchema; }
+
+ @Override
+ public ColumnMetadata cloneEmpty() {
+ return new MapColumnMetadata(schema().cloneEmpty(), null);
+ }
+ }
+
+ private MapColumnMetadata parentMap;
+ private final TupleNameSpace<ColumnMetadata> nameSpace = new TupleNameSpace<>();
+
+ public void bind(MapColumnMetadata parentMap) {
+ this.parentMap = parentMap;
+ }
+
+ public static TupleSchema fromFields(Iterable<MaterializedField> fields) {
+ TupleSchema tuple = new TupleSchema();
+ for (MaterializedField field : fields) {
+ tuple.add(field);
+ }
+ return tuple;
+ }
+
+ public TupleMetadata copy() {
+ TupleMetadata tuple = new TupleSchema();
+ for (ColumnMetadata md : this) {
+ tuple.addColumn(((AbstractColumnMetadata) md).copy());
+ }
+ return tuple;
+ }
+
+ /**
+ * Create a column metadata object that holds the given
+ * {@link MaterializedField}. The type of the object will be either a
+ * primitive or map column, depending on the field's type.
+ *
+ * @param field the materialized field to wrap
+ * @return the column metadata that wraps the field
+ */
+
+ public static AbstractColumnMetadata fromField(MaterializedField field) {
+ if (field.getType().getMinorType() == MinorType.MAP) {
+ return newMap(field);
+ } else {
+ return new PrimitiveColumnMetadata(field);
+ }
+ }
+
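+ /**
+ * Create a column metadata object that describes the given field but,
+ * for maps, does not import the field's children: the map's tuple
+ * schema starts out empty. Used to create a metadata "view" of an
+ * existing field.
+ *
+ * @param field the materialized field to wrap
+ * @return the column metadata for the field, without children
+ */
+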
+ public static AbstractColumnMetadata fromView(MaterializedField field) {
+ if (field.getType().getMinorType() == MinorType.MAP) {
+ return new MapColumnMetadata(field, null);
+ } else {
+ return new PrimitiveColumnMetadata(field);
+ }
+ }
+
+ /**
+ * Create a tuple given the list of columns that make up the tuple.
+ * Map columns carry along their nested tuple schemas.
+ *
+ * @param columns list of columns that make up the tuple
+ * @return a tuple metadata object that contains the columns
+ */
+
+ public static TupleSchema fromColumns(List<ColumnMetadata> columns) {
+ TupleSchema tuple = new TupleSchema();
+ for (ColumnMetadata column : columns) {
+ tuple.add((AbstractColumnMetadata) column);
+ }
+ return tuple;
+ }
+
+ /**
+ * Create a column metadata object for a map column, given the
+ * {@link MaterializedField} that describes the column, and a list
+ * of column metadata objects that describe the columns in the map.
+ *
+ * @param field the materialized field that describes the map column
+ * @param schema metadata that describes the tuple of columns in
+ * the map
+ * @return a map column metadata for the map
+ */
+
+ public static MapColumnMetadata newMap(MaterializedField field, TupleSchema schema) {
+ return new MapColumnMetadata(field, schema);
+ }
+
+ public static MapColumnMetadata newMap(MaterializedField field) {
+ return new MapColumnMetadata(field, fromFields(field.getChildren()));
+ }
+
+ @Override
+ public ColumnMetadata add(MaterializedField field) {
+ AbstractColumnMetadata md = fromField(field);
+ add(md);
+ return md;
+ }
+
+ public ColumnMetadata addView(MaterializedField field) {
+ AbstractColumnMetadata md = fromView(field);
+ add(md);
+ return md;
+ }
+
+ /**
+ * Add a column metadata object created by the caller. Used for specialized
+ * cases beyond those handled by {@link #add(MaterializedField)}.
+ *
+ * @param md the custom column metadata, which must have the correct
+ * index set (from {@link #size()})
+ */
+
+ public void add(AbstractColumnMetadata md) {
+ md.bind(this);
+ nameSpace.add(md.name(), md);
+ if (parentMap != null) {
+ parentMap.schema.addChild(md.schema());
+ }
+ }
+
+ @Override
+ public int addColumn(ColumnMetadata column) {
+ add((AbstractColumnMetadata) column);
+ return size() - 1;
+ }
+
+ @Override
+ public MaterializedField column(String name) {
+ ColumnMetadata md = metadata(name);
+ return md == null ? null : md.schema();
+ }
+
+ @Override
+ public ColumnMetadata metadata(String name) {
+ return nameSpace.get(name);
+ }
+
+ @Override
+ public int index(String name) {
+ return nameSpace.indexOf(name);
+ }
+
+ @Override
+ public MaterializedField column(int index) {
+ return metadata(index).schema();
+ }
+
+ @Override
+ public ColumnMetadata metadata(int index) {
+ return nameSpace.get(index);
+ }
+
+ @Override
+ public MapColumnMetadata parent() { return parentMap; }
+
+ @Override
+ public int size() { return nameSpace.count(); }
+
+ @Override
+ public boolean isEmpty() { return nameSpace.count() == 0; }
+
+ @Override
+ public Iterator<ColumnMetadata> iterator() {
+ return nameSpace.iterator();
+ }
+
+ @Override
+ public boolean isEquivalent(TupleMetadata other) {
+ TupleSchema otherSchema = (TupleSchema) other;
+ if (nameSpace.count() != otherSchema.nameSpace.count()) {
+ return false;
+ }
+ for (int i = 0; i < nameSpace.count(); i++) {
+ if (! nameSpace.get(i).isEquivalent(otherSchema.nameSpace.get(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public List<MaterializedField> toFieldList() {
+ List<MaterializedField> cols = new ArrayList<>();
+ for (ColumnMetadata md : nameSpace) {
+ cols.add(md.schema());
+ }
+ return cols;
+ }
+
+ public BatchSchema toBatchSchema(SelectionVectorMode svMode) {
+ return new BatchSchema(svMode, toFieldList());
+ }
+
+ @Override
+ public String fullName(int index) {
+ return fullName(metadata(index));
+ }
+
+ @Override
+ public String fullName(ColumnMetadata column) {
+ if (isRoot()) {
+ return column.name();
+ }
+ String quotedName = column.name();
+ if (quotedName.contains(".")) {
+ quotedName = "`" + quotedName + "`";
+ }
+ return fullName() + "." + quotedName;
+ }
+
+ public String fullName() {
+ if (isRoot()) {
+ return "<root>";
+ } else {
+ return parentMap.parentTuple().fullName(parentMap);
+ }
+ }
+
+ public boolean isRoot() { return parentMap == null; }
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder()
+ .append("[")
+ .append(getClass().getSimpleName())
+ .append(" ");
+ boolean first = true;
+ for (ColumnMetadata md : nameSpace) {
+ if (! first) {
+ buf.append(", ");
+ }
+ first = false;
+ buf.append(md.toString());
+ }
+ buf.append("]");
+ return buf.toString();
+ }
+}
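To make the new metadata API concrete, here is a minimal usage sketch (not
part of the patch). It assumes the MaterializedField.create() factory and the
Types helpers from Drill's common module; the column names are arbitrary:

    import org.apache.drill.common.types.TypeProtos.MinorType;
    import org.apache.drill.common.types.Types;
    import org.apache.drill.exec.record.MaterializedField;

    // Build a two-column schema: a required INT and a repeated VARCHAR.
    TupleSchema schema = new TupleSchema();
    schema.add(MaterializedField.create("a", Types.required(MinorType.INT)));
    ColumnMetadata tags = schema.add(
        MaterializedField.create("tags", Types.repeated(MinorType.VARCHAR)));

    // Attach allocation hints. The width hint applies only to variable-width
    // columns; the element-count hint applies only to arrays.
    tags.setExpectedWidth(20);
    tags.setExpectedElementCount(5);

    // Query the resulting metadata.
    assert schema.size() == 2;
    assert schema.metadata("a").type() == MinorType.INT;
    assert schema.metadata("tags").isArray();
    assert schema.fullName(tags).equals("tags");  // root tuple, so no prefix

The same schema can also be assembled via fromFields() or fromColumns(), and
converted to a BatchSchema with toBatchSchema() when a batch is built.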
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/main/java/org/apache/drill/exec/record/selection/SelectionVector2.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/selection/SelectionVector2.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/selection/SelectionVector2.java
index a38a7fe..42f3473 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/selection/SelectionVector2.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/selection/SelectionVector2.java
@@ -24,10 +24,16 @@ import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.record.DeadBuf;
/**
- * A selection vector that fronts, at most, a
+ * A selection vector that fronts, at most, 64K values.
+ * The selection vector is used for two cases:
+ * <ol>
+ * <li>To create a list of values retained by a filter.</li>
+ * <li>To provide a redirection level for sorted
+ * batches.</li>
+ * </ol>
*/
+
public class SelectionVector2 implements AutoCloseable {
- // private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(SelectionVector2.class);
private final BufferAllocator allocator;
private int recordCount;
@@ -39,9 +45,19 @@ public class SelectionVector2 implements AutoCloseable {
this.allocator = allocator;
}
+ /**
+ * Create a selection vector with the given buffer. The selection vector
+ * increments the buffer's reference count, taking ownership of the buffer.
+ *
+ * @param allocator allocator used to allocate the buffer
+ * @param buf the buffer containing the selection vector's data
+ * @param count the number of values in the selection vector
+ */
+
public SelectionVector2(BufferAllocator allocator, DrillBuf buf, int count) {
this.allocator = allocator;
buffer = buf;
+ buffer.retain(1);
recordCount = count;
}
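To make the two use cases concrete, here is a conceptual sketch (not part of
the patch). It models the filter case with a plain int[] standing in for the
selection vector's buffer; the real class stores two-byte entries in a
DrillBuf, which is where the 64K ceiling comes from:

    // Keep only the even-numbered rows of a batch, without moving any
    // vector data: sel[j] holds the physical offset of logical row j.
    int rowCount = 100;                  // hypothetical batch size
    int[] sel = new int[rowCount];
    int outPos = 0;
    for (int i = 0; i < rowCount; i++) {
      if (i % 2 == 0) {                  // stand-in for a real predicate
        sel[outPos++] = i;
      }
    }
    // The filtered batch has outPos rows; a consumer reads logical row j
    // from the underlying value vectors at offset sel[j].

The two-argument constructor above supports handing a pre-built buffer of
such offsets to the selection vector, which retains the buffer rather than
allocating its own.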
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/cache/TestBatchSerialization.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/cache/TestBatchSerialization.java b/exec/java-exec/src/test/java/org/apache/drill/exec/cache/TestBatchSerialization.java
index cfb8645..a283924 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/cache/TestBatchSerialization.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/cache/TestBatchSerialization.java
@@ -27,15 +27,16 @@ import java.io.InputStream;
import java.io.OutputStream;
import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.cache.VectorSerializer.Reader;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.test.DrillTest;
import org.apache.drill.test.OperatorFixture;
import org.apache.drill.test.rowSet.RowSet;
import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
-import org.apache.drill.test.rowSet.RowSet.RowSetWriter;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetComparison;
import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.apache.drill.test.rowSet.RowSetWriter;
import org.apache.drill.test.rowSet.SchemaBuilder;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -73,7 +74,7 @@ public class TestBatchSerialization extends DrillTest {
if (i % 2 == 0) {
RowSetUtilities.setFromInt(writer, 0, i);
} else {
- writer.column(0).setNull();
+ writer.scalar(0).setNull();
}
writer.save();
}
@@ -125,9 +126,8 @@ public class TestBatchSerialization extends DrillTest {
RowSet result;
try (InputStream in = new BufferedInputStream(new FileInputStream(outFile))) {
- result = fixture.wrap(
- VectorSerializer.reader(fixture.allocator(), in)
- .read());
+ Reader reader = VectorSerializer.reader(fixture.allocator(), in);
+ result = fixture.wrap(reader.read(), reader.sv2());
}
new RowSetComparison(expected)
@@ -163,17 +163,17 @@ public class TestBatchSerialization extends DrillTest {
private SingleRowSet buildMapSet(BatchSchema schema) {
return fixture.rowSetBuilder(schema)
- .add(1, 100, "first")
- .add(2, 200, "second")
- .add(3, 300, "third")
+ .addRow(1, new Object[] {100, "first"})
+ .addRow(2, new Object[] {200, "second"})
+ .addRow(3, new Object[] {300, "third"})
.build();
}
private SingleRowSet buildArraySet(BatchSchema schema) {
return fixture.rowSetBuilder(schema)
- .add(1, new String[] { "first, second, third" } )
- .add(2, null)
- .add(3, new String[] { "third, fourth, fifth" } )
+ .addRow(1, new String[] { "first, second, third" } )
+ .addRow(2, null)
+ .addRow(3, new String[] { "third, fourth, fifth" } )
.build();
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TopN/TopNBatchTest.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TopN/TopNBatchTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TopN/TopNBatchTest.java
index fa6e318..e7d0a97 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TopN/TopNBatchTest.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/TopN/TopNBatchTest.java
@@ -80,16 +80,16 @@ public class TopNBatchTest extends PopUnitTestBase {
try (RootAllocator allocator = new RootAllocator(100_000_000)) {
expectedRowSet = new RowSetBuilder(allocator, batchSchema)
- .add(110, 10)
- .add(109, 9)
- .add(108, 8)
- .add(107, 7)
- .add(106, 6)
- .add(105, 5)
- .add(104, 4)
- .add(103, 3)
- .add(102, 2)
- .add(101, 1)
+ .addRow(110, 10)
+ .addRow(109, 9)
+ .addRow(108, 8)
+ .addRow(107, 7)
+ .addRow(106, 6)
+ .addRow(105, 5)
+ .addRow(104, 4)
+ .addRow(103, 3)
+ .addRow(102, 2)
+ .addRow(101, 1)
.build();
PriorityQueue queue;
@@ -121,10 +121,10 @@ public class TopNBatchTest extends PopUnitTestBase {
for (int batchCounter = 0; batchCounter < numBatches; batchCounter++) {
RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, batchSchema);
- rowSetBuilder.add((batchCounter + bound), batchCounter);
+ rowSetBuilder.addRow((batchCounter + bound), batchCounter);
for (int recordCounter = 0; recordCounter < numRecordsPerBatch; recordCounter++) {
- rowSetBuilder.add(random.nextInt(bound), random.nextInt(bound));
+ rowSetBuilder.addRow(random.nextInt(bound), random.nextInt(bound));
}
VectorContainer vectorContainer = rowSetBuilder.build().container();
@@ -135,7 +135,7 @@ public class TopNBatchTest extends PopUnitTestBase {
VectorContainer resultContainer = queue.getHyperBatch();
resultContainer.buildSchema(BatchSchema.SelectionVectorMode.NONE);
- RowSet.HyperRowSet actualHyperSet = new HyperRowSetImpl(allocator, resultContainer, queue.getFinalSv4());
+ RowSet.HyperRowSet actualHyperSet = new HyperRowSetImpl(resultContainer, queue.getFinalSv4());
new RowSetComparison(expectedRowSet).verify(actualHyperSet);
} finally {
if (expectedRowSet != null) {
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
index eafb4c8..202a0f1 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/validate/TestBatchValidator.java
@@ -68,10 +68,10 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add(10, 100)
- .add(20, 120)
- .add(30, null)
- .add(40, 140)
+ .addRow(10, 100)
+ .addRow(20, 120)
+ .addRow(30, null)
+ .addRow(40, 140)
.build();
BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
@@ -88,10 +88,10 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add("col1.1", "col1.2")
- .add("col2.1", "col2.2")
- .add("col3.1", null)
- .add("col4.1", "col4.2")
+ .addRow("col1.1", "col1.2")
+ .addRow("col2.1", "col2.2")
+ .addRow("col3.1", null)
+ .addRow("col4.1", "col4.2")
.build();
BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
@@ -108,9 +108,9 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add(new int[] {}, new String[] {})
- .add(new int[] {1, 2, 3}, new String[] {"fred", "barney", "wilma"})
- .add(new int[] {4}, new String[] {"dino"})
+ .addRow(new int[] {}, new String[] {})
+ .addRow(new int[] {1, 2, 3}, new String[] {"fred", "barney", "wilma"})
+ .addRow(new int[] {4}, new String[] {"dino"})
.build();
BatchValidator validator = new BatchValidator(batch.vectorAccessible(), true);
@@ -126,9 +126,9 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add("x")
- .add("y")
- .add("z")
+ .addRow("x")
+ .addRow("y")
+ .addRow("z")
.build();
// Here we are evil: stomp on the last offset to simulate corruption.
@@ -160,9 +160,9 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add("x")
- .add("y")
- .add("z")
+ .addRow("x")
+ .addRow("y")
+ .addRow("z")
.build();
zapOffset(batch, 0, 1);
@@ -198,9 +198,9 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add("xx")
- .add("yy")
- .add("zz")
+ .addRow("xx")
+ .addRow("yy")
+ .addRow("zz")
.build();
zapOffset(batch, 2, 1);
@@ -222,9 +222,9 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add("xx")
- .add("yy")
- .add("zz")
+ .addRow("xx")
+ .addRow("yy")
+ .addRow("zz")
.build();
zapOffset(batch, 1, 10);
@@ -246,9 +246,9 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add("xx")
- .add("yy")
- .add("zz")
+ .addRow("xx")
+ .addRow("yy")
+ .addRow("zz")
.build();
zapOffset(batch, 3, 100_000);
@@ -270,9 +270,9 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add((Object) new String[] {})
- .add((Object) new String[] {"fred", "barney", "wilma"})
- .add((Object) new String[] {"dino"})
+ .addRow((Object) new String[] {})
+ .addRow((Object) new String[] {"fred", "barney", "wilma"})
+ .addRow((Object) new String[] {"dino"})
.build();
VectorAccessible va = batch.vectorAccessible();
@@ -298,9 +298,9 @@ public class TestBatchValidator /* TODO: extends SubOperatorTest */ {
.build();
SingleRowSet batch = fixture.rowSetBuilder(schema)
- .add((Object) new String[] {})
- .add((Object) new String[] {"fred", "barney", "wilma"})
- .add((Object) new String[] {"dino"})
+ .addRow((Object) new String[] {})
+ .addRow((Object) new String[] {"fred", "barney", "wilma"})
+ .addRow((Object) new String[] {"dino"})
.build();
VectorAccessible va = batch.vectorAccessible();
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java
index c52f1a9..563d97e 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/TestExternalSort.java
@@ -60,7 +60,7 @@ public class TestExternalSort extends BaseTestQuery {
final RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, schema);
for (int i = 0; i <= record_count; i += 2) {
- rowSetBuilder.add(i);
+ rowSetBuilder.addRow(i);
}
final RowSet rowSet = rowSetBuilder.build();
@@ -76,7 +76,7 @@ public class TestExternalSort extends BaseTestQuery {
final RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, schema);
for (int i = 1; i <= record_count; i += 2) {
- rowSetBuilder.add((float) i);
+ rowSetBuilder.addRow((float) i);
}
final RowSet rowSet = rowSetBuilder.build();
@@ -131,7 +131,7 @@ public class TestExternalSort extends BaseTestQuery {
final RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, schema);
for (int i = 0; i <= record_count; i += 2) {
- rowSetBuilder.add(i);
+ rowSetBuilder.addRow(i);
}
final RowSet rowSet = rowSetBuilder.build();
@@ -147,7 +147,7 @@ public class TestExternalSort extends BaseTestQuery {
final RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, schema);
for (int i = 1; i <= record_count; i += 2) {
- rowSetBuilder.add(i);
+ rowSetBuilder.addRow(i);
}
final RowSet rowSet = rowSetBuilder.build();
@@ -199,7 +199,7 @@ public class TestExternalSort extends BaseTestQuery {
final RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, schema);
for (int i = 0; i <= record_count; i += 2) {
- rowSetBuilder.add(i, i);
+ rowSetBuilder.addRow(i, i);
}
final RowSet rowSet = rowSetBuilder.build();
@@ -216,7 +216,7 @@ public class TestExternalSort extends BaseTestQuery {
final RowSetBuilder rowSetBuilder = new RowSetBuilder(allocator, schema);
for (int i = 1; i <= record_count; i += 2) {
- rowSetBuilder.add(i, i);
+ rowSetBuilder.addRow(i, i);
}
final RowSet rowSet = rowSetBuilder.build();
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/SortTestUtilities.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/SortTestUtilities.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/SortTestUtilities.java
index c58abd6..cd408cb 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/SortTestUtilities.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/SortTestUtilities.java
@@ -33,12 +33,12 @@ import org.apache.drill.exec.physical.impl.xsort.managed.PriorityQueueCopierWrap
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.VectorContainer;
+import org.apache.drill.exec.record.TupleMetadata;
import org.apache.drill.test.OperatorFixture;
import org.apache.drill.test.rowSet.DirectRowSet;
import org.apache.drill.test.rowSet.RowSet;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetComparison;
-import org.apache.drill.test.rowSet.RowSetSchema;
import org.apache.drill.test.rowSet.SchemaBuilder;
import com.google.common.collect.Lists;
@@ -93,7 +93,7 @@ public class SortTestUtilities {
public void run() throws Exception {
PriorityQueueCopierWrapper copier = makeCopier(fixture, sortOrder, nullOrder);
List<BatchGroup> batches = new ArrayList<>();
- RowSetSchema schema = null;
+ TupleMetadata schema = null;
for (SingleRowSet rowSet : rowSets) {
batches.add(new BatchGroup.InputBatch(rowSet.container(), rowSet.getSv2(),
fixture.allocator(), rowSet.size()));
@@ -103,7 +103,7 @@ public class SortTestUtilities {
}
int rowCount = outputRowCount();
VectorContainer dest = new VectorContainer();
- BatchMerger merger = copier.startMerge(schema.toBatchSchema(SelectionVectorMode.NONE),
+ BatchMerger merger = copier.startMerge(new BatchSchema(SelectionVectorMode.NONE, schema.toFieldList()),
batches, dest, rowCount, null);
verifyResults(merger, dest);
@@ -121,7 +121,7 @@ public class SortTestUtilities {
protected void verifyResults(BatchMerger merger, VectorContainer dest) {
for (RowSet expectedSet : expected) {
assertTrue(merger.next());
- RowSet rowSet = new DirectRowSet(fixture.allocator(), dest);
+ RowSet rowSet = DirectRowSet.fromContainer(dest);
new RowSetComparison(expectedSet)
.verifyAndClearAll(rowSet);
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestCopier.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestCopier.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestCopier.java
index f1c622f..5d438ee 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestCopier.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestCopier.java
@@ -29,15 +29,13 @@ import org.apache.drill.exec.physical.impl.xsort.managed.PriorityQueueCopierWrap
import org.apache.drill.exec.physical.impl.xsort.managed.SortTestUtilities.CopierTester;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.VectorContainer;
-import org.apache.drill.test.DrillTest;
import org.apache.drill.test.OperatorFixture;
+import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
-import org.apache.drill.test.rowSet.RowSet.RowSetWriter;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.apache.drill.test.rowSet.RowSetWriter;
import org.apache.drill.test.rowSet.SchemaBuilder;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@@ -51,19 +49,7 @@ import org.junit.experimental.categories.Category;
*/
@Category(OperatorTest.class)
-public class TestCopier extends DrillTest {
-
- public static OperatorFixture fixture;
-
- @BeforeClass
- public static void setup() {
- fixture = OperatorFixture.builder().build();
- }
-
- @AfterClass
- public static void tearDown() throws Exception {
- fixture.close();
- }
+public class TestCopier extends SubOperatorTest {
@Test
public void testEmptyInput() throws Exception {
@@ -101,12 +87,12 @@ public class TestCopier extends DrillTest {
BatchSchema schema = SortTestUtilities.nonNullSchema();
CopierTester tester = new CopierTester(fixture);
tester.addInput(fixture.rowSetBuilder(schema)
- .add(10, "10")
+ .addRow(10, "10")
.withSv2()
.build());
tester.addOutput(fixture.rowSetBuilder(schema)
- .add(10, "10")
+ .addRow(10, "10")
.build());
tester.run();
}
@@ -116,17 +102,17 @@ public class TestCopier extends DrillTest {
BatchSchema schema = SortTestUtilities.nonNullSchema();
CopierTester tester = new CopierTester(fixture);
tester.addInput(fixture.rowSetBuilder(schema)
- .add(10, "10")
+ .addRow(10, "10")
.withSv2()
.build());
tester.addInput(fixture.rowSetBuilder(schema)
- .add(20, "20")
+ .addRow(20, "20")
.withSv2()
.build());
tester.addOutput(fixture.rowSetBuilder(schema)
- .add(10, "10")
- .add(20, "20")
+ .addRow(10, "10")
+ .addRow(20, "20")
.build());
tester.run();
}
@@ -137,7 +123,7 @@ public class TestCopier extends DrillTest {
int value = first;
for (int i = 0; i < count; i++, value += step) {
RowSetUtilities.setFromInt(writer, 0, value);
- writer.column(1).setString(Integer.toString(value));
+ writer.scalar(1).setString(Integer.toString(value));
writer.save();
}
writer.done();
@@ -188,25 +174,25 @@ public class TestCopier extends DrillTest {
tester.sortOrder = Ordering.ORDER_ASC;
tester.nullOrder = Ordering.NULLS_LAST;
tester.addInput(fixture.rowSetBuilder(schema)
- .add(1, "1")
- .add(4, "4")
- .add(null, "null")
+ .addRow(1, "1")
+ .addRow(4, "4")
+ .addRow(null, "null")
.withSv2()
.build());
tester.addInput(fixture.rowSetBuilder(schema)
- .add(2, "2")
- .add(3, "3")
- .add(null, "null")
+ .addRow(2, "2")
+ .addRow(3, "3")
+ .addRow(null, "null")
.withSv2()
.build());
tester.addOutput(fixture.rowSetBuilder(schema)
- .add(1, "1")
- .add(2, "2")
- .add(3, "3")
- .add(4, "4")
- .add(null, "null")
- .add(null, "null")
+ .addRow(1, "1")
+ .addRow(2, "2")
+ .addRow(3, "3")
+ .addRow(4, "4")
+ .addRow(null, "null")
+ .addRow(null, "null")
.build());
tester.run();
@@ -220,25 +206,25 @@ public class TestCopier extends DrillTest {
tester.sortOrder = Ordering.ORDER_ASC;
tester.nullOrder = Ordering.NULLS_FIRST;
tester.addInput(fixture.rowSetBuilder(schema)
- .add(null, "null")
- .add(1, "1")
- .add(4, "4")
+ .addRow(null, "null")
+ .addRow(1, "1")
+ .addRow(4, "4")
.withSv2()
.build());
tester.addInput(fixture.rowSetBuilder(schema)
- .add(null, "null")
- .add(2, "2")
- .add(3, "3")
+ .addRow(null, "null")
+ .addRow(2, "2")
+ .addRow(3, "3")
.withSv2()
.build());
tester.addOutput(fixture.rowSetBuilder(schema)
- .add(null, "null")
- .add(null, "null")
- .add(1, "1")
- .add(2, "2")
- .add(3, "3")
- .add(4, "4")
+ .addRow(null, "null")
+ .addRow(null, "null")
+ .addRow(1, "1")
+ .addRow(2, "2")
+ .addRow(3, "3")
+ .addRow(4, "4")
.build());
tester.run();
@@ -252,25 +238,25 @@ public class TestCopier extends DrillTest {
tester.sortOrder = Ordering.ORDER_DESC;
tester.nullOrder = Ordering.NULLS_LAST;
tester.addInput(fixture.rowSetBuilder(schema)
- .add(4, "4")
- .add(1, "1")
- .add(null, "null")
+ .addRow(4, "4")
+ .addRow(1, "1")
+ .addRow(null, "null")
.withSv2()
.build());
tester.addInput(fixture.rowSetBuilder(schema)
- .add(3, "3")
- .add(2, "2")
- .add(null, "null")
+ .addRow(3, "3")
+ .addRow(2, "2")
+ .addRow(null, "null")
.withSv2()
.build());
tester.addOutput(fixture.rowSetBuilder(schema)
- .add(4, "4")
- .add(3, "3")
- .add(2, "2")
- .add(1, "1")
- .add(null, "null")
- .add(null, "null")
+ .addRow(4, "4")
+ .addRow(3, "3")
+ .addRow(2, "2")
+ .addRow(1, "1")
+ .addRow(null, "null")
+ .addRow(null, "null")
.build());
tester.run();
@@ -284,25 +270,25 @@ public class TestCopier extends DrillTest {
tester.sortOrder = Ordering.ORDER_DESC;
tester.nullOrder = Ordering.NULLS_FIRST;
tester.addInput(fixture.rowSetBuilder(schema)
- .add(null, "null")
- .add(4, "4")
- .add(1, "1")
+ .addRow(null, "null")
+ .addRow(4, "4")
+ .addRow(1, "1")
.withSv2()
.build());
tester.addInput(fixture.rowSetBuilder(schema)
- .add(null, "null")
- .add(3, "3")
- .add(2, "2")
+ .addRow(null, "null")
+ .addRow(3, "3")
+ .addRow(2, "2")
.withSv2()
.build());
tester.addOutput(fixture.rowSetBuilder(schema)
- .add(null, "null")
- .add(null, "null")
- .add(4, "4")
- .add(3, "3")
- .add(2, "2")
- .add(1, "1")
+ .addRow(null, "null")
+ .addRow(null, "null")
+ .addRow(4, "4")
+ .addRow(3, "3")
+ .addRow(2, "2")
+ .addRow(1, "1")
.build());
tester.run();
@@ -362,22 +348,22 @@ public class TestCopier extends DrillTest {
CopierTester tester = new CopierTester(fixture);
tester.addInput(fixture.rowSetBuilder(schema)
- .add(1, 10, 100)
- .add(5, 50, 500)
+ .addRow(1, new Object[] {10, new Object[] {100}})
+ .addRow(5, new Object[] {50, new Object[] {500}})
.withSv2()
.build());
tester.addInput(fixture.rowSetBuilder(schema)
- .add(2, 20, 200)
- .add(6, 60, 600)
+ .addRow(2, new Object[] {20, new Object[] {200}})
+ .addRow(6, new Object[] {60, new Object[] {600}})
.withSv2()
.build());
tester.addOutput(fixture.rowSetBuilder(schema)
- .add(1, 10, 100)
- .add(2, 20, 200)
- .add(5, 50, 500)
- .add(6, 60, 600)
+ .addRow(1, new Object[] {10, new Object[] {100}})
+ .addRow(2, new Object[] {20, new Object[] {200}})
+ .addRow(5, new Object[] {50, new Object[] {500}})
+ .addRow(6, new Object[] {60, new Object[] {600}})
.build());
tester.run();
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java
index ba5dfce..38e3698 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestShortArrays.java
@@ -61,9 +61,9 @@ public class TestShortArrays extends SubOperatorTest {
.addArray("b", MinorType.INT)
.build();
RowSetBuilder builder = fixture.rowSetBuilder(schema)
- .add(1, new int[] {10});
+ .addRow(1, new int[] {10});
for (int i = 2; i <= 10; i++) {
- builder.add(i, new int[] {});
+ builder.addRow(i, new int[] {});
}
RowSet rows = builder.build();
@@ -87,9 +87,9 @@ public class TestShortArrays extends SubOperatorTest {
SingleRowSet empty = fixture.rowSet(schema);
vi.allocateBatch(empty.container(), 100);
- assertEquals(2, empty.vectors().length);
+ assertEquals(2, empty.container().getNumberOfColumns());
@SuppressWarnings("resource")
- ValueVector bVector = empty.vectors()[1];
+ ValueVector bVector = empty.container().getValueVector(1).getValueVector();
assertTrue(bVector instanceof RepeatedIntVector);
assertEquals(16, ((RepeatedIntVector) bVector).getDataVector().getValueCapacity());
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSortImpl.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSortImpl.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSortImpl.java
index d83a765..93411d7 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSortImpl.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSortImpl.java
@@ -46,8 +46,8 @@ import org.apache.drill.test.rowSet.HyperRowSetImpl;
import org.apache.drill.test.rowSet.IndirectRowSet;
import org.apache.drill.test.rowSet.RowSet;
import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
-import org.apache.drill.test.rowSet.RowSet.RowSetReader;
-import org.apache.drill.test.rowSet.RowSet.RowSetWriter;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetWriter;
import org.apache.drill.test.rowSet.RowSetBuilder;
import org.apache.drill.test.rowSet.RowSetComparison;
import org.apache.drill.test.rowSet.SchemaBuilder;
@@ -193,11 +193,11 @@ public class TestSortImpl extends DrillTest {
private static RowSet toRowSet(OperatorFixture fixture, SortResults results, VectorContainer dest) {
if (results.getSv4() != null) {
- return new HyperRowSetImpl(fixture.allocator(), dest, results.getSv4());
+ return new HyperRowSetImpl(dest, results.getSv4());
} else if (results.getSv2() != null) {
- return new IndirectRowSet(fixture.allocator(), dest, results.getSv2());
+ return IndirectRowSet.fromSv2(dest, results.getSv2());
} else {
- return new DirectRowSet(fixture.allocator(), dest);
+ return DirectRowSet.fromContainer(dest);
}
}
@@ -242,10 +242,10 @@ public class TestSortImpl extends DrillTest {
BatchSchema schema = SortTestUtilities.nonNullSchema();
SortTestFixture sortTest = new SortTestFixture(fixture);
sortTest.addInput(fixture.rowSetBuilder(schema)
- .add(1, "first")
+ .addRow(1, "first")
.build());
sortTest.addOutput(fixture.rowSetBuilder(schema)
- .add(1, "first")
+ .addRow(1, "first")
.build());
sortTest.run();
}
@@ -262,12 +262,12 @@ public class TestSortImpl extends DrillTest {
BatchSchema schema = SortTestUtilities.nonNullSchema();
SortTestFixture sortTest = new SortTestFixture(fixture);
sortTest.addInput(fixture.rowSetBuilder(schema)
- .add(2, "second")
- .add(1, "first")
+ .addRow(2, "second")
+ .addRow(1, "first")
.build());
sortTest.addOutput(fixture.rowSetBuilder(schema)
- .add(1, "first")
- .add(2, "second")
+ .addRow(1, "first")
+ .addRow(2, "second")
.build());
sortTest.run();
}
@@ -285,14 +285,14 @@ public class TestSortImpl extends DrillTest {
BatchSchema schema = SortTestUtilities.nonNullSchema();
SortTestFixture sortTest = new SortTestFixture(fixture);
sortTest.addInput(fixture.rowSetBuilder(schema)
- .add(2, "second")
+ .addRow(2, "second")
.build());
sortTest.addInput(fixture.rowSetBuilder(schema)
- .add(1, "first")
+ .addRow(1, "first")
.build());
sortTest.addOutput(fixture.rowSetBuilder(schema)
- .add(1, "first")
- .add(2, "second")
+ .addRow(1, "first")
+ .addRow(2, "second")
.build());
sortTest.run();
}
@@ -356,7 +356,7 @@ public class TestSortImpl extends DrillTest {
RowSetBuilder builder = fixture.rowSetBuilder(schema);
int end = Math.min(batchSize, targetCount - rowCount);
for (int i = 0; i < end; i++) {
- builder.add(currentValue, i + ", " + currentValue);
+ builder.addRow(currentValue, i + ", " + currentValue);
currentValue = (currentValue + step) % targetCount;
rowCount++;
}
@@ -387,7 +387,7 @@ public class TestSortImpl extends DrillTest {
RowSetReader reader = output.reader();
while (reader.next()) {
assertEquals("Value of " + batchCount + ":" + rowCount,
- rowCount, reader.column(0).getInt());
+ rowCount, reader.scalar(0).getInt());
rowCount++;
}
}
@@ -593,18 +593,18 @@ public class TestSortImpl extends DrillTest {
}
};
sortTest.addInput(fixture.rowSetBuilder(schema)
- .add(2, "second")
+ .addRow(2, "second")
.build());
sortTest.addInput(fixture.rowSetBuilder(schema)
- .add(3, "third")
+ .addRow(3, "third")
.build());
sortTest.addInput(fixture.rowSetBuilder(schema)
- .add(1, "first")
+ .addRow(1, "first")
.build());
sortTest.addOutput(fixture.rowSetBuilder(schema)
- .add(1, "first")
- .add(2, "second")
- .add(3, "third")
+ .addRow(1, "first")
+ .addRow(2, "second")
+ .addRow(3, "third")
.build());
sortTest.run();
}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSorter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSorter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSorter.java
index 5f04da6..c24f1a6 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSorter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/xsort/managed/TestSorter.java
@@ -36,8 +36,8 @@ import org.apache.drill.test.DrillTest;
import org.apache.drill.test.OperatorFixture;
import org.apache.drill.test.rowSet.RowSet;
import org.apache.drill.test.rowSet.RowSet.ExtendableRowSet;
-import org.apache.drill.test.rowSet.RowSet.RowSetReader;
-import org.apache.drill.test.rowSet.RowSet.RowSetWriter;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.RowSetWriter;
import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
import org.apache.drill.test.rowSet.RowSetBuilder;
import org.apache.drill.test.rowSet.RowSetComparison;
@@ -111,12 +111,12 @@ public class TestSorter extends DrillTest {
public void testSingleRow() throws Exception {
BatchSchema schema = SortTestUtilities.nonNullSchema();
SingleRowSet rowSet = new RowSetBuilder(fixture.allocator(), schema)
- .add(0, "0")
+ .addRow(0, "0")
.withSv2()
.build();
SingleRowSet expected = new RowSetBuilder(fixture.allocator(), schema)
- .add(0, "0")
+ .addRow(0, "0")
.build();
runSorterTest(rowSet, expected);
}
@@ -127,14 +127,14 @@ public class TestSorter extends DrillTest {
public void testTwoRows() throws Exception {
BatchSchema schema = SortTestUtilities.nonNullSchema();
SingleRowSet rowSet = new RowSetBuilder(fixture.allocator(), schema)
- .add(1, "1")
- .add(0, "0")
+ .addRow(1, "1")
+ .addRow(0, "0")
.withSv2()
.build();
SingleRowSet expected = new RowSetBuilder(fixture.allocator(), schema)
- .add(0, "0")
- .add(1, "1")
+ .addRow(0, "0")
+ .addRow(1, "1")
.build();
runSorterTest(rowSet, expected);
}
@@ -207,11 +207,11 @@ public class TestSorter extends DrillTest {
for (int i = 0; i < items.length; i++) {
DataItem item = items[i];
if (nullable && item.isNull) {
- writer.column(0).setNull();
+ writer.scalar(0).setNull();
} else {
RowSetUtilities.setFromInt(writer, 0, item.key);
}
- writer.column(1).setString(Integer.toString(item.value));
+ writer.scalar(1).setString(Integer.toString(item.value));
writer.save();
}
writer.done();
@@ -221,7 +221,7 @@ public class TestSorter extends DrillTest {
private void verify(RowSet actual) {
DataItem expected[] = Arrays.copyOf(data, data.length);
doSort(expected);
- RowSet expectedRows = makeDataSet(actual.allocator(), actual.schema().batch(), expected);
+ RowSet expectedRows = makeDataSet(actual.allocator(), actual.batchSchema(), expected);
doVerify(expected, expectedRows, actual);
}
@@ -369,7 +369,7 @@ public class TestSorter extends DrillTest {
int mo = rand.nextInt(12);
int yr = rand.nextInt(10);
Period period = makePeriod(yr, mo, day, hr, min, sec, ms);
- builder.add(period);
+ builder.addRow(period);
}
return builder.build();
}
@@ -383,7 +383,7 @@ public class TestSorter extends DrillTest {
int prevMonths = 0;
long prevMs = 0;
while (reader.next()) {
- Period period = reader.column(0).getPeriod().normalizedStandard();
+ Period period = reader.scalar(0).getPeriod().normalizedStandard();
int years = period.getYears();
assertTrue(prevYears <= years);
if (prevYears != years) {
@@ -586,16 +586,16 @@ public class TestSorter extends DrillTest {
.build();
SingleRowSet input = fixture.rowSetBuilder(schema)
- .add(3, "third")
- .add(1, "first")
- .add(2, "second")
+ .addRow(3, "third")
+ .addRow(1, "first")
+ .addRow(2, "second")
.withSv2()
.build();
SingleRowSet output = fixture.rowSetBuilder(schema)
- .add(1, "first")
- .add(2, "second")
- .add(3, "third")
+ .addRow(1, "first")
+ .addRow(2, "second")
+ .addRow(3, "third")
.build();
Sort popConfig = makeSortConfig("map.key", Ordering.ORDER_ASC, Ordering.NULLS_LAST);
runSorterTest(popConfig, input, output);
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderLimits.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderLimits.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderLimits.java
new file mode 100644
index 0000000..f9f5128
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderLimits.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import static org.junit.Assert.*;
+
+import java.util.Arrays;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+/**
+ * Tests of the row limit functionality of the result set loader. The
+ * row limit is set up front and has a default value. Because Drill must
+ * discover data structure as it reads, the result set loader also allows changing
+ * the row limit between batches (perhaps Drill discovers that rows are much
+ * narrower or wider than expected.)
+ * <p>
+ * The tests here are independent of the tests for vector allocation (which does,
+ * in fact, depend on the row count) and vector overflow (which can occur when
+ * the row limit turns out to be too large.)
+ */
+
+public class TestResultSetLoaderLimits extends SubOperatorTest {
+
+ /**
+ * Verify that the writer stops when reaching the row limit.
+ * In this case there is no look-ahead row.
+ */
+
+ @Test
+ public void testRowLimit() {
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
+ assertEquals(ResultSetLoaderImpl.DEFAULT_ROW_COUNT, rsLoader.targetRowCount());
+ RowSetLoader rootWriter = rsLoader.writer();
+ rootWriter.addColumn(SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REQUIRED));
+
+ byte value[] = new byte[200];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ rsLoader.startBatch();
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rootWriter.scalar(0).setBytes(value, value.length);
+ rootWriter.save();
+ count++;
+ }
+ assertEquals(ResultSetLoaderImpl.DEFAULT_ROW_COUNT, count);
+ assertEquals(count, rootWriter.rowCount());
+
+ rsLoader.harvest().clear();
+
+ // Do it again, a different way.
+
+ count = 0;
+ rsLoader.startBatch();
+ assertEquals(0, rootWriter.rowCount());
+ while (rootWriter.start()) {
+ rootWriter.scalar(0).setBytes(value, value.length);
+ rootWriter.save();
+ count++;
+ }
+ assertEquals(ResultSetLoaderImpl.DEFAULT_ROW_COUNT, count);
+ assertEquals(count, rootWriter.rowCount());
+
+ rsLoader.harvest().clear();
+
+ rsLoader.close();
+ }
+
+ private static final int TEST_ROW_LIMIT = 1024;
+
+ /**
+ * Verify that the caller can set a row limit lower than the default.
+ */
+
+ @Test
+ public void testCustomRowLimit() {
+
+ // Try to set a default value larger than the hard limit. Value
+ // is truncated to the limit.
+
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT + 1)
+ .build();
+ assertEquals(ValueVector.MAX_ROW_COUNT, options.rowCountLimit);
+
+ // Just a bit of paranoia that we check against the vector limit,
+ // not any previous value...
+
+ options = new OptionBuilder()
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT + 1)
+ .setRowCountLimit(TEST_ROW_LIMIT)
+ .build();
+ assertEquals(TEST_ROW_LIMIT, options.rowCountLimit);
+
+ options = new OptionBuilder()
+ .setRowCountLimit(TEST_ROW_LIMIT)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT + 1)
+ .build();
+ assertEquals(ValueVector.MAX_ROW_COUNT, options.rowCountLimit);
+
+ // Can't set the limit lower than 1
+
+ options = new OptionBuilder()
+ .setRowCountLimit(0)
+ .build();
+ assertEquals(1, options.rowCountLimit);
+
+ // Do load with a (valid) limit lower than the default.
+
+ options = new OptionBuilder()
+ .setRowCountLimit(TEST_ROW_LIMIT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ assertEquals(TEST_ROW_LIMIT, rsLoader.targetRowCount());
+
+ RowSetLoader rootWriter = rsLoader.writer();
+ rootWriter.addColumn(SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REQUIRED));
+
+ rsLoader.startBatch();
+ int count = fillToLimit(rootWriter);
+ assertEquals(TEST_ROW_LIMIT, count);
+ assertEquals(count, rootWriter.rowCount());
+
+ // Should fail to write beyond the row limit
+
+ assertFalse(rootWriter.start());
+ try {
+ rootWriter.save();
+ fail();
+ } catch (IllegalStateException e) {
+ // Expected
+ }
+
+ rsLoader.harvest().clear();
+ rsLoader.startBatch();
+ assertEquals(0, rootWriter.rowCount());
+
+ rsLoader.close();
+ }
+
+ private int fillToLimit(RowSetLoader rootWriter) {
+ byte value[] = new byte[200];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ while (! rootWriter.isFull()) {
+ rootWriter.start();
+ rootWriter.scalar(0).setBytes(value, value.length);
+ rootWriter.save();
+ count++;
+ }
+ return count;
+ }
+
+ /**
+ * Test that the row limit can change between batches.
+ */
+
+ @Test
+ public void testDynamicLimit() {
+
+ // Start with a small limit.
+
+ ResultSetOptions options = new OptionBuilder()
+ .setRowCountLimit(TEST_ROW_LIMIT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ assertEquals(TEST_ROW_LIMIT, rsLoader.targetRowCount());
+
+ RowSetLoader rootWriter = rsLoader.writer();
+ rootWriter.addColumn(SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REQUIRED));
+
+ rsLoader.startBatch();
+ int count = fillToLimit(rootWriter);
+ assertEquals(TEST_ROW_LIMIT, count);
+ assertEquals(count, rootWriter.rowCount());
+ rsLoader.harvest().clear();
+
+ // Raise the row limit and fill a second batch
+
+ int newLimit = 8000;
+ rsLoader.setTargetRowCount(newLimit);
+ rsLoader.startBatch();
+ count = fillToLimit(rootWriter);
+ assertEquals(newLimit, count);
+ assertEquals(count, rootWriter.rowCount());
+ rsLoader.harvest().clear();
+
+ // Put the limit back to a lower number.
+
+ newLimit = 1000;
+ rsLoader.setTargetRowCount(newLimit);
+ rsLoader.startBatch();
+ count = fillToLimit(rootWriter);
+ assertEquals(newLimit, count);
+ assertEquals(count, rootWriter.rowCount());
+ rsLoader.harvest().clear();
+
+ rsLoader.close();
+ }
+}
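For readers new to the loader API, the fill pattern that these tests exercise
reduces to a short loop. This sketch reuses only calls that appear in the
tests above, with a hypothetical single VARCHAR column at index 0:

    // Write rows until the loader reports the batch is full, then harvest.
    rsLoader.startBatch();
    while (! rootWriter.isFull()) {
      rootWriter.start();
      rootWriter.scalar(0).setBytes(value, value.length);
      rootWriter.save();
    }
    rsLoader.harvest().clear();   // caller owns the harvested batch

As testRowLimit() shows, an equivalent form loops on rootWriter.start(), which
returns false once the row limit is reached.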
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderMapArray.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderMapArray.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderMapArray.java
new file mode 100644
index 0000000..115e52d
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/rowSet/impl/TestResultSetLoaderMapArray.java
@@ -0,0 +1,481 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.rowSet.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+
+import org.apache.drill.common.types.TypeProtos.DataMode;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
+import org.apache.drill.exec.physical.rowSet.RowSetLoader;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ArrayReader;
+import org.apache.drill.exec.vector.accessor.ArrayWriter;
+import org.apache.drill.exec.vector.accessor.ScalarElementReader;
+import org.apache.drill.exec.vector.accessor.ScalarReader;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.TupleReader;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.test.SubOperatorTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSet.SingleRowSet;
+import org.apache.drill.test.rowSet.RowSetComparison;
+import org.apache.drill.test.rowSet.RowSetReader;
+import org.apache.drill.test.rowSet.SchemaBuilder;
+import org.junit.Test;
+
+/**
+ * Test map array support in the result set loader.
+ * <p>
+ * The tests here should be considered in the "extra for experts"
+ * category: run and/or debug these tests only after the scalar
+ * tests work. Maps, and especially repeated maps, are very complex
+ * constructs not to be tackled lightly.
+ */
+
+public class TestResultSetLoaderMapArray extends SubOperatorTest {
+
+ @Test
+ public void testBasics() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMapArray("m")
+ .add("c", MinorType.INT)
+ .add("d", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Verify structure and schema
+
+ TupleMetadata actualSchema = rootWriter.schema();
+ assertEquals(2, actualSchema.size());
+ assertTrue(actualSchema.metadata(1).isArray());
+ assertTrue(actualSchema.metadata(1).isMap());
+ assertEquals(2, actualSchema.metadata("m").mapSchema().size());
+ assertEquals(2, actualSchema.column("m").getChildren().size());
+
+ // Write a couple of rows with arrays.
+
+ rsLoader.startBatch();
+ rootWriter
+ .addRow(10, new Object[] {
+ new Object[] {110, "d1.1"},
+ new Object[] {120, "d2.2"}})
+ .addRow(20, new Object[] {})
+ .addRow(30, new Object[] {
+ new Object[] {310, "d3.1"},
+ new Object[] {320, "d3.2"},
+ new Object[] {330, "d3.3"}})
+ ;
+
+ // Verify the first batch
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, new Object[] {
+ new Object[] {110, "d1.1"},
+ new Object[] {120, "d2.2"}})
+ .addRow(20, new Object[] {})
+ .addRow(30, new Object[] {
+ new Object[] {310, "d3.1"},
+ new Object[] {320, "d3.2"},
+ new Object[] {330, "d3.3"}})
+ .build();
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ // In the second, create a row, then add a map member.
+ // Should be back-filled to empty for the first row.
+
+ rsLoader.startBatch();
+ rootWriter
+ .addRow(40, new Object[] {
+ new Object[] {410, "d4.1"},
+ new Object[] {420, "d4.2"}});
+
+ TupleWriter mapWriter = rootWriter.array("m").tuple();
+ mapWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.OPTIONAL));
+
+ rootWriter
+ .addRow(50, new Object[] {
+ new Object[] {510, "d5.1", "e5.1"},
+ new Object[] {520, "d5.2", null}})
+ .addRow(60, new Object[] {
+ new Object[] {610, "d6.1", "e6.1"},
+ new Object[] {620, "d6.2", null},
+ new Object[] {630, "d6.3", "e6.3"}})
+ ;
+
+ // Verify the second batch
+
+ actual = fixture.wrap(rsLoader.harvest());
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMapArray("m")
+ .add("c", MinorType.INT)
+ .add("d", MinorType.VARCHAR)
+ .addNullable("e", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ expected = fixture.rowSetBuilder(expectedSchema)
+ .addRow(40, new Object[] {
+ new Object[] {410, "d4.1", null},
+ new Object[] {420, "d4.2", null}})
+ .addRow(50, new Object[] {
+ new Object[] {510, "d5.1", "e5.1"},
+ new Object[] {520, "d5.2", null}})
+ .addRow(60, new Object[] {
+ new Object[] {610, "d6.1", "e6.1"},
+ new Object[] {620, "d6.2", null},
+ new Object[] {630, "d6.3", "e6.3"}})
+ .build();
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ rsLoader.close();
+ }
+
+ @Test
+ public void testNestedArray() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMapArray("m")
+ .add("c", MinorType.INT)
+ .addArray("d", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Write a couple of rows with arrays within arrays.
+ // (And, of course, the Varchar is actually an array of
+ // bytes, so that's three array levels.)
+
+ rsLoader.startBatch();
+ rootWriter
+ .addRow(10, new Object[] {
+ new Object[] {110, new String[] {"d1.1.1", "d1.1.2"}},
+ new Object[] {120, new String[] {"d1.2.1", "d1.2.2"}}})
+ .addRow(20, new Object[] {})
+ .addRow(30, new Object[] {
+ new Object[] {310, new String[] {"d3.1.1", "d3.2.2"}},
+ new Object[] {320, new String[] {}},
+ new Object[] {330, new String[] {"d3.3.1", "d1.2.2"}}})
+ ;
+
+ // Verify the batch
+
+ RowSet actual = fixture.wrap(rsLoader.harvest());
+ SingleRowSet expected = fixture.rowSetBuilder(schema)
+ .addRow(10, new Object[] {
+ new Object[] {110, new String[] {"d1.1.1", "d1.1.2"}},
+ new Object[] {120, new String[] {"d1.2.1", "d1.2.2"}}})
+ .addRow(20, new Object[] {})
+ .addRow(30, new Object[] {
+ new Object[] {310, new String[] {"d3.1.1", "d3.2.2"}},
+ new Object[] {320, new String[] {}},
+ new Object[] {330, new String[] {"d3.3.1", "d1.2.2"}}})
+ .build();
+ new RowSetComparison(expected).verifyAndClearAll(actual);
+
+ rsLoader.close();
+ }
+
+ /**
+ * Test doubly-nested arrays of maps.
+ */
+
+ @Test
+ public void testDoubleNestedArray() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMapArray("m1")
+ .add("b", MinorType.INT)
+ .addMapArray("m2")
+ .add("c", MinorType.INT)
+ .addArray("d", MinorType.VARCHAR)
+ .buildMap()
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+ rsLoader.startBatch();
+
+ ScalarWriter aWriter = rootWriter.scalar("a");
+ ArrayWriter a1Writer = rootWriter.array("m1");
+ TupleWriter m1Writer = a1Writer.tuple();
+ ScalarWriter bWriter = m1Writer.scalar("b");
+ ArrayWriter a2Writer = m1Writer.array("m2");
+ TupleWriter m2Writer = a2Writer.tuple();
+ ScalarWriter cWriter = m2Writer.scalar("c");
+ ScalarWriter dWriter = m2Writer.array("d").scalar();
+
+ for (int i = 0; i < 5; i++) {
+ rootWriter.start();
+ aWriter.setInt(i);
+ for (int j = 0; j < 4; j++) {
+ int a1Key = i + 10 + j;
+ bWriter.setInt(a1Key);
+ for (int k = 0; k < 3; k++) {
+ int a2Key = a1Key * 10 + k;
+ cWriter.setInt(a2Key);
+ for (int l = 0; l < 2; l++) {
+ dWriter.setString("d-" + (a2Key * 10 + l));
+ }
+ a2Writer.save();
+ }
+ a1Writer.save();
+ }
+ rootWriter.save();
+ }
+
+ RowSet results = fixture.wrap(rsLoader.harvest());
+ RowSetReader reader = results.reader();
+
+ ScalarReader aReader = reader.scalar("a");
+ ArrayReader a1Reader = reader.array("m1");
+ TupleReader m1Reader = a1Reader.tuple();
+ ScalarReader bReader = m1Reader.scalar("b");
+ ArrayReader a2Reader = m1Reader.array("m2");
+ TupleReader m2Reader = a2Reader.tuple();
+ ScalarReader cReader = m2Reader.scalar("c");
+ ScalarElementReader dReader = m2Reader.elements("d");
+
+ for (int i = 0; i < 5; i++) {
+ reader.next();
+ assertEquals(i, aReader.getInt());
+ for (int j = 0; j < 4; j++) {
+ a1Reader.setPosn(j);
+ int a1Key = i + 10 + j;
+ assertEquals(a1Key, bReader.getInt());
+ for (int k = 0; k < 3; k++) {
+ a2Reader.setPosn(k);
+ int a2Key = a1Key * 10 + k;
+ assertEquals(a2Key, cReader.getInt());
+ for (int l = 0; l < 2; l++) {
+ assertEquals("d-" + (a2Key * 10 + l), dReader.getString(l));
+ }
+ }
+ }
+ }
+ rsLoader.close();
+ }
+
+ /**
+ * Version of the {@link TestResultSetLoaderProtocol#testOverwriteRow()} test
+ * that uses nested columns inside an array of maps. Here we must call
+ * <tt>start()</tt> to reset the array back to the initial start position after
+ * each "discard."
+ */
+
+ @Test
+ public void testOverwriteRow() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("a", MinorType.INT)
+ .addMapArray("m")
+ .add("b", MinorType.INT)
+ .add("c", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ // Can't use the shortcut to populate rows when doing overwrites.
+
+ ScalarWriter aWriter = rootWriter.scalar("a");
+ ArrayWriter maWriter = rootWriter.array("m");
+ TupleWriter mWriter = maWriter.tuple();
+ ScalarWriter bWriter = mWriter.scalar("b");
+ ScalarWriter cWriter = mWriter.scalar("c");
+
+ // Write 10,000 rows, overwriting 99% of them. This will cause vector
+ // overflow and data corruption if overwrite does not work, but will happily
+ // produce the correct result if everything works as it should.
+
+ byte value[] = new byte[512];
+ Arrays.fill(value, (byte) 'X');
+ int count = 0;
+ rsLoader.startBatch();
+ while (count < 10_000) {
+ rootWriter.start();
+ count++;
+ aWriter.setInt(count);
+ for (int i = 0; i < 10; i++) {
+ bWriter.setInt(count * 10 + i);
+ cWriter.setBytes(value, value.length);
+ maWriter.save();
+ }
+ if (count % 100 == 0) {
+ rootWriter.save();
+ }
+ }
+
+ // Verify using a reader.
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(count / 100, result.rowCount());
+ RowSetReader reader = result.reader();
+ ArrayReader maReader = reader.array("m");
+ TupleReader mReader = maReader.tuple();
+ int rowId = 1;
+ while (reader.next()) {
+ assertEquals(rowId * 100, reader.scalar("a").getInt());
+ assertEquals(10, maReader.size());
+ for (int i = 0; i < 10; i++) {
+ maReader.setPosn(i);
+ assertEquals(rowId * 1000 + i, mReader.scalar("b").getInt());
+ assertTrue(Arrays.equals(value, mReader.scalar("c").getBytes()));
+ }
+ rowId++;
+ }
+
+ result.clear();
+ rsLoader.close();
+ }
+
+ /**
+ * Check that the "fill-empties" logic descends into
+ * a repeated map.
+ */
+
+ @Test
+ public void testOmittedValues() {
+ TupleMetadata schema = new SchemaBuilder()
+ .add("id", MinorType.INT)
+ .addMapArray("m")
+ .addNullable("a", MinorType.INT)
+ .addNullable("b", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ int mapSkip = 5;
+ int entrySkip = 3;
+ int rowCount = 1000;
+ int entryCount = 10;
+
+ rsLoader.startBatch();
+ ArrayWriter maWriter = rootWriter.array("m");
+ TupleWriter mWriter = maWriter.tuple();
+ for (int i = 0; i < rowCount; i++) {
+ rootWriter.start();
+ rootWriter.scalar(0).setInt(i);
+ if (i % mapSkip != 0) {
+ for (int j = 0; j < entryCount; j++) {
+ if (j % entrySkip != 0) {
+ mWriter.scalar(0).setInt(i * entryCount + j);
+ mWriter.scalar(1).setString("b-" + i + "." + j);
+ }
+ maWriter.save();
+ }
+ }
+ rootWriter.save();
+ }
+
+ RowSet result = fixture.wrap(rsLoader.harvest());
+ assertEquals(rowCount, result.rowCount());
+ RowSetReader reader = result.reader();
+ ArrayReader maReader = reader.array("m");
+ TupleReader mReader = maReader.tuple();
+ for (int i = 0; i < rowCount; i++) {
+ assertTrue(reader.next());
+ assertEquals(i, reader.scalar(0).getInt());
+ if (i % mapSkip == 0) {
+ assertEquals(0, maReader.size());
+ continue;
+ }
+ assertEquals(entryCount, maReader.size());
+ for (int j = 0; j < entryCount; j++) {
+ maReader.setPosn(j);
+ if (j % entrySkip == 0) {
+ assertTrue(mReader.scalar(0).isNull());
+ assertTrue(mReader.scalar(1).isNull());
+ } else {
+ assertFalse(mReader.scalar(0).isNull());
+ assertFalse(mReader.scalar(1).isNull());
+ assertEquals(i * entryCount + j, mReader.scalar(0).getInt());
+ assertEquals("b-" + i + "." + j, mReader.scalar(1).getString());
+ }
+ }
+ }
+ result.clear();
+ rsLoader.close();
+ }
+
+ /**
+ * Test that memory is released if the loader is closed with an active
+ * batch (that is, before the batch is harvested.)
+ */
+
+ @Test
+ public void testCloseWithoutHarvest() {
+ TupleMetadata schema = new SchemaBuilder()
+ .addMapArray("m")
+ .add("a", MinorType.INT)
+ .add("b", MinorType.VARCHAR)
+ .buildMap()
+ .buildSchema();
+ ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
+ .setSchema(schema)
+ .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
+ .build();
+ ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
+ RowSetLoader rootWriter = rsLoader.writer();
+
+ ArrayWriter maWriter = rootWriter.array("m");
+ TupleWriter mWriter = maWriter.tuple();
+ rsLoader.startBatch();
+ for (int i = 0; i < 40; i++) {
+ rootWriter.start();
+ for (int j = 0; j < 3; j++) {
+ mWriter.scalar("a").setInt(i);
+ mWriter.scalar("b").setString("b-" + i);
+ maWriter.save();
+ }
+ rootWriter.save();
+ }
+
+ // Don't harvest the batch. Allocator will complain if the
+ // loader does not release memory.
+
+ rsLoader.close();
+ }
+}
[02/15] drill git commit: DRILL-5657: Size-aware vector writer
structure
Posted by pr...@apache.org.
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractTupleWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractTupleWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractTupleWriter.java
new file mode 100644
index 0000000..1fd12f2
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/AbstractTupleWriter.java
@@ -0,0 +1,450 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.record.MaterializedField;
+import org.apache.drill.exec.record.TupleMetadata;
+import org.apache.drill.exec.vector.accessor.ArrayWriter;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ObjectType;
+import org.apache.drill.exec.vector.accessor.ObjectWriter;
+import org.apache.drill.exec.vector.accessor.ScalarWriter;
+import org.apache.drill.exec.vector.accessor.TupleWriter;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+
+/**
+ * Implementation of a writer for a tuple (a row or a map). Provides access to each
+ * column using either a name or a numeric index.
+ * <p>
+ * A tuple maintains an internal state needed to handle dynamic column additions.
+ * The state identifies the amount of "catch up" needed to get the new column into
+ * the same state as the existing columns. The state is also handy for understanding
+ * the tuple lifecycle. This lifecycle works for all three cases of:
+ * <ul>
+ * <li>Top-level tuple (row).</li>
+ * <li>Nested tuple (map).</li>
+ * <li>Array of tuples (repeated map).</li>
+ * </ul>
+ *
+ * Specifically, the transitions, for batch, row and array events, are:
+ *
+ * <table border=1>
+ * <tr><th>Public API</th><th>Tuple Event</th><th>State Transition</th>
+ * <th>Child Event</th></tr>
+ * <tr><td>(Start state)</td>
+ * <td>—</td>
+ * <td>IDLE</td>
+ * <td>—</td></tr>
+ * <tr><td>startBatch()</td>
+ * <td>startWrite()</td>
+ * <td>IDLE → IN_WRITE</td>
+ * <td>startWrite()</td></tr>
+ * <tr><td>start() (new row)</td>
+ * <td>startRow()</td>
+ * <td>IN_WRITE → IN_ROW</td>
+ * <td>startRow()</td></tr>
+ * <tr><td>start() (without save)</td>
+ * <td>restartRow()</td>
+ * <td>IN_ROW → IN_ROW</td>
+ * <td>restartRow()</td></tr>
+ * <tr><td>save() (array)</td>
+ * <td>saveValue()</td>
+ * <td>IN_ROW → IN_ROW</td>
+ * <td>saveValue()</td></tr>
+ * <tr><td rowspan=2>save() (row)</td>
+ * <td>saveValue()</td>
+ * <td>IN_ROW → IN_ROW</td>
+ * <td>saveValue()</td></tr>
+ * <tr><td>saveRow()</td>
+ * <td>IN_ROW → IN_WRITE</td>
+ * <td>saveRow()</td></tr>
+ * <tr><td rowspan=2>end batch</td>
+ * <td>—</td>
+ * <td>IN_ROW → IDLE</td>
+ * <td>endWrite()</td></tr>
+ * <tr><td>—</td>
+ * <td>IN_WRITE → IDLE</td>
+ * <td>endWrite()</td></tr>
+ * </table>
+ *
+ * Notes:
+ * <ul>
+ * <li>For the top-level tuple, a special case occurs with ending a batch. (The
+ * method for doing so differs depending on implementation.) If a row is active,
+ * then that row's values are discarded before the batch is ended.</li>
+ * </ul>
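+ * <p>
+ * For orientation, a minimal sketch of one batch as seen through this
+ * event API (the driving client and the column writes are elided; the
+ * variable name is illustrative):
+ * <pre><code>
+ * tuple.startWrite();   // IDLE → IN_WRITE
+ * tuple.startRow();     // IN_WRITE → IN_ROW
+ * // ... write column values ...
+ * tuple.saveRow();      // IN_ROW → IN_WRITE
+ * // ... more rows ...
+ * tuple.endWrite();     // IN_WRITE → IDLE
+ * </code></pre>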
+ */
+
+public abstract class AbstractTupleWriter implements TupleWriter, WriterEvents {
+
+ /**
+ * Generic object wrapper for the tuple writer.
+ */
+
+ public static class TupleObjectWriter extends AbstractObjectWriter {
+
+ private AbstractTupleWriter tupleWriter;
+
+ public TupleObjectWriter(ColumnMetadata schema, AbstractTupleWriter tupleWriter) {
+ super(schema);
+ this.tupleWriter = tupleWriter;
+ }
+
+ @Override
+ public ObjectType type() { return ObjectType.TUPLE; }
+
+ @Override
+ public void set(Object value) { tupleWriter.setObject(value); }
+
+ @Override
+ public TupleWriter tuple() { return tupleWriter; }
+
+ @Override
+ public WriterEvents events() { return tupleWriter; }
+
+ @Override
+ public void bindListener(TupleWriterListener listener) {
+ tupleWriter.bindListener(listener);
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("tupleWriter");
+ tupleWriter.dump(format);
+ format.endObject();
+ }
+ }
+
+ /**
+ * Tracks the write state of the tuple to allow applying the correct
+ * operations to newly-added columns to synchronize them with the rest
+ * of the tuple.
+ */
+
+ public enum State {
+ /**
+ * No write is in progress. Nothing need be done to newly-added
+ * writers.
+ */
+ IDLE,
+
+ /**
+ * <tt>startWrite()</tt> has been called to start a write operation
+ * (start a batch), but <tt>startRow()</tt> has not yet been called
+ * to start a row (or value within an array). <tt>startWrite()</tt> must
+ * be called on newly added columns.
+ */
+
+ IN_WRITE,
+
+ /**
+ * Both <tt>startWrite()</tt> and <tt>startRow()</tt> have been called on
+ * the tuple to prepare for writing values, and both must be called on
+ * newly-added vectors.
+ */
+
+ IN_ROW
+ }
+
+ protected final TupleMetadata schema;
+ protected final List<AbstractObjectWriter> writers;
+ protected ColumnWriterIndex vectorIndex;
+ protected ColumnWriterIndex childIndex;
+ protected TupleWriterListener listener;
+ protected State state = State.IDLE;
+
+ protected AbstractTupleWriter(TupleMetadata schema, List<AbstractObjectWriter> writers) {
+ this.schema = schema;
+ this.writers = writers;
+ }
+
+ protected AbstractTupleWriter(TupleMetadata schema) {
+ this(schema, new ArrayList<AbstractObjectWriter>());
+ }
+
+ protected void bindIndex(ColumnWriterIndex index, ColumnWriterIndex childIndex) {
+ vectorIndex = index;
+ this.childIndex = childIndex;
+
+ for (int i = 0; i < writers.size(); i++) {
+ writers.get(i).events().bindIndex(childIndex);
+ }
+ }
+
+ @Override
+ public void bindIndex(ColumnWriterIndex index) {
+ bindIndex(index, index);
+ }
+
+ @Override
+ public ColumnWriterIndex writerIndex() { return vectorIndex; }
+
+ /**
+ * Add a column writer to an existing tuple writer. Used for implementations
+ * that support "live" schema evolution: column discovery while writing.
+ * The column's metadata is added to the tuple schema as part of this call.
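+ * <p>
+ * For example (pattern taken from the tests in this commit; column
+ * and writer names are illustrative):
+ * <pre><code>
+ * TupleWriter mapWriter = rootWriter.array("m").tuple();
+ * mapWriter.addColumn(
+ *     SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.OPTIONAL));
+ * </code></pre>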
+ *
+ * @param colWriter the column writer to add
+ */
+
+ public int addColumnWriter(AbstractObjectWriter colWriter) {
+ assert writers.size() == schema.size();
+ int colIndex = schema.addColumn(colWriter.schema());
+ writers.add(colWriter);
+ colWriter.events().bindIndex(childIndex);
+ if (state != State.IDLE) {
+ colWriter.events().startWrite();
+ if (state == State.IN_ROW) {
+ colWriter.events().startRow();
+ }
+ }
+ return colIndex;
+ }
+
+ @Override
+ public int addColumn(ColumnMetadata column) {
+ if (listener == null) {
+ throw new UnsupportedOperationException("addColumn");
+ }
+ AbstractObjectWriter colWriter = (AbstractObjectWriter) listener.addColumn(this, column);
+ return addColumnWriter(colWriter);
+ }
+
+ @Override
+ public int addColumn(MaterializedField field) {
+ if (listener == null) {
+ throw new UnsupportedOperationException("addColumn");
+ }
+ AbstractObjectWriter colWriter = (AbstractObjectWriter) listener.addColumn(this, field);
+ return addColumnWriter(colWriter);
+ }
+
+ @Override
+ public TupleMetadata schema() { return schema; }
+
+ @Override
+ public int size() { return schema().size(); }
+
+ @Override
+ public void startWrite() {
+ assert state == State.IDLE;
+ state = State.IN_WRITE;
+ for (int i = 0; i < writers.size(); i++) {
+ writers.get(i).events().startWrite();
+ }
+ }
+
+ @Override
+ public void startRow() {
+ // Must be in a write. Can start a row only once.
+ // To restart, call restartRow() instead.
+
+ assert state == State.IN_WRITE;
+ state = State.IN_ROW;
+ for (int i = 0; i < writers.size(); i++) {
+ writers.get(i).events().startRow();
+ }
+ }
+
+ @Override
+ public void endArrayValue() {
+ assert state == State.IN_ROW;
+ for (int i = 0; i < writers.size(); i++) {
+ writers.get(i).events().endArrayValue();
+ }
+ }
+
+ @Override
+ public void restartRow() {
+
+ // Rewind is normally called only when a value is active: it resets
+ // pointers to allow rewriting the value. However, if this tuple
+ // is nested in an array, then the array entry could have been
+ // saved (state here is IN_WRITE), but the row as a whole has
+ // not been saved. Thus, we must also allow a rewind() while in
+ // the IN_WRITE state to set the pointers back to the start of
+ // the current row.
+
+ assert state == State.IN_ROW;
+ for (int i = 0; i < writers.size(); i++) {
+ writers.get(i).events().restartRow();
+ }
+ }
+
+ @Override
+ public void saveRow() {
+ assert state == State.IN_ROW;
+ for (int i = 0; i < writers.size(); i++) {
+ writers.get(i).events().saveRow();
+ }
+ state = State.IN_WRITE;
+ }
+
+ @Override
+ public void preRollover() {
+
+ // Rollover can only happen while a row is in progress.
+
+ assert state == State.IN_ROW;
+ for (int i = 0; i < writers.size(); i++) {
+ writers.get(i).events().preRollover();
+ }
+ }
+
+ @Override
+ public void postRollover() {
+
+ // Rollover can only happen while a row is in progress.
+
+ assert state == State.IN_ROW;
+ for (int i = 0; i < writers.size(); i++) {
+ writers.get(i).events().postRollover();
+ }
+ }
+
+ @Override
+ public void endWrite() {
+ assert state != State.IDLE;
+ for (int i = 0; i < writers.size(); i++) {
+ writers.get(i).events().endWrite();
+ }
+ state = State.IDLE;
+ }
+
+ @Override
+ public ObjectWriter column(int colIndex) {
+ return writers.get(colIndex);
+ }
+
+ @Override
+ public ObjectWriter column(String colName) {
+ int index = schema.index(colName);
+ if (index == -1) {
+ throw new UndefinedColumnException(colName);
+ }
+ return writers.get(index);
+ }
+
+ @Override
+ public void set(int colIndex, Object value) {
+ ObjectWriter colWriter = column(colIndex);
+ switch (colWriter.type()) {
+ case ARRAY:
+ colWriter.array().setObject(value);
+ break;
+ case SCALAR:
+ colWriter.scalar().setObject(value);
+ break;
+ case TUPLE:
+ colWriter.tuple().setObject(value);
+ break;
+ default:
+ throw new IllegalStateException("Unexpected object type: " + colWriter.type());
+ }
+ }
+
+ @Override
+ public void setTuple(Object ...values) {
+ setObject(values);
+ }
+
+ @Override
+ public void setObject(Object value) {
+ Object values[] = (Object[]) value;
+ if (values.length != schema.size()) {
+ throw new IllegalArgumentException(
+ "Map has " + schema.size() +
+ " columns, but value array has " +
+ values.length + " values.");
+ }
+ for (int i = 0; i < values.length; i++) {
+ set(i, values[i]);
+ }
+ }
+
+ @Override
+ public ScalarWriter scalar(int colIndex) {
+ return column(colIndex).scalar();
+ }
+
+ @Override
+ public ScalarWriter scalar(String colName) {
+ return column(colName).scalar();
+ }
+
+ @Override
+ public TupleWriter tuple(int colIndex) {
+ return column(colIndex).tuple();
+ }
+
+ @Override
+ public TupleWriter tuple(String colName) {
+ return column(colName).tuple();
+ }
+
+ @Override
+ public ArrayWriter array(int colIndex) {
+ return column(colIndex).array();
+ }
+
+ @Override
+ public ArrayWriter array(String colName) {
+ return column(colName).array();
+ }
+
+ @Override
+ public ObjectType type(int colIndex) {
+ return column(colIndex).type();
+ }
+
+ @Override
+ public ObjectType type(String colName) {
+ return column(colName).type();
+ }
+
+ @Override
+ public int lastWriteIndex() {
+ return vectorIndex.vectorIndex();
+ }
+
+ @Override
+ public void bindListener(TupleWriterListener listener) {
+ this.listener = listener;
+ }
+
+ public void dump(HierarchicalFormatter format) {
+ format
+ .startObject(this)
+ .attribute("vectorIndex", vectorIndex)
+ .attribute("state", state)
+ .attributeArray("writers");
+ for (int i = 0; i < writers.size(); i++) {
+ format.element(i);
+ writers.get(i).dump(format);
+ }
+ format
+ .endArray()
+ .endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseScalarWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseScalarWriter.java
new file mode 100644
index 0000000..4793277
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseScalarWriter.java
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.joda.time.Period;
+
+import io.netty.buffer.DrillBuf;
+
+/**
+ * Column writer implementation that acts as the basis for the
+ * generated, vector-specific implementations. All set methods
+ * throw an exception; subclasses simply override the supported
+ * method(s).
+ * <p>
+ * The only tricky part to this class is understanding the
+ * state of the write indexes as the write proceeds. There are
+ * two pointers to consider:
+ * <ul>
+ * <li>lastWriteIndex: The position in the vector at which the
+ * client last asked us to write data. This index is maintained
+ * in this class because it depends only on the actions of this
+ * class.</li>
+ * <li>vectorIndex: The position in the vector at which we will
+ * write if the client chooses to write a value at this time.
+ * The vector index is shared by all columns at the same repeat
+ * level. It is incremented as the client steps through the write
+ * and is observed in this class each time a write occurs.</li>
+ * </ul>
+ * A repeat level is defined as any of the following:
+ * <ul>
+ * <li>The set of top-level scalar columns, or those within a
+ * top-level, non-repeated map, or nested to any depth within
+ * non-repeated maps rooted at the top level.</li>
+ * <li>The values for a single scalar array.</li>
+ * <li>The set of scalar columns within a repeated map, or
+ * nested within non-repeated maps within a repeated map.</li>
+ * </ul>
+ * Items at a repeat level index together and share a vector
+ * index. However, the columns within a repeat level
+ * <i>do not</i> share a last write index: some can lag further
+ * behind than others.
+ * <p>
+ * Let's illustrate the states. Let's focus on one column and
+ * illustrate the three states that can occur during write:
+ * <ul>
+ * <li><b>Behind</b>: the last write index is more than one position behind
+ * the vector index. Zero-filling will be needed to catch up to
+ * the vector index.</li>
+ * <li><b>Written</b>: the last write index is the same as the vector
+ * index because the client wrote data at this position (and previous
+ * values were back-filled with nulls, empties or zeros.)</li>
+ * <li><b>Unwritten</b>: the last write index is one behind the vector
+ * index. This occurs when the column was written, then the client
+ * moved to the next row or array position.</li>
+ * <li><b>Restarted</b>: The current row is abandoned (perhaps filtered
+ * out) and is to be rewritten. The last write position moves
+ * back one position. Note that the Restarted state is
+ * indistinguishable from the Unwritten state: the only real
+ * difference is that the current slot (pointed to by the
+ * vector index) contains the previously written value, which must
+ * be overwritten or back-filled. But this is fine, because we
+ * assume that unwritten values are garbage anyway.</li>
+ * </ul>
+ * To illustrate:<pre><code>
+ * Behind Written Unwritten Restarted
+ * |X| |X| |X| |X|
+ * lw >|X| |X| |X| |X|
+ * | | |0| |0| lw > |0|
+ * v >| | lw, v > |X| lw > |X| v > |X|
+ * v > | |
+ * </code></pre>
+ * The illustrated state transitions are:
+ * <ul>
+ * <li>Suppose the state starts in Behind.<ul>
+ * <li>If the client writes a value, then the empty slot is
+ * back-filled and the state moves to Written.</li>
+ * <li>If the client does not write a value, the state stays
+ * at Behind, and the gap of unfilled values grows.</li></ul></li>
+ * <li>When in the Written state:<ul>
+ * <li>If the client saves the current row or array position,
+ * the vector index increments and we move to the Unwritten
+ * state.</li>
+ * <li>If the client abandons the row, the last write position
+ * moves back one to recreate the unwritten state. We've
+ * shown this state separately above just to illustrate
+ * the two transitions from Written.</li></ul></li>
+ * <li>When in the Unwritten (or Restarted) states:<ul>
+ * <li>If the client writes a value, then the writer moves back to the
+ * Written state.</li>
+ * <li>If the client skips the value, then the vector index increments
+ * again, leaving a gap, and the writer moves to the
+ * Behind state.</li></ul></li>
+ * </ul>
+ * <p>
+ * We've already noted that the Restarted state is identical to
+ * the Unwritten state (and was discussed just to make the flow a bit
+ * clearer.) The astute reader will have noticed that the Behind state
+ * is the same as the Unwritten state if we define both simply as states
+ * in which the last write position lags the vector index.
+ * <p>
+ * Further, if
+ * one treats the gap between the last write index and the vector index
+ * as the amount (which may be zero) to back-fill, then there is just
+ * one state. This is, in fact, how the code works: it always writes
+ * to the vector index (and can do so multiple times for a single row),
+ * back-filling as necessary.
+ * <p>
+ * The states, then, are more for our use in understanding the algorithm.
+ * They are also very useful when working through the logic of performing
+ * a roll-over when a vector overflows.
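+ * <p>
+ * In pseudo-code, then, every write reduces to the following sketch.
+ * The generated writers inline the equivalent logic; fillEmpties() and
+ * writeValueAt() are illustrative names, not actual methods:
+ * <pre><code>
+ * int gap = vectorIndex.vectorIndex() - lastWriteIndex - 1;
+ * fillEmpties(gap);               // back-fill any skipped slots
+ * writeValueAt(vectorIndex.vectorIndex());
+ * lastWriteIndex = vectorIndex.vectorIndex();
+ * </code></pre>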
+ */
+
+public abstract class BaseScalarWriter extends AbstractScalarWriter {
+
+ public static final int MIN_BUFFER_SIZE = 256;
+
+ /**
+ * Indicates the position in the vector to write. Set via an object so that
+ * all writers (within the same subtree) can agree on the write position.
+ * For example, all top-level, simple columns see the same row index.
+ * All columns within a repeated map see the same (inner) index, etc.
+ */
+
+ protected ColumnWriterIndex vectorIndex;
+
+ /**
+ * Listener invoked if the vector overflows. If not provided, then the writer
+ * does not support vector overflow.
+ */
+
+ protected ColumnWriterListener listener;
+
+ protected DrillBuf drillBuf;
+
+ /**
+ * Capacity, in values, of the currently allocated buffer that backs
+ * the vector. Updated each time the buffer changes. The capacity is in
+ * values (rather than bytes) to streamline the per-write logic.
+ */
+
+ protected int capacity;
+
+ @Override
+ public void bindIndex(ColumnWriterIndex vectorIndex) {
+ this.vectorIndex = vectorIndex;
+ }
+
+ @Override
+ public ColumnWriterIndex writerIndex() { return vectorIndex; }
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) {
+ this.listener = listener;
+ }
+
+ /**
+ * All changes to the buffer come through this method so that the
+ * buffer address and capacity can be captured. There are only two
+ * ways to set the buffer: by binding to a vector in bindVector(),
+ * or by resizing the vector in writeIndex().
+ */
+
+ protected abstract void setBuffer();
+
+ protected void realloc(int size) {
+ vector().reallocRaw(size);
+ setBuffer();
+ }
+
+ /**
+ * The vector is about to grow. Give the listener a chance to
+ * veto the growth and opt for overflow instead.
+ *
+ * @param delta the amount of additional memory to allocate
+ * @return true if the vector can be grown, false if an
+ * overflow should be triggered
+ */
+
+ protected boolean canExpand(int delta) {
+ if (listener == null) {
+ return true;
+ } else {
+ return listener.canExpand(this, delta);
+ }
+ }
+
+ /**
+ * Handle vector overflow. If this is an array, then there is a slim chance
+ * we may need to grow the vector immediately after overflow. Since a double
+ * overflow is not allowed, this recursive call won't continue forever.
+ */
+
+ protected void overflowed() {
+ if (listener == null) {
+ throw new IndexOutOfBoundsException("Overflow not supported");
+ } else {
+ listener.overflowed(this);
+ }
+ }
+
+ public abstract void skipNulls();
+
+ @Override
+ public void setNull() {
+ throw new UnsupportedOperationException("Vector is not nullable");
+ }
+
+ @Override
+ public void setInt(int value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setLong(long value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setDouble(double value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setString(String value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setBytes(byte[] value, int len) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setDecimal(BigDecimal value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setPeriod(Period value) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format.extend();
+ super.dump(format);
+ format
+ .attribute("vectorIndex", vectorIndex)
+ .attributeIdentity("listener", listener)
+ .attribute("capacity", capacity)
+ .endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseVarWidthWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseVarWidthWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseVarWidthWriter.java
new file mode 100644
index 0000000..e54625e
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/BaseVarWidthWriter.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import org.apache.drill.exec.memory.BaseAllocator;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+
+/**
+ * Base class for variable-width (VarChar, VarBinary, etc.) writers.
+ * Handles the additional complexity that such writers work with
+ * both an offset vector and a data vector. The offset vector is
+ * written using a specialized offset vector writer. The last write
+ * index is defined as the last write position in the offset
+ * vector, not the last write position in the variable-width
+ * vector.
+ * <p>
+ * Most row and value events are forwarded to the offset vector writer.
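+ * <p>
+ * Conceptually, a variable-width write then looks like the following
+ * sketch (the generated writers inline the equivalent logic):
+ * <pre><code>
+ * int offset = writeIndex(len);          // reserve space, resizing if needed
+ * drillBuf.setBytes(offset, value, 0, len);
+ * // record the end offset of this value in the offset vector
+ * </code></pre>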
+ */
+
+public abstract class BaseVarWidthWriter extends BaseScalarWriter {
+ protected final OffsetVectorWriter offsetsWriter;
+
+ public BaseVarWidthWriter(UInt4Vector offsetVector) {
+ offsetsWriter = new OffsetVectorWriter(offsetVector);
+ }
+
+ @Override
+ public void bindIndex(final ColumnWriterIndex index) {
+ offsetsWriter.bindIndex(index);
+ super.bindIndex(index);
+ }
+
+ @Override
+ public void startWrite() {
+ setBuffer();
+ offsetsWriter.startWrite();
+ }
+
+ @Override
+ public void startRow() { offsetsWriter.startRow(); }
+
+ protected final int writeIndex(final int width) {
+
+ // This is performance critical code; every operation counts.
+ // Please be thoughtful when changing the code.
+
+ int writeOffset = offsetsWriter.nextOffset();
+ if (writeOffset + width < capacity) {
+ return writeOffset;
+ }
+ resize(writeOffset + width);
+ return offsetsWriter.nextOffset();
+ }
+
+ @Override
+ protected final void setBuffer() {
+ drillBuf = vector().getBuffer();
+ capacity = drillBuf.capacity();
+ }
+
+ private void resize(int size) {
+ if (size <= capacity) {
+ return;
+ }
+
+ // Since some vectors start off as 0 length, set a
+ // minimum size to avoid silly thrashing on early rows.
+
+ if (size < MIN_BUFFER_SIZE) {
+ size = MIN_BUFFER_SIZE;
+ }
+
+ // Grow the vector -- or overflow if the growth would make the batch
+ // consume too much memory. The idea is that we grow vectors as they
+ // fit the available memory budget, then we fill those vectors until
+ // one of them needs more space. At that point we trigger overflow to
+ // a new set of vectors. Internal fragmentation will result, but this
+ // approach (along with proper initial vector sizing), minimizes that
+ // fragmentation.
+
+ size = BaseAllocator.nextPowerOfTwo(size);
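+ // (For example, a request for 3000 bytes rounds up to 4096.)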
+
+ // Two cases: grow this vector or allocate a new one.
+
+ if (size <= ValueVector.MAX_BUFFER_SIZE && canExpand(size - capacity)) {
+
+ // Optimized form of reAlloc() which does not zero memory, does not do
+ // bounds checks (since they were already done above). The write index
+ // and offset remain unchanged.
+
+ realloc(size);
+ } else {
+
+ // Allocate a new vector, or throw an exception if overflow is not
+ // supported. If overflow is supported, the callback will call
+ // endWrite(), which will set the final writer index for the current
+ // vector. Then, bindVector() will be called to provide the new vector.
+ // The write index changes with the new vector.
+
+ overflowed();
+ }
+ }
+
+ @Override
+ public void skipNulls() { }
+
+ @Override
+ public void restartRow() { offsetsWriter.restartRow(); }
+
+ @Override
+ public int lastWriteIndex() { return offsetsWriter.lastWriteIndex(); }
+
+ @Override
+ public final void preRollover() {
+ vector().getBuffer().writerIndex(offsetsWriter.rowStartOffset());
+ offsetsWriter.preRollover();
+ }
+
+ @Override
+ public void postRollover() {
+ setBuffer();
+ offsetsWriter.postRollover();
+ }
+
+ @Override
+ public final void endWrite() {
+ vector().getBuffer().writerIndex(offsetsWriter.nextOffset());
+ offsetsWriter.endWrite();
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format.extend();
+ super.dump(format);
+ format.attribute("offsetsWriter");
+ offsetsWriter.dump(format);
+ format.endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java
new file mode 100644
index 0000000..5a1187a
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ColumnWriterFactory.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.drill.common.types.TypeProtos.MajorType;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.NullableVector;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.ValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnAccessors;
+import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter.ArrayObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter.ScalarObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractTupleWriter.TupleObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.MapWriter.ArrayMapWriter;
+import org.apache.drill.exec.vector.accessor.writer.MapWriter.DummyArrayMapWriter;
+import org.apache.drill.exec.vector.accessor.writer.MapWriter.DummyMapWriter;
+import org.apache.drill.exec.vector.accessor.writer.MapWriter.SingleMapWriter;
+import org.apache.drill.exec.vector.accessor.writer.dummy.DummyArrayWriter;
+import org.apache.drill.exec.vector.accessor.writer.dummy.DummyScalarWriter;
+import org.apache.drill.exec.vector.complex.AbstractMapVector;
+import org.apache.drill.exec.vector.complex.MapVector;
+import org.apache.drill.exec.vector.complex.RepeatedMapVector;
+import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+
+/**
+ * Gather generated writer classes into a set of class tables to allow rapid
+ * run-time creation of writers. Builds the writer and its object-writer
+ * wrapper, which binds the vector to the writer.
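+ * <p>
+ * Typical use (a sketch; the schema and vector are assumed to exist,
+ * and a null vector yields a "dummy" writer for a non-projected column):
+ * <pre><code>
+ * AbstractObjectWriter colWriter =
+ *     ColumnWriterFactory.buildColumnWriter(colSchema, vector);
+ * </code></pre>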
+ */
+
+@SuppressWarnings("unchecked")
+public class ColumnWriterFactory {
+
+ private static final int typeCount = MinorType.values().length;
+ private static final Class<? extends BaseScalarWriter> requiredWriters[] = new Class[typeCount];
+
+ static {
+ ColumnAccessors.defineRequiredWriters(requiredWriters);
+ }
+
+ public static AbstractObjectWriter buildColumnWriter(ColumnMetadata schema, ValueVector vector) {
+ if (vector == null) {
+ return buildDummyColumnWriter(schema);
+ }
+
+ // Build a writer for a materialized column.
+
+ assert schema.type() == vector.getField().getType().getMinorType();
+ assert schema.mode() == vector.getField().getType().getMode();
+
+ switch (schema.type()) {
+ case GENERIC_OBJECT:
+ case LATE:
+ case NULL:
+ case LIST:
+ case MAP:
+ throw new UnsupportedOperationException(schema.type().toString());
+ default:
+ switch (schema.mode()) {
+ case OPTIONAL:
+ NullableVector nullableVector = (NullableVector) vector;
+ return NullableScalarWriter.build(schema, nullableVector,
+ newWriter(nullableVector.getValuesVector()));
+ case REQUIRED:
+ return new ScalarObjectWriter(schema, newWriter(vector));
+ case REPEATED:
+ RepeatedValueVector repeatedVector = (RepeatedValueVector) vector;
+ return ScalarArrayWriter.build(schema, repeatedVector,
+ newWriter(repeatedVector.getDataVector()));
+ default:
+ throw new UnsupportedOperationException(schema.mode().toString());
+ }
+ }
+ }
+
+ /**
+ * Build a writer for a non-projected column.
+ * @param schema schema of the column
+ * @return a "dummy" writer for the column
+ */
+
+ public static AbstractObjectWriter buildDummyColumnWriter(ColumnMetadata schema) {
+ switch (schema.type()) {
+ case GENERIC_OBJECT:
+ case LATE:
+ case NULL:
+ case LIST:
+ case MAP:
+ throw new UnsupportedOperationException(schema.type().toString());
+ default:
+ ScalarObjectWriter scalarWriter = new ScalarObjectWriter(schema,
+ new DummyScalarWriter());
+ switch (schema.mode()) {
+ case OPTIONAL:
+ case REQUIRED:
+ return scalarWriter;
+ case REPEATED:
+ return new ArrayObjectWriter(schema,
+ new DummyArrayWriter(
+ scalarWriter));
+ default:
+ throw new UnsupportedOperationException(schema.mode().toString());
+ }
+ }
+ }
+
+ public static TupleObjectWriter buildMap(ColumnMetadata schema, MapVector vector,
+ List<AbstractObjectWriter> writers) {
+ MapWriter mapWriter;
+ if (schema.isProjected()) {
+ mapWriter = new SingleMapWriter(schema, vector, writers);
+ } else {
+ mapWriter = new DummyMapWriter(schema, writers);
+ }
+ return new TupleObjectWriter(schema, mapWriter);
+ }
+
+ public static ArrayObjectWriter buildMapArray(ColumnMetadata schema,
+ UInt4Vector offsetVector,
+ List<AbstractObjectWriter> writers) {
+ MapWriter mapWriter;
+ if (schema.isProjected()) {
+ mapWriter = new ArrayMapWriter(schema, writers);
+ } else {
+ mapWriter = new DummyArrayMapWriter(schema, writers);
+ }
+ TupleObjectWriter mapArray = new TupleObjectWriter(schema, mapWriter);
+ AbstractArrayWriter arrayWriter;
+ if (schema.isProjected()) {
+ arrayWriter = new ObjectArrayWriter(
+ offsetVector,
+ mapArray);
+ } else {
+ arrayWriter = new DummyArrayWriter(mapArray);
+ }
+ return new ArrayObjectWriter(schema, arrayWriter);
+ }
+
+ public static AbstractObjectWriter buildMapWriter(ColumnMetadata schema,
+ AbstractMapVector vector,
+ List<AbstractObjectWriter> writers) {
+ assert (vector != null) == schema.isProjected();
+ if (! schema.isArray()) {
+ return buildMap(schema, (MapVector) vector, writers);
+ } else if (vector == null) {
+ return buildMapArray(schema,
+ null, writers);
+ } else {
+ return buildMapArray(schema,
+ ((RepeatedMapVector) vector).getOffsetVector(),
+ writers);
+ }
+ }
+
+ public static AbstractObjectWriter buildMapWriter(ColumnMetadata schema, AbstractMapVector vector) {
+ assert schema.mapSchema().size() == 0;
+ return buildMapWriter(schema, vector, new ArrayList<AbstractObjectWriter>());
+ }
+
+ public static BaseScalarWriter newWriter(ValueVector vector) {
+ MajorType major = vector.getField().getType();
+ MinorType type = major.getMinorType();
+ try {
+ Class<? extends BaseScalarWriter> accessorClass = requiredWriters[type.ordinal()];
+ if (accessorClass == null) {
+ throw new UnsupportedOperationException(type.toString());
+ }
+ Constructor<? extends BaseScalarWriter> ctor = accessorClass.getConstructor(ValueVector.class);
+ return ctor.newInstance(vector);
+ } catch (InstantiationException | IllegalAccessException | NoSuchMethodException |
+ SecurityException | IllegalArgumentException | InvocationTargetException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/MapWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/MapWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/MapWriter.java
new file mode 100644
index 0000000..8aec301
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/MapWriter.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.util.List;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.complex.MapVector;
+
+/**
+ * Writer for a Drill Map type. Maps are actually tuples, just like rows.
+ */
+
+public abstract class MapWriter extends AbstractTupleWriter {
+
+ /**
+ * Wrap the outer index to avoid incrementing the array index
+ * on the call to <tt>nextElement()</tt>. For maps, the increment
+ * is done at the map level, not the column level.
+ */
+
+ private static class MemberWriterIndex implements ColumnWriterIndex {
+ private ColumnWriterIndex baseIndex;
+
+ private MemberWriterIndex(ColumnWriterIndex baseIndex) {
+ this.baseIndex = baseIndex;
+ }
+
+ @Override public int rowStartIndex() { return baseIndex.rowStartIndex(); }
+ @Override public int vectorIndex() { return baseIndex.vectorIndex(); }
+ @Override public void nextElement() { }
+ @Override public void rollover() { }
+ @Override public ColumnWriterIndex outerIndex() {
+ return baseIndex.outerIndex();
+ }
+
+ @Override
+ public String toString() {
+ return new StringBuilder()
+ .append("[")
+ .append(getClass().getSimpleName())
+ .append(" baseIndex = ")
+ .append(baseIndex.toString())
+ .append("]")
+ .toString();
+ }
+ }
+
+ /**
+ * Writer for a single (non-array) map. Clients don't really "write" maps;
+ * rather, this writer is a holder for the columns within the map, and those
+ * columns are what is written.
+ */
+
+ protected static class SingleMapWriter extends MapWriter {
+ private final MapVector mapVector;
+
+ protected SingleMapWriter(ColumnMetadata schema, MapVector vector, List<AbstractObjectWriter> writers) {
+ super(schema, writers);
+ mapVector = vector;
+ }
+
+ @Override
+ public void endWrite() {
+ super.endWrite();
+
+ // Special form of set value count: used only for
+ // this class to avoid setting the value count of children.
+ // Setting these counts was already done. Doing it again
+ // will corrupt nullable vectors because the writers don't
+ // set the "lastSet" field of nullable vector accessors,
+ // and the initial value of -1 will cause all values to
+ // be overwritten.
+ //
+ // Note that the map vector can be null if there is no actual
+ // map vector represented by this writer.
+
+ if (mapVector != null) {
+ mapVector.setMapValueCount(vectorIndex.vectorIndex());
+ }
+ }
+ }
+
+ /**
+ * Writer for an array of maps. A single array index coordinates writes
+ * to the constituent member vectors so that, say, the values for (row 10,
+ * element 5) all land at the same position in the columns within the map.
+ * Since the map is an array, it has an associated offset vector, which the
+ * parent array writer is responsible for maintaining.
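+ * <p>
+ * The client-side write pattern, as exercised by the tests in this
+ * commit (names and values are illustrative):
+ * <pre><code>
+ * ArrayWriter maWriter = rootWriter.array("m");
+ * TupleWriter mWriter = maWriter.tuple();
+ * mWriter.scalar("b").setInt(10);
+ * maWriter.save();     // commit one array element
+ * rootWriter.save();   // commit the row
+ * </code></pre>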
+ */
+
+ protected static class ArrayMapWriter extends MapWriter {
+
+ protected ArrayMapWriter(ColumnMetadata schema, List<AbstractObjectWriter> writers) {
+ super(schema, writers);
+ }
+
+ @Override
+ public void bindIndex(ColumnWriterIndex index) {
+
+ // This is a repeated map, so the provided index is an array element
+ // index. Convert this to an index that will not increment the element
+ // index on each write so that a map with three members, say, won't
+ // increment the index for each member. Rather, the index must be
+ // incremented at the array level.
+
+ bindIndex(index, new MemberWriterIndex(index));
+ }
+
+ // In endWrite(), do not call setValueCount on the map vector.
+ // Doing so will zero-fill the composite vectors because
+ // the internal map state does not track the writer state.
+ // Instead, the code in this structure has set the value
+ // count for each composite vector individually.
+ }
+
+ protected static class DummyMapWriter extends MapWriter {
+
+ protected DummyMapWriter(ColumnMetadata schema,
+ List<AbstractObjectWriter> writers) {
+ super(schema, writers);
+ }
+ }
+
+ protected static class DummyArrayMapWriter extends MapWriter {
+
+ protected DummyArrayMapWriter(ColumnMetadata schema,
+ List<AbstractObjectWriter> writers) {
+ super(schema, writers);
+ }
+ }
+
+ protected final ColumnMetadata mapColumnSchema;
+
+ protected MapWriter(ColumnMetadata schema, List<AbstractObjectWriter> writers) {
+ super(schema.mapSchema(), writers);
+ mapColumnSchema = schema;
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java
new file mode 100644
index 0000000..6da2b50
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/NullableScalarWriter.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.BaseDataValueVector;
+import org.apache.drill.exec.vector.NullableVector;
+import org.apache.drill.exec.vector.accessor.ColumnAccessors.UInt1ColumnWriter;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.joda.time.Period;
+
+public class NullableScalarWriter extends AbstractScalarWriter {
+
+ private final UInt1ColumnWriter isSetWriter;
+ private final BaseScalarWriter baseWriter;
+
+ public NullableScalarWriter(NullableVector nullableVector, BaseScalarWriter baseWriter) {
+ isSetWriter = new UInt1ColumnWriter(nullableVector.getBitsVector());
+ this.baseWriter = baseWriter;
+ }
+
+ public static ScalarObjectWriter build(ColumnMetadata schema,
+ NullableVector nullableVector, BaseScalarWriter baseWriter) {
+ return new ScalarObjectWriter(schema,
+ new NullableScalarWriter(nullableVector, baseWriter));
+ }
+
+ public BaseScalarWriter bitsWriter() { return isSetWriter; }
+ public BaseScalarWriter baseWriter() { return baseWriter; }
+
+ @Override
+ public BaseDataValueVector vector() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void bindIndex(ColumnWriterIndex index) {
+ isSetWriter.bindIndex(index);
+ baseWriter.bindIndex(index);
+ }
+
+ @Override
+ public ColumnWriterIndex writerIndex() { return baseWriter.writerIndex(); }
+
+ @Override
+ public ValueType valueType() {
+ return baseWriter.valueType();
+ }
+
+ @Override
+ public void restartRow() {
+ isSetWriter.restartRow();
+ baseWriter.restartRow();
+ }
+
+ @Override
+ public void setNull() {
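+ // Mark the value as null in the "bits" vector, and tell the
+ // base writer to skip back-filling the data slot: the data for
+ // a null value is undefined.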
+ isSetWriter.setInt(0);
+ baseWriter.skipNulls();
+ }
+
+ @Override
+ public void setInt(int value) {
+ baseWriter.setInt(value);
+ isSetWriter.setInt(1);
+ }
+
+ @Override
+ public void setLong(long value) {
+ baseWriter.setLong(value);
+ isSetWriter.setInt(1);
+ }
+
+ @Override
+ public void setDouble(double value) {
+ baseWriter.setDouble(value);
+ isSetWriter.setInt(1);
+ }
+
+ @Override
+ public void setString(String value) {
+ // The string write may overflow. Set the bit only
+ // after the write, since the bits vector has no
+ // overflow handling separate from the nullable
+ // vector as a whole.
+
+ baseWriter.setString(value);
+ isSetWriter.setInt(1);
+ }
+
+ @Override
+ public void setBytes(byte[] value, int len) {
+ baseWriter.setBytes(value, len);
+ isSetWriter.setInt(1);
+ }
+
+ @Override
+ public void setDecimal(BigDecimal value) {
+ baseWriter.setDecimal(value);
+ isSetWriter.setInt(1);
+ }
+
+ @Override
+ public void setPeriod(Period value) {
+ baseWriter.setPeriod(value);
+ isSetWriter.setInt(1);
+ }
+
+ @Override
+ public void preRollover() {
+ isSetWriter.preRollover();
+ baseWriter.preRollover();
+ }
+
+ @Override
+ public void postRollover() {
+ isSetWriter.postRollover();
+ baseWriter.postRollover();
+ }
+
+ @Override
+ public int lastWriteIndex() {
+ return baseWriter.lastWriteIndex();
+ }
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) {
+ baseWriter.bindListener(listener);
+ }
+
+ @Override
+ public void startWrite() {
+ isSetWriter.startWrite();
+ baseWriter.startWrite();
+ }
+
+ @Override
+ public void startRow() {
+ // Skip calls for performance: they do nothing for
+ // scalar writers -- the only kind supported here.
+// isSetWriter.startRow();
+ baseWriter.startRow();
+ }
+
+ @Override
+ public void endArrayValue() {
+ // Skip calls for performance: they do nothing for
+ // scalar writers -- the only kind supported here.
+// isSetWriter.saveValue();
+ baseWriter.endArrayValue();
+ }
+
+ @Override
+ public void endWrite() {
+ isSetWriter.endWrite();
+ // Avoid back-filling null values.
+ baseWriter.skipNulls();
+ baseWriter.endWrite();
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format.extend();
+ super.dump(format);
+ format.attribute("isSetWriter");
+ isSetWriter.dump(format);
+ format.attribute("baseWriter");
+ baseWriter.dump(format);
+ format.endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ObjectArrayWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ObjectArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ObjectArrayWriter.java
new file mode 100644
index 0000000..3554a3b
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ObjectArrayWriter.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter.BaseArrayWriter;
+
+/**
+ * Writer for an array of either a map or another array. Here, the contents
+ * are a structure and need explicit saves. State transitions in addition to the
+ * base class are:
+ *
+ * <table border=1>
+ * <tr><th>Public API</th><th>Array Event</th>
+ * <th>Offset Event</th><th>Element Event</th></tr>
+ * <tr><td>save() (array)</td>
+ * <td>saveValue()</td>
+ * <td>saveValue()</td>
+ * <td>saveValue()</td></tr>
+ * </table>
+ *
+ * This class is used for arrays of maps (and for arrays of arrays). When used
+ * with a map, we have a single offset vector pointing into a group of
+ * arrays. Consider the simple case of a map of three scalars. Here, we have
+ * a hybrid of the states discussed for the {@link BaseScalarWriter} and those
+ * discussed for {@link OffsetVectorWriter}. That is, the offset vector
+ * points into one map element. The individual elements can be Behind,
+ * Written or Unwritten, depending on the specific actions taken by the
+ * client.
+ * <p>
+ * For example:<pre><code>
+ * Offset Vector Vector A Vector B Vector C Index
+ * | | + - > |X| < lwa |Y| |Z| 8
+ * lw > | 8 | - + | | |Y| |Z| 9
+ * v > | 10 | - - - > | | |Y| |Z| 10
+ * | | | | |Y| < lwb |Z| 11
+ * | | v' > | | | | |Z| < lwc 12
+ * </code></pre>
+ * In the above:
+ * <ul>
+ * <li>The last write index, lw, for the current row points to the
+ * previous start position. (Recall that finishing the row writes the
+ * end position into the entry for the <i>next</i> row.)</li>
+ * <li>The top-level vector index, v, points to start position of
+ * the current row, which is offset 10 in all three data vectors.</li>
+ * <li>The current array write position, v', is for the third element
+ * of the array that starts at position 10.</li>
+ * <li>Since the row is active, the end position of the row has not yet
+ * been written, and so is blank in the offset vector.</li>
+ * <li>The previous row had a two-element map array written, starting
+ * at offset 8 and ending at offset 9 (inclusive), recorded by
+ * writing the next start offset (exclusive) into the following
+ * offset array slot.</li>
+ * <li>Column A has not had data written since the first element of the
+ * previous row. It is currently in the Behind state with the last
+ * write position for A, lwa, pointing to the last write.</li>
+ * <li>Column B is in the Unwritten state. A value was written for
+ * the previous element in the map array, but not for the current
+ * element. We see this from the fact that the last write position
+ * for B, lwb, is one behind v'.</li>
+ * <li>Column C has been written for the current array element and is
+ * in the Written state, with the last write position, lwc, pointing
+ * to the same location as v'.</li>
+ * </ul>
+ * Suppose we now write to Vector A and end the row:<pre><code>
+ * Offset Vector Vector A Vector B Vector C Index
+ * | | + - > |X| |Y| |Z| 8
+ * | 8 | - + |0| |Y| |Z| 9
+ * lw > | 10 | - - - > |0| |Y| |Z| 10
+ * v > | 13 | - + |0| |Y| < lwb |Z| 11
+ * | | | |X| < lwa | | |Z| < lwc 12
+ * | | + - > | | | | | | < v' 13
+ * </code></pre>
+ * Here:
+ * <ul>
+ * <li>Vector A has been back-filled and the last write index advanced.</li>
+ * <li>Vector B is now in the Behind state. Vectors A and C are in the
+ * Unwritten state.</li>
+ * <li>The end position has been written to the offset vector, the
+ * offset vector last write position has been advanced, and the
+ * top-level vector offset has advanced.</li>
+ * All this happens automatically as part of the indexing mechanisms.
+ * The key reason to understand this flow is to understand what happens
+ * during vector overflow: unlike an array of scalars, in which the data
+ * vector can never be in the Behind state, with an array of maps
+ * each vector can be in any of the scalar writer states.
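+ * <p>
+ * As a usage sketch (how the array and map writers are obtained, and
+ * the <tt>scalar("a")</tt> accessor, depend on the enclosing row
+ * writer and are assumptions here):<pre><code>
+ * // Write a two-element map array into the current row.
+ * mapWriter.scalar("a").setInt(10);  // first array element
+ * arrayWriter.save();                // commit it, advance the index
+ * mapWriter.scalar("a").setInt(20);  // second array element
+ * arrayWriter.save();
+ * // Saving the row then commits the offset vector entry.
+ * </code></pre>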
+ */
+
+public class ObjectArrayWriter extends BaseArrayWriter {
+
+ protected ObjectArrayWriter(UInt4Vector offsetVector, AbstractObjectWriter elementWriter) {
+ super(offsetVector, elementWriter);
+ }
+
+ @Override
+ public void bindIndex(ColumnWriterIndex index) {
+ elementIndex = new ArrayElementWriterIndex();
+ super.bindIndex(index);
+ }
+
+ @Override
+ public void save() {
+ elementObjWriter.events().endArrayValue();
+ elementIndex.next();
+ }
+
+ @Override
+ public void set(Object... values) {
+ setObject(values);
+ }
+
+ @Override
+ public void setObject(Object array) {
+ Object[] values = (Object[]) array;
+ for (int i = 0; i < values.length; i++) {
+ elementObjWriter.set(values[i]);
+ save();
+ }
+ }
+
+ @Override
+ public int lastWriteIndex() {
+ // Undefined for arrays
+ return 0;
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/OffsetVectorWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/OffsetVectorWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/OffsetVectorWriter.java
new file mode 100644
index 0000000..d5f9b30
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/OffsetVectorWriter.java
@@ -0,0 +1,283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import org.apache.drill.exec.vector.BaseDataValueVector;
+import org.apache.drill.exec.vector.UInt4Vector;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.exec.vector.accessor.impl.HierarchicalFormatter;
+
+/**
+ * Specialized column writer for the (hidden) offset vector used
+ * with variable-length or repeated vectors. See comments in the
+ * <tt>ColumnAccessors.java</tt> template file for more details.
+ * <p>
+ * Note that the <tt>lastWriteIndex</tt> tracked here corresponds
+ * to the data values; it is one less than the actual offset vector
+ * last write index due to the nature of offset vector layouts. The selection
+ * of last write index basis makes roll-over processing easier as only this
+ * writer need know about the +1 translation required for writing.
+ * <p>
+ * The states illustrated in the base class apply here as well,
+ * remembering that the end offset for a row (or array position)
+ * is written one ahead of the vector index.
+ * <p>
+ * The vector index does create an interesting dynamic for the child
+ * writers. From the child writer's perspective, the states described in
+ * the super class are the only states of interest. Here we want to
+ * take the perspective of the parent.
+ * <p>
+ * The offset vector is an implementation of a repeat level. A repeat
+ * level can occur for a single array, or for a collection of columns
+ * within a repeated map. (A repeat level also occurs for variable-width
+ * fields, but this is a bit harder to see, so let's ignore that for
+ * now.)
+ * <p>
+ * The key point to realize is that each repeat level introduces an
+ * isolation level in terms of indexing. That is, empty values in the
+ * outer level have no effect on indexing in the inner level. In fact,
+ * the nature of a repeated outer level means that there are no empties
+ * in the inner level.
+ * <p>
+ * To illustrate:<pre><code>
+ * Offset Vector Data Vector Indexes
+ * lw, v > | 10 | - - - - - > | X | 10
+ * | 12 | - - + | X | < lw' 11
+ * | | + - - > | | < v' 12
+ * </code></pre>
+ * In the above, the client has just written an array of two elements
+ * at the current write position. The data starts at offset 10 in
+ * the data vector, and the next write will be at 12. The end offset
+ * is written one ahead of the vector index.
+ * <p>
+ * From the data vector's perspective, its last-write (lw') reflects
+ * the last element written. If this is an array of scalars, then the
+ * write index is automatically incremented, as illustrated by v'.
+ * (For map arrays, the index must be incremented by calling
+ * <tt>save()</tt> on the map array writer.)
+ * <p>
+ * Suppose the client now skips some arrays:<pre><code>
+ * Offset Vector Data Vector
+ * lw > | 10 | - - - - - > | X | 10
+ * | 12 | - - + | X | < lw' 11
+ * | | + - - > | | < v' 12
+ * | | | | 13
+ * v > | | | | 14
+ * </code></pre>
+ * The last write position does not move and there are gaps in the
+ * offset vector. The vector index points to the current row. Note
+ * that the data vector's last write and vector indexes do not change;
+ * this reflects the fact that the data vector's vector index
+ * (v') matches the tail offset.
+ * <p>
+ * The client now writes a three-element vector:<pre><code>
+ * Offset Vector Data Vector
+ * | 10 | - - - - - > | X | 10
+ * | 12 | - - + | X | 11
+ * | 12 | - - + - - > | Y | 12
+ * | 12 | - - + | Y | 13
+ * lw, v > | 12 | - - + | Y | < lw' 14
+ * | 15 | - - - - - > | | < v' 15
+ * </code></pre>
+ * Quite a bit just happened. The empty offset slots were back-filled
+ * with the last write offset in the data vector. The client wrote
+ * three values, which advanced the last write and vector indexes
+ * in the data vector. And, the last write index in the offset
+ * vector also moved to reflect the update of the offset vector.
+ * Note that as a result, multiple positions in the offset vector
+ * point to the same location in the data vector. This is fine; we
+ * compute the number of entries as the difference between two successive
+ * offset vector positions, so the empty positions have become 0-length
+ * arrays.
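+ * In code form (with <tt>offset</tt> standing for the offset vector
+ * contents):<pre><code>
+ * int length = offset[i + 1] - offset[i]; // 0 for back-filled slots
+ * </code></pre>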
+ * <p>
+ * Note that, for an array of scalars, when overflow occurs,
+ * we need only worry about two
+ * states in the data vector: either data has been written for the
+ * row (as in the third example above), and so must be moved to the
+ * roll-over vector, or no data has been written and no move is
+ * needed. We never have to worry about missing values because they
+ * cannot occur in the data vector.
+ * <p>
+ * See {@link ObjectArrayWriter} for information about arrays of
+ * maps (arrays of multiple columns.)
+ */
+
+public class OffsetVectorWriter extends AbstractFixedWidthWriter {
+
+ private static final int VALUE_WIDTH = UInt4Vector.VALUE_WIDTH;
+
+ private UInt4Vector vector;
+
+ /**
+ * Offset of the first value for the current row. Used during
+ * overflow or if the row is restarted.
+ */
+
+ private int rowStartOffset;
+
+ /**
+ * Cached value of the end offset for the current value. Used
+ * primarily for variable-width columns to allow the column to be
+ * rewritten multiple times within the same row. The start offset
+ * value is updated with the end offset only when the value is
+ * committed in {@link #endValue()}.
+ */
+
+ private int nextOffset;
+
+ public OffsetVectorWriter(UInt4Vector vector) {
+ this.vector = vector;
+ }
+
+ @Override public BaseDataValueVector vector() { return vector; }
+ @Override public int width() { return VALUE_WIDTH; }
+
+ @Override
+ protected void realloc(int size) {
+ vector.reallocRaw(size);
+ setBuffer();
+ }
+
+ @Override
+ public ValueType valueType() { return ValueType.INTEGER; }
+
+ @Override
+ public void startWrite() {
+ super.startWrite();
+ nextOffset = 0;
+ rowStartOffset = 0;
+
+ // Special handling for first value. Alloc vector if needed.
+ // Offset vectors require a 0 at position 0. The (end) offset
+ // for row 0 starts at position 1, which is handled in
+ // writeOffset() below.
+
+ if (capacity * VALUE_WIDTH < MIN_BUFFER_SIZE) {
+ realloc(MIN_BUFFER_SIZE);
+ }
+ vector.getBuffer().unsafePutInt(0, 0);
+ }
+
+ public int nextOffset() { return nextOffset; }
+ public int rowStartOffset() { return rowStartOffset; }
+
+ @Override
+ public void startRow() { rowStartOffset = nextOffset; }
+
+ /**
+ * Return the write offset, which is one greater than the index reported
+ * by the vector index.
+ *
+ * @return the offset in which to write the current offset of the end
+ * of the current data value
+ */
+
+ protected final int writeIndex() {
+
+ // "Fast path" for the normal case of no fills, no overflow.
+ // This is the only bounds check we want to do for the entire
+ // set operation.
+
+ // This is performance critical code; every operation counts.
+ // Please be thoughtful when changing the code.
+
+ final int valueIndex = vectorIndex.vectorIndex();
+ int writeIndex = valueIndex + 1;
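+
+ // Example: for value index 5, the end offset is written at slot 6;
+ // slot 0 always holds the leading zero written in startWrite().
+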
+ if (lastWriteIndex < valueIndex - 1 || writeIndex >= capacity) {
+ writeIndex = prepareWrite(writeIndex);
+ }
+
+ // Track the last write location for zero-fill use next time around.
+ // Recall, it is the value index, which is one less than the (end)
+ // offset index.
+
+ lastWriteIndex = writeIndex - 1;
+ return writeIndex;
+ }
+
+ protected int prepareWrite(int writeIndex) {
+
+ // Either empties must be filled or the vector is full.
+
+ resize(writeIndex);
+
+ // Call to resize may cause rollover, so reset write index
+ // afterwards.
+
+ writeIndex = vectorIndex.vectorIndex() + 1;
+
+ // Fill empties to the write position.
+
+ fillEmpties(writeIndex);
+ return writeIndex;
+ }
+
+ @Override
+ protected final void fillEmpties(final int writeIndex) {
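+
+ // Back-fill skipped slots with the current end offset so that each
+ // skipped entry becomes a zero-length value, as described in the
+ // class comment. Slot i+1 holds the end offset for value i.
+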
+ while (lastWriteIndex < writeIndex - 1) {
+ drillBuf.unsafePutInt((++lastWriteIndex + 1) * VALUE_WIDTH, nextOffset);
+ }
+ }
+
+ public final void setNextOffset(final int newOffset) {
+ final int writeIndex = writeIndex();
+ drillBuf.unsafePutInt(writeIndex * VALUE_WIDTH, newOffset);
+ nextOffset = newOffset;
+ }
+
+ @Override
+ public void skipNulls() {
+
+ // Nothing to do. Fill empties logic will fill in missing
+ // offsets.
+ }
+
+ @Override
+ public void restartRow() {
+ nextOffset = rowStartOffset;
+ super.restartRow();
+ }
+
+ @Override
+ public void preRollover() {
+ setValueCount(vectorIndex.rowStartIndex() + 1);
+ }
+
+ @Override
+ public void postRollover() {
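+
+ // The current row's data now starts at offset 0 in the new vector,
+ // so rebase the cached end offset relative to the old row start.
+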
+ final int newNext = nextOffset - rowStartOffset;
+ super.postRollover();
+ nextOffset = newNext;
+ }
+
+ @Override
+ public final void endWrite() {
+ setValueCount(vectorIndex.vectorIndex() + 1);
+ }
+
+ @Override
+ public void dump(HierarchicalFormatter format) {
+ format.extend();
+ super.dump(format);
+ format
+ .attribute("lastWriteIndex", lastWriteIndex)
+ .attribute("nextOffset", nextOffset)
+ .endObject();
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java
new file mode 100644
index 0000000..95f8f29
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/ScalarArrayWriter.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.record.ColumnMetadata;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;
+import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter.BaseArrayWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter.ScalarObjectWriter;
+import org.apache.drill.exec.vector.complex.RepeatedValueVector;
+import org.joda.time.Period;
+
+/**
+ * Writer for a column that holds an array of scalars. This writer manages
+ * the array itself. A type-specific child writer manages the elements within
+ * the array. The overall row index (usually) provides the index into
+ * the offset vector. An array-specific element index provides the index
+ * into elements.
+ * <p>
+ * This class manages the offset vector directly. Doing so saves one read and
+ * one write to direct memory per element value.
+ * <p>
+ * Provides generic write methods for testing and other cases in which
+ * convenience is more important than speed.
+ * <p>
+ * The scalar writer for array-valued columns appends values: once a value
+ * is written, it cannot be changed. As a result, writer methods have no item index;
+ * each set advances the array to the next position. The type-specific
+ * element writers that this class wraps are generated for each repeated
+ * value vector type.
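+ * <p>
+ * For example, using the convenience methods defined in this class:
+ * <pre><code>
+ * writer.setIntArray(new int[] {1, 2, 3});
+ * // or, generically (dispatches on the Java array type):
+ * writer.setObject(new int[] {1, 2, 3});
+ * </code></pre>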
+ */
+
+public class ScalarArrayWriter extends BaseArrayWriter {
+
+ /**
+ * For scalar arrays, incrementing the element index and
+ * committing the current value is done automatically since
+ * there is exactly one value per array element.
+ */
+
+ public class ScalarElementWriterIndex extends ArrayElementWriterIndex {
+
+ @Override
+ public final void nextElement() { next(); }
+ }
+
+ private final BaseScalarWriter elementWriter;
+
+ public ScalarArrayWriter(ColumnMetadata schema,
+ RepeatedValueVector vector, BaseScalarWriter elementWriter) {
+ super(vector.getOffsetVector(),
+ new ScalarObjectWriter(schema, elementWriter));
+ this.elementWriter = elementWriter;
+ }
+
+ public static ArrayObjectWriter build(ColumnMetadata schema,
+ RepeatedValueVector repeatedVector, BaseScalarWriter elementWriter) {
+ return new ArrayObjectWriter(schema,
+ new ScalarArrayWriter(schema, repeatedVector, elementWriter));
+ }
+
+ @Override
+ public void bindIndex(ColumnWriterIndex index) {
+ elementIndex = new ScalarElementWriterIndex();
+ super.bindIndex(index);
+ elementWriter.bindIndex(elementIndex);
+ }
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) {
+ elementWriter.bindListener(listener);
+ }
+
+ @Override
+ public void save() {
+ // No-op: done when writing each scalar value
+ }
+
+ @Override
+ public void set(Object... values) {
+ for (Object value : values) {
+ entry().set(value);
+ }
+ }
+
+ @Override
+ public void setObject(Object array) {
+ if (array == null) {
+ // Assume null means a 0-element array since Drill does
+ // not support null for the whole array.
+
+ return;
+ }
+ String objClass = array.getClass().getName();
+ if (! objClass.startsWith("[")) {
+ throw new IllegalArgumentException("Argument must be an array");
+ }
+
+ // Figure out type
+
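+ // The JVM encodes the array type in the class name: "[I" for int[],
+ // "[[B" for byte[][], "[Ljava.lang.String;" for String[], and so on.
+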
+ char second = objClass.charAt(1);
+ switch (second) {
+ case '[':
+ // A bytes value is itself a byte array, so an array of
+ // bytes values is represented as an array of byte arrays.
+
+ char third = objClass.charAt(2);
+ switch (third) {
+ case 'B':
+ setBytesArray((byte[][]) array);
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown Java array type: " + objClass);
+ }
+ break;
+ case 'S':
+ setShortArray((short[]) array);
+ break;
+ case 'I':
+ setIntArray((int[]) array);
+ break;
+ case 'J':
+ setLongArray((long[]) array);
+ break;
+ case 'F':
+ setFloatArray((float[]) array);
+ break;
+ case 'D':
+ setDoubleArray((double[]) array);
+ break;
+ case 'Z':
+ setBooleanArray((boolean[]) array);
+ break;
+ case 'L':
+ int posn = objClass.indexOf(';');
+
+ // If the array is of type Object, then we have no type info.
+
+ String memberClassName = objClass.substring(2, posn);
+ if (memberClassName.equals(String.class.getName())) {
+ setStringArray((String[]) array);
+ } else if (memberClassName.equals(Period.class.getName())) {
+ setPeriodArray((Period[]) array);
+ } else if (memberClassName.equals(BigDecimal.class.getName())) {
+ setBigDecimalArray((BigDecimal[]) array);
+ } else {
+ throw new IllegalArgumentException("Unknown Java array type: " + memberClassName);
+ }
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown Java array type: " + objClass);
+ }
+ }
+
+ public void setBooleanArray(boolean[] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setInt(value[i] ? 1 : 0);
+ }
+ }
+
+ public void setBytesArray(byte[][] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setBytes(value[i], value[i].length);
+ }
+ }
+
+ public void setShortArray(short[] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setInt(value[i]);
+ }
+ }
+
+ public void setIntArray(int[] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setInt(value[i]);
+ }
+ }
+
+ public void setLongArray(long[] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setLong(value[i]);
+ }
+ }
+
+ public void setFloatArray(float[] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setDouble(value[i]);
+ }
+ }
+
+ public void setDoubleArray(double[] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setDouble(value[i]);
+ }
+ }
+
+ public void setStringArray(String[] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setString(value[i]);
+ }
+ }
+
+ public void setPeriodArray(Period[] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setPeriod(value[i]);
+ }
+ }
+
+ public void setBigDecimalArray(BigDecimal[] value) {
+ for (int i = 0; i < value.length; i++) {
+ elementWriter.setDecimal(value[i]);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/WriterEvents.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/WriterEvents.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/WriterEvents.java
new file mode 100644
index 0000000..7566f28
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/WriterEvents.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer;
+
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+
+/**
+ * Internal interface used to control the behavior
+ * of writers. Consumers of writers never use this interface; it is
+ * instead used by the code that implements writers.
+ * <p>
+ * Most methods here represent events in a state machine. The top-level
+ * writer provides a set of public methods which trigger one or more of
+ * these internal events. The events draw some fine distinctions between
+ * top-level values and those nested within arrays. See each kind of
+ * writer for the details.
+ * <p>
+ * The events also ensure symmetry between top-level and nested tuples,
+ * especially those nested within an array. That is, an event cannot change
+ * meaning depending on whether the tuple is top-level or nested within an
+ * array. Instead, the order of calls, or selectively making or not making
+ * calls, can change.
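+ * <p>
+ * A sketch of a typical event sequence for one batch (the calls are
+ * issued by the writer implementation, not by writer clients):
+ * <pre><code>
+ * startWrite();      // once per batch
+ * // For each row:
+ * startRow();
+ * //   ... column writes ...
+ * saveRow();         // or restartRow() to abandon the row
+ * endWrite();        // once per batch
+ * </code></pre>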
+ */
+
+public interface WriterEvents {
+
+ /**
+ * Bind the writer to a writer index.
+ *
+ * @param index the writer index (top level or nested for
+ * arrays)
+ */
+
+ void bindIndex(ColumnWriterIndex index);
+
+ ColumnWriterIndex writerIndex();
+
+ /**
+ * Start a write (batch) operation. Performs any vector initialization
+ * required at the start of a batch (especially for offset vectors.)
+ */
+
+ void startWrite();
+
+ /**
+ * Start a new row. To be called only when a row is not active. To
+ * restart a row, call {@link #restartRow()} instead.
+ */
+
+ void startRow();
+
+ /**
+ * End a value. Similar to {@link #saveRow()}, but the save of a value
+ * is conditional on saving the row. This version is primarily of use
+ * in tuples nested inside arrays: it saves each tuple within the array,
+ * advancing to a new position in the array. The update of the array's
+ * offset vector based on the cumulative value saves is done when
+ * saving the row.
+ */
+
+ void endArrayValue();
+
+ /**
+ * During a write to a row, rewind the current index position to
+ * restart the row.
+ * Done when abandoning the current row, such as when filtering out
+ * a row at read time.
+ */
+
+ void restartRow();
+
+ /**
+ * Saves a row. Commits offset vector locations and advances each to
+ * the next position. Can be called only when a row is active.
+ */
+
+ void saveRow();
+
+ /**
+ * End a batch: finalize any vector values.
+ */
+
+ void endWrite();
+
+ /**
+ * The vectors backing this vector are about to roll over. Finish
+ * the current batch up to, but not including, the current row.
+ */
+
+ void preRollover();
+
+ /**
+ * The vectors backing this writer rolled over. This means that data
+ * for the current row has been rolled over into a new vector. Offsets
+ * and indexes should be shifted based on the understanding that data
+ * for the current row now resides at the start of a new vector instead
+ * of its previous location elsewhere in an old vector.
+ */
+
+ void postRollover();
+
+ /**
+ * Return the last write position in the vector. This may be the
+ * same as the writer index position (if the vector was written at
+ * that point), or an earlier point. In either case, this value
+ * points to the last valid value in the vector.
+ *
+ * @return index of the last valid value in the vector
+ */
+
+ int lastWriteIndex();
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/DummyArrayWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/DummyArrayWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/DummyArrayWriter.java
new file mode 100644
index 0000000..7c9f8ba
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/DummyArrayWriter.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer.dummy;
+
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ScalarWriter.ColumnWriterListener;
+import org.apache.drill.exec.vector.accessor.TupleWriter.TupleWriterListener;
+import org.apache.drill.exec.vector.accessor.writer.AbstractArrayWriter;
+import org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter;
+import org.apache.drill.exec.vector.accessor.writer.OffsetVectorWriter;
+
+/**
+ * Dummy scalar array writer that allows a client to write values into
+ * the array, but discards all of them. Provides no-op implementations
+ * of all methods; calls are simply ignored.
+ * <p>
+ * Experience may suggest that some methods must return non-dummy
+ * values, such as the number of items in the array. That can be added
+ * as needed.
+ */
+public class DummyArrayWriter extends AbstractArrayWriter {
+
+ public DummyArrayWriter(
+ AbstractObjectWriter elementWriter) {
+ super(elementWriter);
+ }
+
+ @Override
+ public int size() { return 0; }
+
+ @Override
+ public void save() { }
+
+ @Override
+ public void set(Object... values) { }
+
+ @Override
+ public void setObject(Object array) { }
+
+ @Override
+ public void bindIndex(ColumnWriterIndex index) { }
+
+ @Override
+ public ColumnWriterIndex writerIndex() { return null; }
+
+ @Override
+ public void startWrite() { }
+
+ @Override
+ public void startRow() { }
+
+ @Override
+ public void endArrayValue() { }
+
+ @Override
+ public void restartRow() { }
+
+ @Override
+ public void saveRow() { }
+
+ @Override
+ public void endWrite() { }
+
+ @Override
+ public void preRollover() { }
+
+ @Override
+ public void postRollover() { }
+
+ @Override
+ public int lastWriteIndex() { return 0; }
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) { }
+
+ @Override
+ public void bindListener(TupleWriterListener listener) { }
+
+ @Override
+ public OffsetVectorWriter offsetWriter() { return null; }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/40de8ca4/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/DummyScalarWriter.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/DummyScalarWriter.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/DummyScalarWriter.java
new file mode 100644
index 0000000..e8272d6
--- /dev/null
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/accessor/writer/dummy/DummyScalarWriter.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.accessor.writer.dummy;
+
+import java.math.BigDecimal;
+
+import org.apache.drill.exec.vector.BaseDataValueVector;
+import org.apache.drill.exec.vector.accessor.ColumnWriterIndex;
+import org.apache.drill.exec.vector.accessor.ValueType;
+import org.apache.drill.exec.vector.accessor.writer.AbstractScalarWriter;
+import org.joda.time.Period;
+
+/**
+ * Represents a non-projected column. The writer accepts data, but
+ * discards it. The writer does not participate in writer events,
+ * nor is it backed by a real vector, index or type.
+ */
+
+public class DummyScalarWriter extends AbstractScalarWriter {
+
+ @Override
+ public void bindListener(ColumnWriterListener listener) { }
+
+ @Override
+ public ValueType valueType() { return null; }
+
+ @Override
+ public void setNull() { }
+
+ @Override
+ public void setInt(int value) { }
+
+ @Override
+ public void setLong(long value) { }
+
+ @Override
+ public void setDouble(double value) { }
+
+ @Override
+ public void setString(String value) { }
+
+ @Override
+ public void setBytes(byte[] value, int len) { }
+
+ @Override
+ public void setDecimal(BigDecimal value) { }
+
+ @Override
+ public void setPeriod(Period value) { }
+
+ @Override
+ public void bindIndex(ColumnWriterIndex index) { }
+
+ @Override
+ public ColumnWriterIndex writerIndex() { return null; }
+
+ @Override
+ public void restartRow() { }
+
+ @Override
+ public void endWrite() { }
+
+ @Override
+ public void preRollover() { }
+
+ @Override
+ public void postRollover() { }
+
+ @Override
+ public int lastWriteIndex() { return 0; }
+
+ @Override
+ public BaseDataValueVector vector() { return null; }
+}