You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ju...@apache.org on 2016/10/12 03:11:53 UTC
arrow git commit: ARROW-275: Add tests for UnionVector in Arrow File
Repository: arrow
Updated Branches:
refs/heads/master 3919a2778 -> bf749f55a
ARROW-275: Add tests for UnionVector in Arrow File
Author: Julien Le Dem <ju...@dremio.com>
Closes #169 from julienledem/union_test and squashes the following commits:
120f504 [Julien Le Dem] ARROW-275: Add tests for UnionVector in Arrow File
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/bf749f55
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/bf749f55
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/bf749f55
Branch: refs/heads/master
Commit: bf749f55a1e24d79b08813a39ce51e9aaf6fb425
Parents: 3919a27
Author: Julien Le Dem <ju...@dremio.com>
Authored: Tue Oct 11 20:11:48 2016 -0700
Committer: Julien Le Dem <ju...@dremio.com>
Committed: Tue Oct 11 20:11:48 2016 -0700
----------------------------------------------------------------------
.../src/main/codegen/templates/UnionReader.java | 4 +
.../src/main/codegen/templates/UnionVector.java | 30 ++---
.../org/apache/arrow/vector/VectorLoader.java | 2 +
.../apache/arrow/vector/schema/TypeLayout.java | 3 +-
.../apache/arrow/vector/file/TestArrowFile.java | 110 ++++++++++++++++++-
5 files changed, 127 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/codegen/templates/UnionReader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java
index 7351ae3..c56e95c 100644
--- a/java/vector/src/main/codegen/templates/UnionReader.java
+++ b/java/vector/src/main/codegen/templates/UnionReader.java
@@ -134,6 +134,10 @@ public class UnionReader extends AbstractFieldReader {
</#list>
+ /**
+  * Returns the size reported by the reader that getReaderForIndex selects for the
+  * current index (idx()); the call is delegated unchanged to that reader.
+  */
+ public int size() {
+ return getReaderForIndex(idx()).size();
+ }
+
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign uncappedName = name?uncap_first/>
<#assign boxedType = (minor.boxedType!type.boxedType) />
http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/codegen/templates/UnionVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java
index b14314d..5ca3f90 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -15,17 +15,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-import com.google.common.collect.ImmutableList;
-import com.google.flatbuffers.FlatBufferBuilder;
-import io.netty.buffer.ArrowBuf;
-import org.apache.arrow.flatbuf.Field;
-import org.apache.arrow.flatbuf.Type;
-import org.apache.arrow.flatbuf.Union;
-import org.apache.arrow.vector.ValueVector;
-import org.apache.arrow.vector.types.pojo.ArrowType;
-
-import java.util.ArrayList;
import java.util.List;
<@pp.dropOutputFile />
@@ -39,7 +28,9 @@ package org.apache.arrow.vector.complex;
<#include "/@includes/vv_imports.ftl" />
import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Iterator;
+import org.apache.arrow.vector.BaseDataValueVector;
import org.apache.arrow.vector.complex.impl.ComplexCopier;
import org.apache.arrow.vector.util.CallBack;
import org.apache.arrow.vector.schema.ArrowFieldNode;
@@ -47,6 +38,7 @@ import org.apache.arrow.vector.schema.ArrowFieldNode;
import static org.apache.arrow.flatbuf.UnionMode.Sparse;
+
/*
* This class is generated using freemarker and the ${.template_name} template.
*/
@@ -81,6 +73,7 @@ public class UnionVector implements FieldVector {
private ValueVector singleVector;
private final CallBack callBack;
+ private final List<BufferBacked> innerVectors;
public UnionVector(String name, BufferAllocator allocator, CallBack callBack) {
this.name = name;
@@ -88,6 +81,7 @@ public class UnionVector implements FieldVector {
this.internalMap = new MapVector("internal", allocator, callBack);
this.typeVector = new UInt1Vector("types", allocator);
this.callBack = callBack;
+ this.innerVectors = Collections.unmodifiableList(Arrays.<BufferBacked>asList(typeVector));
}
public BufferAllocator getAllocator() {
@@ -101,30 +95,28 @@ public class UnionVector implements FieldVector {
@Override
public void initializeChildrenFromFields(List<Field> children) {
- getMap().initializeChildrenFromFields(children);
+ internalMap.initializeChildrenFromFields(children);
}
@Override
public List<FieldVector> getChildrenFromFields() {
- return getMap().getChildrenFromFields();
+ return internalMap.getChildrenFromFields();
}
@Override
public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
- // TODO
- throw new UnsupportedOperationException();
+ BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers);
+ this.valueCount = fieldNode.getLength();
}
@Override
public List<ArrowBuf> getFieldBuffers() {
- // TODO
- throw new UnsupportedOperationException();
+ return BaseDataValueVector.unload(getFieldInnerVectors());
}
@Override
public List<BufferBacked> getFieldInnerVectors() {
- // TODO
- throw new UnsupportedOperationException();
+ return this.innerVectors;
}
public NullableMapVector getMap() {
http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
index 58ac68b..b7040da 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
@@ -74,6 +74,8 @@ public class VectorLoader {
}
private void loadBuffers(FieldVector vector, Field field, Iterator<ArrowBuf> buffers, Iterator<ArrowFieldNode> nodes) {
+ checkArgument(nodes.hasNext(),
+ "no more field nodes for for field " + field + " and vector " + vector);
ArrowFieldNode fieldNode = nodes.next();
List<VectorLayout> typeLayout = field.getTypeLayout().getVectors();
List<ArrowBuf> ownBuffers = new ArrayList<>(typeLayout.size());
http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
index 06ae203..c5f53fe 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
@@ -82,8 +82,7 @@ public class TypeLayout {
break;
case UnionMode.Sparse:
vectors = asList(
- validityVector(),
- typeVector()
+ typeVector() // type of the value at the index or 0 if null
);
break;
default:
http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
index 7a5e7b5..0f28d53 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
@@ -266,7 +266,7 @@ public class TestArrowFile {
Assert.assertEquals(i % 3, rootReader.reader("list").size());
NullableTimeStampHolder h = new NullableTimeStampHolder();
rootReader.reader("map").reader("timestamp").read(h);
- Assert.assertEquals(i, h.value % COUNT);
+ Assert.assertEquals(i, h.value);
}
}
@@ -339,4 +339,112 @@ public class TestArrowFile {
}
}
+  /**
+   * Round-trips union data through an Arrow file: writes COUNT union values into a
+   * "root" map, validates them in memory, saves them to disk, then loads every record
+   * batch back into fresh vectors and validates the contents again.
+   */
+  @Test
+  public void testWriteReadUnion() throws IOException {
+    File arrowFile = new File("target/mytest_write_union.arrow");
+    int expectedCount = COUNT;
+
+    // Write phase: populate, sanity-check, and persist the original vectors.
+    try (
+        BufferAllocator originalAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+        NullableMapVector source = new NullableMapVector("parent", originalAllocator, null)) {
+      writeUnionData(expectedCount, source);
+      printVectors(source.getChildrenFromFields());
+      validateUnionData(expectedCount, source);
+      write(source.getChild("root"), arrowFile);
+    }
+
+    // Read phase: resources are declared in the same order as before so that they are
+    // closed in the same (reverse) order.
+    try (
+        BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+        FileInputStream input = new FileInputStream(arrowFile);
+        ArrowReader arrowReader = new ArrowReader(input.getChannel(), readerAllocator);
+        BufferAllocator finalAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+        NullableMapVector target = new NullableMapVector("parent", finalAllocator, null)
+    ) {
+      ArrowFooter footer = arrowReader.readFooter();
+      Schema schema = footer.getSchema();
+      LOGGER.debug("reading schema: " + schema);
+
+      // The loader fills the "root" child created here from each record batch.
+      VectorLoader loader =
+          new VectorLoader(schema, target.addOrGet("root", MinorType.MAP, NullableMapVector.class));
+
+      for (ArrowBlock block : footer.getRecordBatches()) {
+        try (ArrowRecordBatch batch = arrowReader.readRecordBatch(block)) {
+          loader.load(batch);
+        }
+        validateUnionData(expectedCount, target);
+      }
+    }
+  }
+
+  /**
+   * Checks the "union" field under the "root" map of {@code parent} for positions
+   * {@code 0..count-1}. The expected content depends on {@code i % 4}: an integer equal
+   * to {@code i}, a long equal to {@code i}, a value whose size is {@code i % 3}, or a
+   * map whose "timestamp" entry equals {@code i}.
+   *
+   * @param count number of positions to verify
+   * @param parent vector holding the "root" map to read from
+   */
+  public void validateUnionData(int count, MapVector parent) {
+    MapReader rootReader = new SingleMapReaderImpl(parent).reader("root");
+    for (int position = 0; position < count; position++) {
+      rootReader.setPosition(position);
+      int variant = position % 4;
+      if (variant == 0) {
+        Assert.assertEquals(position, rootReader.reader("union").readInteger().intValue());
+      } else if (variant == 1) {
+        Assert.assertEquals(position, rootReader.reader("union").readLong().longValue());
+      } else if (variant == 2) {
+        Assert.assertEquals(position % 3, rootReader.reader("union").size());
+      } else if (variant == 3) {
+        NullableTimeStampHolder holder = new NullableTimeStampHolder();
+        rootReader.reader("union").reader("timestamp").read(holder);
+        Assert.assertEquals(position, holder.value);
+      }
+    }
+  }
+
+  /**
+   * Populates a "union" field under the "root" map of {@code parent} with {@code count}
+   * values whose type cycles with {@code i % 4}: an int equal to {@code i}, a bigint
+   * equal to {@code i}, a list of {@code i % 3} varchar entries ("abc"), then a map
+   * whose "timestamp" entry is {@code i}.
+   *
+   * @param count number of positions to write; also used as the final value count
+   * @param parent vector that receives the "root" map
+   */
+  public void writeUnionData(int count, NullableMapVector parent) {
+    // Three-byte scratch buffer holding "abc", reused for every varchar list entry.
+    ArrowBuf varchar = allocator.buffer(3);
+    try {
+      varchar.readerIndex(0);
+      varchar.setByte(0, 'a');
+      varchar.setByte(1, 'b');
+      varchar.setByte(2, 'c');
+      varchar.writerIndex(3);
+      ComplexWriter writer = new ComplexWriterImpl("root", parent);
+      MapWriter rootWriter = writer.rootAsMap();
+      // Four writers registered under the same field name "union".
+      IntWriter intWriter = rootWriter.integer("union");
+      BigIntWriter bigIntWriter = rootWriter.bigInt("union");
+      ListWriter listWriter = rootWriter.list("union");
+      MapWriter mapWriter = rootWriter.map("union");
+      for (int i = 0; i < count; i++) {
+        switch (i % 4) {
+          case 0:
+            intWriter.setPosition(i);
+            intWriter.writeInt(i);
+            break;
+          case 1:
+            bigIntWriter.setPosition(i);
+            bigIntWriter.writeBigInt(i);
+            break;
+          case 2:
+            listWriter.setPosition(i);
+            listWriter.startList();
+            for (int j = 0; j < i % 3; j++) {
+              listWriter.varChar().writeVarChar(0, 3, varchar);
+            }
+            listWriter.endList();
+            break;
+          case 3:
+            mapWriter.setPosition(i);
+            mapWriter.start();
+            mapWriter.timeStamp("timestamp").writeTimeStamp(i);
+            mapWriter.end();
+            break;
+        }
+      }
+      writer.setValueCount(count);
+    } finally {
+      // Release the scratch buffer even when a writer call throws, so it is never left
+      // allocated on the failure path.
+      varchar.release();
+    }
+  }
}