You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ju...@apache.org on 2016/10/12 03:11:53 UTC

arrow git commit: ARROW-275: Add tests for UnionVector in Arrow File

Repository: arrow
Updated Branches:
  refs/heads/master 3919a2778 -> bf749f55a


ARROW-275: Add tests for UnionVector in Arrow File

Author: Julien Le Dem <ju...@dremio.com>

Closes #169 from julienledem/union_test and squashes the following commits:

120f504 [Julien Le Dem] ARROW-275: Add tests for UnionVector in Arrow File


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/bf749f55
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/bf749f55
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/bf749f55

Branch: refs/heads/master
Commit: bf749f55a1e24d79b08813a39ce51e9aaf6fb425
Parents: 3919a27
Author: Julien Le Dem <ju...@dremio.com>
Authored: Tue Oct 11 20:11:48 2016 -0700
Committer: Julien Le Dem <ju...@dremio.com>
Committed: Tue Oct 11 20:11:48 2016 -0700

----------------------------------------------------------------------
 .../src/main/codegen/templates/UnionReader.java |   4 +
 .../src/main/codegen/templates/UnionVector.java |  30 ++---
 .../org/apache/arrow/vector/VectorLoader.java   |   2 +
 .../apache/arrow/vector/schema/TypeLayout.java  |   3 +-
 .../apache/arrow/vector/file/TestArrowFile.java | 110 ++++++++++++++++++-
 5 files changed, 127 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/codegen/templates/UnionReader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java
index 7351ae3..c56e95c 100644
--- a/java/vector/src/main/codegen/templates/UnionReader.java
+++ b/java/vector/src/main/codegen/templates/UnionReader.java
@@ -134,6 +134,10 @@ public class UnionReader extends AbstractFieldReader {
 
   </#list>
 
+  public int size() {
+    return getReaderForIndex(idx()).size();
+  }
+
   <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
           <#assign uncappedName = name?uncap_first/>
   <#assign boxedType = (minor.boxedType!type.boxedType) />

http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/codegen/templates/UnionVector.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java
index b14314d..5ca3f90 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -15,17 +15,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-import com.google.common.collect.ImmutableList;
-import com.google.flatbuffers.FlatBufferBuilder;
-import io.netty.buffer.ArrowBuf;
-import org.apache.arrow.flatbuf.Field;
-import org.apache.arrow.flatbuf.Type;
-import org.apache.arrow.flatbuf.Union;
-import org.apache.arrow.vector.ValueVector;
-import org.apache.arrow.vector.types.pojo.ArrowType;
-
-import java.util.ArrayList;
 import java.util.List;
 
 <@pp.dropOutputFile />
@@ -39,7 +28,9 @@ package org.apache.arrow.vector.complex;
 <#include "/@includes/vv_imports.ftl" />
 import com.google.common.collect.ImmutableList;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Iterator;
+import org.apache.arrow.vector.BaseDataValueVector;
 import org.apache.arrow.vector.complex.impl.ComplexCopier;
 import org.apache.arrow.vector.util.CallBack;
 import org.apache.arrow.vector.schema.ArrowFieldNode;
@@ -47,6 +38,7 @@ import org.apache.arrow.vector.schema.ArrowFieldNode;
 import static org.apache.arrow.flatbuf.UnionMode.Sparse;
 
 
+
 /*
  * This class is generated using freemarker and the ${.template_name} template.
  */
@@ -81,6 +73,7 @@ public class UnionVector implements FieldVector {
   private ValueVector singleVector;
 
   private final CallBack callBack;
+  private final List<BufferBacked> innerVectors;
 
   public UnionVector(String name, BufferAllocator allocator, CallBack callBack) {
     this.name = name;
@@ -88,6 +81,7 @@ public class UnionVector implements FieldVector {
     this.internalMap = new MapVector("internal", allocator, callBack);
     this.typeVector = new UInt1Vector("types", allocator);
     this.callBack = callBack;
+    this.innerVectors = Collections.unmodifiableList(Arrays.<BufferBacked>asList(typeVector));
   }
 
   public BufferAllocator getAllocator() {
@@ -101,30 +95,28 @@ public class UnionVector implements FieldVector {
 
   @Override
   public void initializeChildrenFromFields(List<Field> children) {
-    getMap().initializeChildrenFromFields(children);
+    internalMap.initializeChildrenFromFields(children);
   }
 
   @Override
   public List<FieldVector> getChildrenFromFields() {
-    return getMap().getChildrenFromFields();
+    return internalMap.getChildrenFromFields();
   }
 
   @Override
   public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
-    // TODO
-    throw new UnsupportedOperationException();
+    BaseDataValueVector.load(getFieldInnerVectors(), ownBuffers);
+    this.valueCount = fieldNode.getLength();
   }
 
   @Override
   public List<ArrowBuf> getFieldBuffers() {
-    // TODO
-    throw new UnsupportedOperationException();
+    return BaseDataValueVector.unload(getFieldInnerVectors());
   }
 
   @Override
   public List<BufferBacked> getFieldInnerVectors() {
-    // TODO
-    throw new UnsupportedOperationException();
+     return this.innerVectors;
   }
   
   public NullableMapVector getMap() {

http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
index 58ac68b..b7040da 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
@@ -74,6 +74,8 @@ public class VectorLoader {
   }
 
   private void loadBuffers(FieldVector vector, Field field, Iterator<ArrowBuf> buffers, Iterator<ArrowFieldNode> nodes) {
+    checkArgument(nodes.hasNext(),
+        "no more field nodes for field " + field + " and vector " + vector);
     ArrowFieldNode fieldNode = nodes.next();
     List<VectorLayout> typeLayout = field.getTypeLayout().getVectors();
     List<ArrowBuf> ownBuffers = new ArrayList<>(typeLayout.size());

http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
index 06ae203..c5f53fe 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/TypeLayout.java
@@ -82,8 +82,7 @@ public class TypeLayout {
             break;
           case UnionMode.Sparse:
             vectors = asList(
-                validityVector(),
-                typeVector()
+                typeVector() // type of the value at the index or 0 if null
                 );
             break;
           default:

http://git-wip-us.apache.org/repos/asf/arrow/blob/bf749f55/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
index 7a5e7b5..0f28d53 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/file/TestArrowFile.java
@@ -266,7 +266,7 @@ public class TestArrowFile {
       Assert.assertEquals(i % 3, rootReader.reader("list").size());
       NullableTimeStampHolder h = new NullableTimeStampHolder();
       rootReader.reader("map").reader("timestamp").read(h);
-      Assert.assertEquals(i, h.value % COUNT);
+      Assert.assertEquals(i, h.value);
     }
   }
 
@@ -339,4 +339,112 @@ public class TestArrowFile {
     }
   }
 
+  @Test
+  public void testWriteReadUnion() throws IOException {
+    File file = new File("target/mytest_write_union.arrow");
+    int count = COUNT;
+    try (
+        BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+        NullableMapVector parent = new NullableMapVector("parent", vectorAllocator, null)) {
+
+      writeUnionData(count, parent);
+
+      printVectors(parent.getChildrenFromFields());
+
+      validateUnionData(count, parent);
+
+      write(parent.getChild("root"), file);
+    }
+    // read the file back and validate the loaded vectors
+    try (
+        BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+        FileInputStream fileInputStream = new FileInputStream(file);
+        ArrowReader arrowReader = new ArrowReader(fileInputStream.getChannel(), readerAllocator);
+        BufferAllocator vectorAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+        NullableMapVector parent = new NullableMapVector("parent", vectorAllocator, null)
+        ) {
+      ArrowFooter footer = arrowReader.readFooter();
+      Schema schema = footer.getSchema();
+      LOGGER.debug("reading schema: " + schema);
+
+      // initialize vectors
+
+      NullableMapVector root = parent.addOrGet("root", MinorType.MAP, NullableMapVector.class);
+      VectorLoader vectorLoader = new VectorLoader(schema, root);
+
+      List<ArrowBlock> recordBatches = footer.getRecordBatches();
+      for (ArrowBlock rbBlock : recordBatches) {
+        try (ArrowRecordBatch recordBatch = arrowReader.readRecordBatch(rbBlock)) {
+          vectorLoader.load(recordBatch);
+        }
+        validateUnionData(count, parent);
+      }
+    }
+  }
+
+  public void validateUnionData(int count, MapVector parent) {
+    MapReader rootReader = new SingleMapReaderImpl(parent).reader("root");
+    for (int i = 0; i < count; i++) {
+      rootReader.setPosition(i);
+      switch (i % 4) {
+      case 0:
+        Assert.assertEquals(i, rootReader.reader("union").readInteger().intValue());
+        break;
+      case 1:
+        Assert.assertEquals(i, rootReader.reader("union").readLong().longValue());
+        break;
+      case 2:
+        Assert.assertEquals(i % 3, rootReader.reader("union").size());
+        break;
+      case 3:
+        NullableTimeStampHolder h = new NullableTimeStampHolder();
+        rootReader.reader("union").reader("timestamp").read(h);
+        Assert.assertEquals(i, h.value);
+        break;
+      }
+    }
+  }
+
+  public void writeUnionData(int count, NullableMapVector parent) {
+    ArrowBuf varchar = allocator.buffer(3);
+    varchar.readerIndex(0);
+    varchar.setByte(0, 'a');
+    varchar.setByte(1, 'b');
+    varchar.setByte(2, 'c');
+    varchar.writerIndex(3);
+    ComplexWriter writer = new ComplexWriterImpl("root", parent);
+    MapWriter rootWriter = writer.rootAsMap();
+    IntWriter intWriter = rootWriter.integer("union");
+    BigIntWriter bigIntWriter = rootWriter.bigInt("union");
+    ListWriter listWriter = rootWriter.list("union");
+    MapWriter mapWriter = rootWriter.map("union");
+    for (int i = 0; i < count; i++) {
+      switch (i % 4) {
+      case 0:
+        intWriter.setPosition(i);
+        intWriter.writeInt(i);
+        break;
+      case 1:
+        bigIntWriter.setPosition(i);
+        bigIntWriter.writeBigInt(i);
+        break;
+      case 2:
+        listWriter.setPosition(i);
+        listWriter.startList();
+        for (int j = 0; j < i % 3; j++) {
+          listWriter.varChar().writeVarChar(0, 3, varchar);
+        }
+        listWriter.endList();
+        break;
+      case 3:
+        mapWriter.setPosition(i);
+        mapWriter.start();
+        mapWriter.timeStamp("timestamp").writeTimeStamp(i);
+        mapWriter.end();
+        break;
+      }
+    }
+    writer.setValueCount(count);
+    varchar.release();
+  }
 }