You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/08/10 06:08:35 UTC

[arrow] branch master updated: ARROW-6079: [Java] Implement/test UnionFixedSizeListWriter for FixedSizeListVector

This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a4b7059  ARROW-6079: [Java] Implement/test UnionFixedSizeListWriter for FixedSizeListVector
a4b7059 is described below

commit a4b7059324ca2aca51635eaf5542c8fcecef1a8e
Author: tianchen <ni...@alibaba-inc.com>
AuthorDate: Fri Aug 9 23:02:12 2019 -0700

    ARROW-6079: [Java] Implement/test UnionFixedSizeListWriter for FixedSizeListVector
    
    Related to [ARROW-6079](https://issues.apache.org/jira/browse/ARROW-6079).
    
    Now we have two list vectors: ListVector and FixedSizeListVector.
    
    ListVector already has UnionListWriter for writing data; however, FixedSizeListVector has no writer yet, and it seems the only way for users to write data is to get the inner vector and set the values manually.
    
    Implementing a writer for FixedSizeListVector is useful in some cases.
    
    Closes #4973 from tianchen92/ARROW-6079 and squashes the following commits:
    
    ad18d1cd4 <tianchen> fix test to avoid string comparison
    d3338c7cc <tianchen> ARROW-6079:  Implement/test UnionFixedSizeListWriter for FixedSizeListVector
    
    Authored-by: tianchen <ni...@alibaba-inc.com>
    Signed-off-by: Micah Kornfield <em...@gmail.com>
---
 .../templates/UnionFixedSizeListWriter.java        | 217 +++++++++++++++++++++
 .../arrow/vector/complex/FixedSizeListVector.java  |  19 ++
 .../vector/complex/impl/PromotableWriter.java      |  37 +++-
 .../arrow/vector/TestFixedSizeListVector.java      | 104 ++++++++--
 4 files changed, 360 insertions(+), 17 deletions(-)

diff --git a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java
new file mode 100644
index 0000000..e7f3e39
--- /dev/null
+++ b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import io.netty.buffer.ArrowBuf;
+import org.apache.arrow.vector.complex.writer.DecimalWriter;
+import org.apache.arrow.vector.holders.DecimalHolder;
+
+import java.lang.UnsupportedOperationException;
+import java.math.BigDecimal;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionFixedSizeListWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+    package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+
+@SuppressWarnings("unused")
+public class UnionFixedSizeListWriter extends AbstractFieldWriter {
+
+  protected FixedSizeListVector vector;
+  protected PromotableWriter writer;
+  private boolean inStruct = false;
+  private String structName;
+  private int lastIndex = 0;
+  private final int listSize;
+
+  public UnionFixedSizeListWriter(FixedSizeListVector vector) {
+    this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+  }
+
+  public UnionFixedSizeListWriter(FixedSizeListVector vector, NullableStructWriterFactory nullableStructWriterFactory) {
+    this.vector = vector;
+    this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory);
+    this.listSize = vector.getListSize();
+  }
+
+  public UnionFixedSizeListWriter(FixedSizeListVector vector, AbstractFieldWriter parent) {
+    this(vector);
+  }
+
+  @Override
+  public void allocate() {
+    vector.allocateNew();
+  }
+
+  @Override
+  public void clear() {
+    vector.clear();
+  }
+
+  /**
+   * Returns the field of the fixed-size list vector this writer writes into.
+   *
+   * @return the field reported by the underlying vector
+   */
+  @Override
+  public Field getField() {
+    // Delegate to the backing vector rather than returning null, so callers can inspect the field.
+    return vector.getField();
+  }
+
+  public void setValueCount(int count) {
+    vector.setValueCount(count);
+  }
+
+  @Override
+  public int getValueCapacity() {
+    return vector.getValueCapacity();
+  }
+
+  @Override
+  public void close() throws Exception {
+
+  }
+
+  @Override
+  public void setPosition(int index) {
+    super.setPosition(index);
+  }
+  <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+  <#assign fields = minor.fields!type.fields />
+  <#assign uncappedName = name?uncap_first/>
+  <#if uncappedName == "int" ><#assign uncappedName = "integer" /></#if>
+  <#if !minor.typeParams?? >
+
+  @Override
+  public ${name}Writer ${uncappedName}() {
+    return this;
+  }
+
+  @Override
+  public ${name}Writer ${uncappedName}(String name) {
+    structName = name;
+    return writer.${uncappedName}(name);
+  }
+  </#if>
+  </#list></#list>
+
+  @Override
+  public DecimalWriter decimal() {
+    return this;
+  }
+
+  @Override
+  public DecimalWriter decimal(String name, int scale, int precision) {
+    return writer.decimal(name, scale, precision);
+  }
+
+  @Override
+  public DecimalWriter decimal(String name) {
+    return writer.decimal(name);
+  }
+
+  @Override
+  public StructWriter struct() {
+    inStruct = true;
+    return this;
+  }
+
+  @Override
+  public ListWriter list() {
+    return writer;
+  }
+
+  @Override
+  public ListWriter list(String name) {
+    ListWriter listWriter = writer.list(name);
+    return listWriter;
+  }
+
+  @Override
+  public StructWriter struct(String name) {
+    StructWriter structWriter = writer.struct(name);
+    return structWriter;
+  }
+
+  @Override
+  public void startList() {
+    int start = vector.startNewValue(idx());
+    writer.setPosition(start);
+  }
+
+  @Override
+  public void endList() {
+    setPosition(idx() + 1);
+  }
+
+  @Override
+  public void start() {
+    writer.start();
+  }
+
+  @Override
+  public void end() {
+    writer.end();
+    inStruct = false;
+  }
+
+  /**
+   * Writes a decimal from the holder at the inner writer's current position and advances
+   * that position by one.
+   *
+   * @param holder holder carrying the decimal value to write
+   * @throws IllegalStateException if the inner writer's position is already at or past the end
+   *     of the slots belonging to the list at the current index
+   */
+  @Override
+  public void write(DecimalHolder holder) {
+    // Same bound as the generated write methods: never spill into the next list's slots.
+    if (writer.idx() >= (idx() + 1) * listSize) {
+      throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+    }
+    writer.write(holder);
+    writer.setPosition(writer.idx() + 1);
+  }
+
+  /**
+   * Writes a decimal read from the buffer at the inner writer's current position and advances
+   * that position by one.
+   *
+   * @param start start offset passed through to the inner writer
+   * @param buffer buffer passed through to the inner writer
+   * @throws IllegalStateException if the inner writer's position is already at or past the end
+   *     of the slots belonging to the list at the current index
+   */
+  public void writeDecimal(int start, ArrowBuf buffer) {
+    // Same bound as the generated write methods: never spill into the next list's slots.
+    if (writer.idx() >= (idx() + 1) * listSize) {
+      throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+    }
+    writer.writeDecimal(start, buffer);
+    writer.setPosition(writer.idx() + 1);
+  }
+
+  /**
+   * Writes the decimal value at the inner writer's current position and advances that
+   * position by one.
+   *
+   * @param value decimal value to write
+   * @throws IllegalStateException if the inner writer's position is already at or past the end
+   *     of the slots belonging to the list at the current index
+   */
+  public void writeDecimal(BigDecimal value) {
+    // Same bound as the generated write methods: never spill into the next list's slots.
+    if (writer.idx() >= (idx() + 1) * listSize) {
+      throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+    }
+    writer.writeDecimal(value);
+    writer.setPosition(writer.idx() + 1);
+  }
+
+  <#list vv.types as type>
+    <#list type.minor as minor>
+      <#assign name = minor.class?cap_first />
+      <#assign fields = minor.fields!type.fields />
+      <#assign uncappedName = name?uncap_first/>
+      <#if !minor.typeParams?? >
+  @Override
+  public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+    if (writer.idx() >= (idx() + 1) * listSize) {
+      throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+    }
+    writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+    writer.setPosition(writer.idx() + 1);
+  }
+
+  public void write(${name}Holder holder) {
+    if (writer.idx() >= (idx() + 1) * listSize) {
+      throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+    }
+    writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, </#if></#list>);
+    writer.setPosition(writer.idx() + 1);
+  }
+
+      </#if>
+    </#list>
+  </#list>
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
index 39d0287..5be308e 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
@@ -40,6 +40,7 @@ import org.apache.arrow.vector.FieldVector;
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.ZeroVector;
 import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
 import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.pojo.ArrowType;
@@ -267,6 +268,24 @@ public class FixedSizeListVector extends BaseValueVector implements FieldVector,
     return vector;
   }
 
+  /**
+   * Start a new value in the list vector.
+   *
+   * <p>Reallocates the validity buffer until its value capacity exceeds {@code index}, then sets
+   * the validity bit at {@code index} to one.
+   *
+   * @param index index of the value to start
+   * @return {@code index * listSize}; the writer uses this as the position of the list's first element
+   */
+  public int startNewValue(int index) {
+    while (index >= getValidityBufferValueCapacity()) {
+      reallocValidityBuffer();
+    }
+
+    BitVectorHelper.setValidityBitToOne(validityBuffer, index);
+    return index * listSize;
+  }
+
+  public UnionFixedSizeListWriter getWriter() {
+    return new UnionFixedSizeListWriter(this);
+  }
+
   @Override
   public void setInitialCapacity(int numRecords) {
     validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
index 9009040..ea3bc62 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
@@ -23,6 +23,7 @@ import org.apache.arrow.vector.FieldVector;
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.ZeroVector;
 import org.apache.arrow.vector.complex.AbstractStructVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.complex.StructVector;
 import org.apache.arrow.vector.complex.UnionVector;
@@ -48,6 +49,7 @@ public class PromotableWriter extends AbstractPromotableFieldWriter {
 
   private final AbstractStructVector parentContainer;
   private final ListVector listVector;
+  private final FixedSizeListVector fixedListVector;
   private final NullableStructWriterFactory nullableStructWriterFactory;
   private int position;
   private static final int MAX_DECIMAL_PRECISION = 38;
@@ -86,6 +88,7 @@ public class PromotableWriter extends AbstractPromotableFieldWriter {
       NullableStructWriterFactory nullableStructWriterFactory) {
     this.parentContainer = parentContainer;
     this.listVector = null;
+    this.fixedListVector = null;
     this.nullableStructWriterFactory = nullableStructWriterFactory;
     init(v);
   }
@@ -104,6 +107,16 @@ public class PromotableWriter extends AbstractPromotableFieldWriter {
    * Constructs a new instance.
    *
    * @param v The vector to initialize the writer with.
+   * @param fixedListVector The vector that serves as a parent of v.
+   */
+  public PromotableWriter(ValueVector v, FixedSizeListVector fixedListVector) {
+    this(v, fixedListVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+  }
+
+  /**
+   * Constructs a new instance.
+   *
+   * @param v The vector to initialize the writer with.
    * @param listVector The vector that serves as a parent of v.
    * @param nullableStructWriterFactory The factory to create the delegate writer.
    */
@@ -113,6 +126,25 @@ public class PromotableWriter extends AbstractPromotableFieldWriter {
       NullableStructWriterFactory nullableStructWriterFactory) {
     this.listVector = listVector;
     this.parentContainer = null;
+    this.fixedListVector = null;
+    this.nullableStructWriterFactory = nullableStructWriterFactory;
+    init(v);
+  }
+
+  /**
+   * Constructs a new instance.
+   *
+   * @param v The vector to initialize the writer with.
+   * @param fixedListVector The vector that serves as a parent of v.
+   * @param nullableStructWriterFactory The factory to create the delegate writer.
+   */
+  public PromotableWriter(
+      ValueVector v,
+      FixedSizeListVector fixedListVector,
+      NullableStructWriterFactory nullableStructWriterFactory) {
+    this.fixedListVector = fixedListVector;
+    this.parentContainer = null;
+    this.listVector = null;
     this.nullableStructWriterFactory = nullableStructWriterFactory;
     init(v);
   }
@@ -189,7 +221,8 @@ public class PromotableWriter extends AbstractPromotableFieldWriter {
         arrowType = type.getType();
       }
       FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null);
-      ValueVector v = listVector.addOrGetVector(fieldType).getVector();
+      ValueVector v = listVector != null ? listVector.addOrGetVector(fieldType).getVector() :
+          fixedListVector.addOrGetVector(fieldType).getVector();
       v.allocateNew();
       setWriter(v, arrowType);
       writer.setPosition(position);
@@ -219,6 +252,8 @@ public class PromotableWriter extends AbstractPromotableFieldWriter {
       unionVector.allocateNew();
     } else if (listVector != null) {
       unionVector = listVector.promoteToUnion();
+    } else if (fixedListVector != null) {
+      unionVector = fixedListVector.promoteToUnion();
     }
     unionVector.addVector((FieldVector) tp.getTo());
     writer = new UnionWriter(unionVector, nullableStructWriterFactory);
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
index ddaa523..cf0c813 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
@@ -17,17 +17,22 @@
 
 package org.apache.arrow.vector;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 import java.util.Arrays;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.complex.FixedSizeListVector;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
 import org.apache.arrow.vector.complex.impl.UnionListReader;
 import org.apache.arrow.vector.complex.reader.FieldReader;
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.JsonStringArrayList;
 import org.apache.arrow.vector.util.TransferPair;
 import org.junit.After;
 import org.junit.Assert;
@@ -66,11 +71,11 @@ public class TestFixedSizeListVector {
         reader.setPosition(i);
         Assert.assertTrue(reader.isSet());
         Assert.assertTrue(reader.next());
-        Assert.assertEquals(i, reader.reader().readInteger().intValue());
+        assertEquals(i, reader.reader().readInteger().intValue());
         Assert.assertTrue(reader.next());
-        Assert.assertEquals(i + 10, reader.reader().readInteger().intValue());
+        assertEquals(i + 10, reader.reader().readInteger().intValue());
         Assert.assertFalse(reader.next());
-        Assert.assertEquals(Arrays.asList(i, i + 10), reader.readObject());
+        assertEquals(Arrays.asList(i, i + 10), reader.readObject());
       }
     }
   }
@@ -97,11 +102,11 @@ public class TestFixedSizeListVector {
         if (i % 2 == 0) {
           Assert.assertTrue(reader.isSet());
           Assert.assertTrue(reader.next());
-          Assert.assertEquals(i + 0.1f, reader.reader().readFloat(), 0.00001);
+          assertEquals(i + 0.1f, reader.reader().readFloat(), 0.00001);
           Assert.assertTrue(reader.next());
-          Assert.assertEquals(i + 10.1f, reader.reader().readFloat(), 0.00001);
+          assertEquals(i + 10.1f, reader.reader().readFloat(), 0.00001);
           Assert.assertFalse(reader.next());
-          Assert.assertEquals(Arrays.asList(i + 0.1f, i + 10.1f), reader.readObject());
+          assertEquals(Arrays.asList(i + 0.1f, i + 10.1f), reader.readObject());
         } else {
           Assert.assertFalse(reader.isSet());
           Assert.assertNull(reader.readObject());
@@ -141,7 +146,7 @@ public class TestFixedSizeListVector {
             FieldReader innerListReader = reader.reader();
             for (int k = 0; k < 2; k++) {
               Assert.assertTrue(innerListReader.next());
-              Assert.assertEquals(k + j, innerListReader.reader().readInteger().intValue());
+              assertEquals(k + j, innerListReader.reader().readInteger().intValue());
             }
             Assert.assertFalse(innerListReader.next());
           }
@@ -188,29 +193,29 @@ public class TestFixedSizeListVector {
       reader.setPosition(1);
       Assert.assertTrue(reader.isSet());
       Assert.assertTrue(reader.next());
-      Assert.assertEquals(0.1f, reader.reader().readFloat(), 0.00001);
+      assertEquals(0.1f, reader.reader().readFloat(), 0.00001);
       Assert.assertTrue(reader.next());
-      Assert.assertEquals(10.1f, reader.reader().readFloat(), 0.00001);
+      assertEquals(10.1f, reader.reader().readFloat(), 0.00001);
       Assert.assertFalse(reader.next());
-      Assert.assertEquals(Arrays.asList(0.1f, 10.1f), reader.readObject());
+      assertEquals(Arrays.asList(0.1f, 10.1f), reader.readObject());
 
       reader.setPosition(2);
       Assert.assertTrue(reader.isSet());
       Assert.assertTrue(reader.next());
-      Assert.assertEquals(2.1f, reader.reader().readFloat(), 0.00001);
+      assertEquals(2.1f, reader.reader().readFloat(), 0.00001);
       Assert.assertTrue(reader.next());
-      Assert.assertEquals(12.1f, reader.reader().readFloat(), 0.00001);
+      assertEquals(12.1f, reader.reader().readFloat(), 0.00001);
       Assert.assertFalse(reader.next());
-      Assert.assertEquals(Arrays.asList(2.1f, 12.1f), reader.readObject());
+      assertEquals(Arrays.asList(2.1f, 12.1f), reader.readObject());
 
       reader.setPosition(3);
       Assert.assertTrue(reader.isSet());
       Assert.assertTrue(reader.next());
-      Assert.assertEquals(4.1f, reader.reader().readFloat(), 0.00001);
+      assertEquals(4.1f, reader.reader().readFloat(), 0.00001);
       Assert.assertTrue(reader.next());
-      Assert.assertEquals(14.1f, reader.reader().readFloat(), 0.00001);
+      assertEquals(14.1f, reader.reader().readFloat(), 0.00001);
       Assert.assertFalse(reader.next());
-      Assert.assertEquals(Arrays.asList(4.1f, 14.1f), reader.readObject());
+      assertEquals(Arrays.asList(4.1f, 14.1f), reader.readObject());
 
       for (int i = 4; i < 10; i++) {
         reader.setPosition(i);
@@ -232,4 +237,71 @@ public class TestFixedSizeListVector {
     }
   }
 
+  @Test
+  public void testUnionFixedSizeListWriter() throws Exception {
+    try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 3, allocator)) {
+
+      UnionFixedSizeListWriter writer1 = vector1.getWriter();
+      writer1.allocate();
+
+      int[] values1 = new int[] {1, 2, 3};
+      int[] values2 = new int[] {4, 5, 6};
+      int[] values3 = new int[] {7, 8, 9};
+
+      //set some values
+      writeListVector(writer1, values1);
+      writeListVector(writer1, values2);
+      writeListVector(writer1, values3);
+      writer1.setValueCount(3);
+
+      assertEquals(3, vector1.getValueCount());
+
+      int[] realValue1 = convertListToIntArray((JsonStringArrayList) vector1.getObject(0));
+      assertTrue(Arrays.equals(values1, realValue1));
+      int[] realValue2 = convertListToIntArray((JsonStringArrayList) vector1.getObject(1));
+      assertTrue(Arrays.equals(values2, realValue2));
+      int[] realValue3 = convertListToIntArray((JsonStringArrayList) vector1.getObject(2));
+      assertTrue(Arrays.equals(values3, realValue3));
+    }
+  }
+
+  /**
+   * Writing more values into one list than the vector's fixed list size must fail with
+   * {@link IllegalStateException}.
+   */
+  @Test(expected = IllegalStateException.class)
+  public void testWriteIllegalData() throws Exception {
+    try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 3, allocator)) {
+
+      UnionFixedSizeListWriter writer1 = vector1.getWriter();
+      writer1.allocate();
+
+      int[] values1 = new int[] {1, 2, 3};
+      int[] values2 = new int[] {4, 5, 6, 7, 8};
+
+      // The first list exactly fills its three slots.
+      writeListVector(writer1, values1);
+      // The second list carries five values; the write past the third one must throw.
+      // Nothing may follow this call: it never returns normally, so later statements would be dead code.
+      writeListVector(writer1, values2);
+    }
+  }
+
+  private int[] convertListToIntArray(JsonStringArrayList list) {
+    int[] values = new int[list.size()];
+    for (int i = 0; i < list.size(); i++) {
+      values[i] = (int) list.get(i);
+    }
+    return values;
+  }
+
+  private void writeListVector(UnionFixedSizeListWriter writer, int[] values) throws Exception {
+    writer.startList();
+    for (int v: values) {
+      writer.integer().writeInt(v);
+    }
+    writer.endList();
+  }
+
 }