You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pr...@apache.org on 2019/08/09 10:06:57 UTC

[arrow] branch master updated: ARROW-6145: [Java] UnionVector created by MinorType#getNewVector could not keep field type info properly

This is an automated email from the ASF dual-hosted git repository.

praveenbingo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1dbaee6  ARROW-6145: [Java] UnionVector created by MinorType#getNewVector could not keep field type info properly
1dbaee6 is described below

commit 1dbaee62bdd3b1fe38145b1c941f616c2157e6c8
Author: tianchen <ni...@alibaba-inc.com>
AuthorDate: Fri Aug 9 15:36:26 2019 +0530

    ARROW-6145: [Java] UnionVector created by MinorType#getNewVector could not keep field type info properly
    
    Related to [ARROW-6145](https://issues.apache.org/jira/browse/ARROW-6145).
    When I worked for other items, I found UnionVector created by VectorSchemaRoot#create(Schema schema, BufferAllocator allocator) could not keep field type info properly. For example, if we set metadata in Field in schema, we could not get it back by UnionVector#getField.
    
    This is mainly because MinorType.Union.getNewVector did not pass FieldType to vector and UnionVector#getField create a new Field which cause inconsistent.
    
    Closes #5023 from tianchen92/ARROW-6145 and squashes the following commits:
    
    a11c4e1b1 <tianchen>  UnionVector created by MinorType#getNewVector could not keep field type info properly
    
    Authored-by: tianchen <ni...@alibaba-inc.com>
    Signed-off-by: Praveen <pr...@dremio.com>
---
 .../src/main/codegen/templates/UnionVector.java    | 23 +++++++++++++++-
 .../java/org/apache/arrow/vector/types/Types.java  |  2 +-
 .../apache/arrow/vector/types/pojo/FieldType.java  | 13 +++++++++
 .../org/apache/arrow/vector/TestUnionVector.java   | 31 ++++++++++++++++++++++
 4 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java
index c4db607..f288fb6 100644
--- a/java/vector/src/main/codegen/templates/UnionVector.java
+++ b/java/vector/src/main/codegen/templates/UnionVector.java
@@ -16,9 +16,12 @@
  */
 
 import io.netty.buffer.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.ReferenceManager;
 import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.UnionMode;
 import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
 
 <@pp.dropOutputFile />
 <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/UnionVector.java" />
@@ -79,13 +82,21 @@ public class UnionVector implements FieldVector {
   private final CallBack callBack;
   private int typeBufferAllocationSizeInBytes;
 
+  private final FieldType fieldType;
+
   private static final byte TYPE_WIDTH = 1;
   private static final FieldType INTERNAL_STRUCT_TYPE = new FieldType(false /*nullable*/,
       ArrowType.Struct.INSTANCE, null /*dictionary*/, null /*metadata*/);
 
+  @Deprecated
   public UnionVector(String name, BufferAllocator allocator, CallBack callBack) {
+    this(name, allocator, null, callBack);
+  }
+
+  public UnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
     this.name = name;
     this.allocator = allocator;
+    this.fieldType = fieldType;
     this.internalStruct = new NonNullableStructVector("internal", allocator, INTERNAL_STRUCT_TYPE,
         callBack);
     this.typeBuffer = allocator.getEmpty();
@@ -341,7 +352,17 @@ public class UnionVector implements FieldVector {
       typeIds[childFields.size()] = v.getMinorType().ordinal();
       childFields.add(v.getField());
     }
-    return new Field(name, FieldType.nullable(new ArrowType.Union(Sparse, typeIds)), childFields);
+
+    FieldType fieldType;
+    if (this.fieldType == null) {
+      fieldType = FieldType.nullable(new ArrowType.Union(Sparse, typeIds));
+    } else {
+      final UnionMode mode = ((ArrowType.Union)this.fieldType.getType()).getMode();
+      fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds),
+          this.fieldType.getDictionary(), this.fieldType.getMetadata());
+    }
+
+    return new Field(name, fieldType, childFields);
   }
 
   @Override
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
index 35aafcb..88d7413 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
@@ -598,7 +598,7 @@ public class Types {
           throw new UnsupportedOperationException("Dictionary encoding not supported for complex " +
               "types");
         }
-        return new UnionVector(field.getName(), allocator, schemaChangeCallback);
+        return new UnionVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
       }
 
       @Override
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java
index 945f5df..5bd8418 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java
@@ -19,6 +19,7 @@ package org.apache.arrow.vector.types.pojo;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Objects;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.util.Collections2;
@@ -103,4 +104,16 @@ public class FieldType {
     return minorType.getNewVector(field, allocator, schemaCallBack);
   }
 
+  @Override
+  public boolean equals(Object obj) {
+    if (!(obj instanceof FieldType)) {
+      return false;
+    }
+    Field that = (Field) obj;
+    return Objects.equals(this.isNullable(), that.isNullable()) &&
+        Objects.equals(this.getType(), that.getType()) &&
+        Objects.equals(this.getDictionary(), that.getDictionary()) &&
+        Objects.equals(this.getMetadata(), that.getMetadata());
+  }
+
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
index 74c3765..2ea51c8 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
@@ -21,7 +21,10 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.complex.UnionVector;
@@ -30,6 +33,10 @@ import org.apache.arrow.vector.holders.NullableFloat4Holder;
 import org.apache.arrow.vector.holders.NullableIntHolder;
 import org.apache.arrow.vector.holders.NullableUInt4Holder;
 import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.TransferPair;
 import org.junit.After;
 import org.junit.Before;
@@ -301,6 +308,30 @@ public class TestUnionVector {
   }
 
   @Test
+  public void testGetFieldTypeInfo() throws Exception {
+    Map<String, String> metadata = new HashMap<>();
+    metadata.put("key1", "value1");
+
+    int[] typeIds = new int[2];
+    typeIds[0] = MinorType.INT.ordinal();
+    typeIds[1] = MinorType.VARCHAR.ordinal();
+
+    List<Field> children = new ArrayList<>();
+    children.add(new Field("int", FieldType.nullable(MinorType.INT.getType()), null));
+    children.add(new Field("varchar", FieldType.nullable(MinorType.VARCHAR.getType()), null));
+
+    final FieldType fieldType =  new FieldType(false, new ArrowType.Union(UnionMode.Sparse, typeIds),
+        /*dictionary=*/null, metadata);
+    final Field field = new Field("union", fieldType, children);
+
+    MinorType minorType = MinorType.UNION;
+    UnionVector vector = (UnionVector) minorType.getNewVector(field, allocator, null);
+    vector.initializeChildrenFromFields(children);
+
+    assertTrue(vector.getField().equals(field));
+  }
+
+  @Test
   public void testGetBufferAddress() throws Exception {
     try (UnionVector vector = new UnionVector(EMPTY_SCHEMA_PATH, allocator, null)) {
       boolean error = false;