You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/07/17 16:19:01 UTC

arrow git commit: ARROW-1190: [JAVA] Fixing VectorLoader for duplicate field names

Repository: arrow
Updated Branches:
  refs/heads/master ea9bc8378 -> 0396240b5


ARROW-1190: [JAVA] Fixing VectorLoader for duplicate field names

VectorLoader was corrupting data when some of the fields had the same name, in which case only one of those vectors got properly loaded. This PR resolves the problem by avoiding by-name field lookups and iterating over the root's field vectors directly instead.

Author: Antony Mayi <an...@yahoo.com>

Closes #816 from antonymayi/master and squashes the following commits:

38c1837 [Antony Mayi] adding unit test for unload-loading vectors with duplicate field names
a723416 [Antony Mayi] ARROW-1190 - fixing VectorLoader for duplicate field names


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0396240b
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0396240b
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0396240b

Branch: refs/heads/master
Commit: 0396240b55ab1d74c6b36ffaf95290135d8da389
Parents: ea9bc83
Author: Antony Mayi <an...@yahoo.com>
Authored: Mon Jul 17 18:18:56 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Mon Jul 17 18:18:56 2017 +0200

----------------------------------------------------------------------
 .../org/apache/arrow/vector/VectorLoader.java   |  6 +--
 .../arrow/vector/TestVectorUnloadLoad.java      | 47 ++++++++++++++++++++
 2 files changed, 49 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/0396240b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
index 33a608c..e640c7c 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
@@ -55,10 +55,8 @@ public class VectorLoader {
   public void load(ArrowRecordBatch recordBatch) {
     Iterator<ArrowBuf> buffers = recordBatch.getBuffers().iterator();
     Iterator<ArrowFieldNode> nodes = recordBatch.getNodes().iterator();
-    List<Field> fields = root.getSchema().getFields();
-    for (Field field: fields) {
-      FieldVector fieldVector = root.getVector(field.getName());
-      loadBuffers(fieldVector, field, buffers, nodes);
+    for (FieldVector fieldVector: root.getFieldVectors()) {
+      loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes);
     }
     root.setRowCount(recordBatch.getLength());
     if (nodes.hasNext() || buffers.hasNext()) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/0396240b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
index 6fb559c..f369465 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
@@ -240,6 +240,53 @@ public class TestVectorUnloadLoad {
     }
   }
 
+  @Test
+  public void testUnloadLoadDuplicates() throws IOException {
+    int count = 10;
+    Schema schema = new Schema(asList(
+      new Field("duplicate", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList()),
+      new Field("duplicate", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList())
+    ));
+
+    try (
+      BufferAllocator originalVectorsAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+    ) {
+      List<FieldVector> sources = new ArrayList<>();
+      for (Field field: schema.getFields()) {
+        FieldVector vector = field.createVector(originalVectorsAllocator);
+        vector.allocateNew();
+        sources.add(vector);
+        NullableIntVector.Mutator mutator = (NullableIntVector.Mutator) vector.getMutator();
+        for (int i =  0; i < count; i++) {
+          mutator.set(i, i);
+        }
+        mutator.setValueCount(count);
+      }
+
+      try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), sources, count)) {
+        VectorUnloader vectorUnloader = new VectorUnloader(root);
+        try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+                BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+                VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);) {
+          // load it
+          VectorLoader vectorLoader = new VectorLoader(newRoot);
+          vectorLoader.load(recordBatch);
+
+          List<FieldVector> targets = newRoot.getFieldVectors();
+          Assert.assertEquals(sources.size(), targets.size());
+          for (int k = 0; k < sources.size(); k++) {
+            NullableIntVector.Accessor src = (NullableIntVector.Accessor) sources.get(k).getAccessor();
+            NullableIntVector.Accessor tgt = (NullableIntVector.Accessor) targets.get(k).getAccessor();
+            Assert.assertEquals(src.getValueCount(), tgt.getValueCount());
+            for (int i = 0; i < count; i++) {
+              Assert.assertEquals(src.get(i), tgt.get(i));
+            }
+          }
+        }
+      }
+    }
+  }
+
   public static VectorUnloader newVectorUnloader(FieldVector root) {
     Schema schema = new Schema(root.getField().getChildren());
     int valueCount = root.getAccessor().getValueCount();