You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2022/06/01 14:49:58 UTC

[arrow] branch master updated: ARROW-16672: [Java] Allow duplicated field names in Java C data interface (#13247)

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 86eb03ec81 ARROW-16672: [Java] Allow duplicated field names in Java C data interface (#13247)
86eb03ec81 is described below

commit 86eb03ec815ff4d61c65b5066515a28acf2910f2
Author: Hongze Zhang <ho...@intel.com>
AuthorDate: Wed Jun 1 22:49:51 2022 +0800

    ARROW-16672: [Java] Allow duplicated field names in Java C data interface (#13247)
    
    Authored-by: Hongze Zhang <ho...@intel.com>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 java/c/src/main/java/org/apache/arrow/c/Data.java  |  2 +-
 .../apache/arrow/vector/StructVectorLoader.java    |  2 +-
 .../java/org/apache/arrow/c/RoundtripTest.java     | 40 ++++++++++++++++++++++
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/java/c/src/main/java/org/apache/arrow/c/Data.java b/java/c/src/main/java/org/apache/arrow/c/Data.java
index 27b0ce4bfc..7151bff94b 100644
--- a/java/c/src/main/java/org/apache/arrow/c/Data.java
+++ b/java/c/src/main/java/org/apache/arrow/c/Data.java
@@ -259,7 +259,7 @@ public final class Data {
    */
   public static void importIntoVectorSchemaRoot(BufferAllocator allocator, ArrowArray array, VectorSchemaRoot root,
       DictionaryProvider provider) {
-    try (StructVector structVector = StructVector.empty("", allocator)) {
+    try (StructVector structVector = StructVector.emptyWithDuplicates("", allocator)) {
       structVector.initializeChildrenFromFields(root.getSchema().getFields());
       importIntoVector(allocator, array, structVector, provider);
       StructVectorUnloader unloader = new StructVectorUnloader(structVector);
diff --git a/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java
index eab7e491f3..10e3570177 100644
--- a/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java
+++ b/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java
@@ -79,7 +79,7 @@ public class StructVectorLoader {
    * @param recordBatch the batch to load
    */
   public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch) {
-    StructVector result = StructVector.empty("", allocator);
+    StructVector result = StructVector.emptyWithDuplicates("", allocator);
     result.initializeChildrenFromFields(this.schema.getFields());
 
     Iterator<ArrowBuf> buffers = recordBatch.getBuffers().iterator();
diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
index 059ca32845..6aa6e889ba 100644
--- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
+++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
@@ -32,6 +32,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.UUID;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import org.apache.arrow.c.ArrowArray;
 import org.apache.arrow.c.ArrowSchema;
@@ -658,6 +659,45 @@ public class RoundtripTest {
     imported.close();
   }
 
+  @Test
+  public void testVectorSchemaRootWithDuplicatedFieldNames() {
+    VectorSchemaRoot imported;
+
+    // Consumer allocates empty structures
+    try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator);
+        ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) {
+
+      try (VectorSchemaRoot testVSR1 = createTestVSR();
+          VectorSchemaRoot testVSR2 = createTestVSR()) {
+        // Merge two VSRs to produce duplicated field names
+        final VectorSchemaRoot vsr = new VectorSchemaRoot(
+            Stream.concat(
+                testVSR1.getFieldVectors().stream(),
+                testVSR2.getFieldVectors().stream()).collect(Collectors.toList()));
+        // Producer creates structures from existing memory pointers
+        try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress());
+            ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) {
+          // Producer exports vector into the C Data Interface structures
+          Data.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema);
+        }
+      }
+      // Consumer imports vector
+      imported = Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null);
+    }
+
+    // Ensure that imported VectorSchemaRoot is valid even after C Data Interface
+    // structures are closed
+    try (VectorSchemaRoot testVSR1 = createTestVSR();
+        VectorSchemaRoot testVSR2 = createTestVSR()) {
+      final VectorSchemaRoot original = new VectorSchemaRoot(
+          Stream.concat(
+              testVSR1.getFieldVectors().stream(),
+              testVSR2.getFieldVectors().stream()).collect(Collectors.toList()));
+      assertTrue(imported.equals(original));
+    }
+    imported.close();
+  }
+
   @Test
   public void testSchema() {
     Field decimalField = new Field("inner1", FieldType.nullable(new ArrowType.Decimal(19, 4, 128)), null);