You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2022/06/01 14:49:58 UTC
[arrow] branch master updated: ARROW-16672: [Java] Allow duplicated field names in Java C data interface (#13247)
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 86eb03ec81 ARROW-16672: [Java] Allow duplicated field names in Java C data interface (#13247)
86eb03ec81 is described below
commit 86eb03ec815ff4d61c65b5066515a28acf2910f2
Author: Hongze Zhang <ho...@intel.com>
AuthorDate: Wed Jun 1 22:49:51 2022 +0800
ARROW-16672: [Java] Allow duplicated field names in Java C data interface (#13247)
Authored-by: Hongze Zhang <ho...@intel.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
java/c/src/main/java/org/apache/arrow/c/Data.java | 2 +-
.../apache/arrow/vector/StructVectorLoader.java | 2 +-
.../java/org/apache/arrow/c/RoundtripTest.java | 40 ++++++++++++++++++++++
3 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/java/c/src/main/java/org/apache/arrow/c/Data.java b/java/c/src/main/java/org/apache/arrow/c/Data.java
index 27b0ce4bfc..7151bff94b 100644
--- a/java/c/src/main/java/org/apache/arrow/c/Data.java
+++ b/java/c/src/main/java/org/apache/arrow/c/Data.java
@@ -259,7 +259,7 @@ public final class Data {
*/
public static void importIntoVectorSchemaRoot(BufferAllocator allocator, ArrowArray array, VectorSchemaRoot root,
DictionaryProvider provider) {
- try (StructVector structVector = StructVector.empty("", allocator)) {
+ try (StructVector structVector = StructVector.emptyWithDuplicates("", allocator)) {
structVector.initializeChildrenFromFields(root.getSchema().getFields());
importIntoVector(allocator, array, structVector, provider);
StructVectorUnloader unloader = new StructVectorUnloader(structVector);
diff --git a/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java
index eab7e491f3..10e3570177 100644
--- a/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java
+++ b/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java
@@ -79,7 +79,7 @@ public class StructVectorLoader {
* @param recordBatch the batch to load
*/
public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch) {
- StructVector result = StructVector.empty("", allocator);
+ StructVector result = StructVector.emptyWithDuplicates("", allocator);
result.initializeChildrenFromFields(this.schema.getFields());
Iterator<ArrowBuf> buffers = recordBatch.getBuffers().iterator();
diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
index 059ca32845..6aa6e889ba 100644
--- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
+++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
@@ -32,6 +32,7 @@ import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
import org.apache.arrow.c.ArrowArray;
import org.apache.arrow.c.ArrowSchema;
@@ -658,6 +659,45 @@ public class RoundtripTest {
imported.close();
}
+ @Test
+ public void testVectorSchemaRootWithDuplicatedFieldNames() {
+ VectorSchemaRoot imported;
+
+ // Consumer allocates empty structures
+ try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator);
+ ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) {
+
+ try (VectorSchemaRoot testVSR1 = createTestVSR();
+ VectorSchemaRoot testVSR2 = createTestVSR()) {
+ // Merge two VSRs to produce duplicated field names
+ final VectorSchemaRoot vsr = new VectorSchemaRoot(
+ Stream.concat(
+ testVSR1.getFieldVectors().stream(),
+ testVSR2.getFieldVectors().stream()).collect(Collectors.toList()));
+ // Producer creates structures from existing memory pointers
+ try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress());
+ ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) {
+ // Producer exports vector into the C Data Interface structures
+ Data.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema);
+ }
+ }
+ // Consumer imports vector
+ imported = Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null);
+ }
+
+ // Ensure that imported VectorSchemaRoot is valid even after C Data Interface
+ // structures are closed
+ try (VectorSchemaRoot testVSR1 = createTestVSR();
+ VectorSchemaRoot testVSR2 = createTestVSR()) {
+ final VectorSchemaRoot original = new VectorSchemaRoot(
+ Stream.concat(
+ testVSR1.getFieldVectors().stream(),
+ testVSR2.getFieldVectors().stream()).collect(Collectors.toList()));
+ assertTrue(imported.equals(original));
+ }
+ imported.close();
+ }
+
@Test
public void testSchema() {
Field decimalField = new Field("inner1", FieldType.nullable(new ArrowType.Decimal(19, 4, 128)), null);