You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2023/09/07 19:03:38 UTC
[arrow] branch main updated: GH-37056: [Java] Fix importing an empty data array from c-data (#37531)
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5a781693d9 GH-37056: [Java] Fix importing an empty data array from c-data (#37531)
5a781693d9 is described below
commit 5a781693d953da8784edaf971f6124528398b4a2
Author: Jacek Stania <38...@users.noreply.github.com>
AuthorDate: Thu Sep 7 20:03:32 2023 +0100
GH-37056: [Java] Fix importing an empty data array from c-data (#37531)
### Rationale for this change
Fix java lib bug that prevents from importing specific data via c-data interface.
Currently an attempt to load a vector with empty data buffer results in an IllegalStateException error.
### What changes are included in this PR?
Updated BufferImportTypeVisitor to correctly handle a situation when underlying c-data array pointer is NULL (0) and the expected length of data is zero (0).
### Are these changes tested?
Yes, updated the existing unit tests
### Are there any user-facing changes?
No
* Closes: #37056
Lead-authored-by: Jacek Stania <ja...@gmail.com>
Co-authored-by: Jacek Stania <38...@users.noreply.github.com>
Co-authored-by: David Li <li...@gmail.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
.../apache/arrow/c/BufferImportTypeVisitor.java | 43 +++++++++++-----------
.../org/apache/arrow/c/ArrowArrayUtilityTest.java | 30 ++++++++++++---
.../java/org/apache/arrow/c/RoundtripTest.java | 8 ++++
java/c/src/test/python/integration_tests.py | 27 +++++++++++++-
4 files changed, 79 insertions(+), 29 deletions(-)
diff --git a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
index c8b6d07086..7408bf7113 100644
--- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
+++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
@@ -80,45 +80,44 @@ class BufferImportTypeVisitor implements ArrowType.ArrowTypeVisitor<List<ArrowBu
}
@VisibleForTesting
- long getBufferPtr(ArrowType type, int index) {
+ ArrowBuf importBuffer(ArrowType type, int index, long capacity) {
checkState(
- buffers.length > index,
- "Expected at least %s buffers for type %s, but found %s", index + 1, type, buffers.length);
- if (buffers[index] == NULL) {
- throw new IllegalStateException(String.format("Buffer %s for type %s cannot be null", index, type));
+ buffers.length > index,
+ "Expected at least %s buffers for type %s, but found %s", index + 1, type, buffers.length);
+ long bufferPtr = buffers[index];
+
+ if (bufferPtr == NULL) {
+ // C array may be NULL but only accept that if expected capacity is zero too
+ if (capacity != 0) {
+ throw new IllegalStateException(String.format("Buffer %s for type %s cannot be null", index, type));
+ } else {
+ // no data in the C array, return an empty buffer
+ return allocator.getEmpty();
+ }
}
- return buffers[index];
+
+ ArrowBuf buf = underlyingAllocation.unsafeAssociateAllocation(allocator, capacity, bufferPtr);
+ imported.add(buf);
+ return buf;
}
private ArrowBuf importFixedBits(ArrowType type, int index, long bitsPerSlot) {
- final long bufferPtr = getBufferPtr(type, index);
final long capacity = DataSizeRoundingUtil.divideBy8Ceil(bitsPerSlot * fieldNode.getLength());
- ArrowBuf buf = underlyingAllocation.unsafeAssociateAllocation(allocator, capacity, bufferPtr);
- this.imported.add(buf);
- return buf;
+ return importBuffer(type, index, capacity);
}
private ArrowBuf importFixedBytes(ArrowType type, int index, long bytesPerSlot) {
- final long bufferPtr = getBufferPtr(type, index);
final long capacity = bytesPerSlot * fieldNode.getLength();
- ArrowBuf buf = underlyingAllocation.unsafeAssociateAllocation(allocator, capacity, bufferPtr);
- this.imported.add(buf);
- return buf;
+ return importBuffer(type, index, capacity);
}
private ArrowBuf importOffsets(ArrowType type, long bytesPerSlot) {
- final long bufferPtr = getBufferPtr(type, 1);
final long capacity = bytesPerSlot * (fieldNode.getLength() + 1);
- ArrowBuf buf = underlyingAllocation.unsafeAssociateAllocation(allocator, capacity, bufferPtr);
- this.imported.add(buf);
- return buf;
+ return importBuffer(type, 1, capacity);
}
private ArrowBuf importData(ArrowType type, long capacity) {
- final long bufferPtr = getBufferPtr(type, 2);
- ArrowBuf buf = underlyingAllocation.unsafeAssociateAllocation(allocator, capacity, bufferPtr);
- this.imported.add(buf);
- return buf;
+ return importBuffer(type, 2, capacity);
}
private ArrowBuf maybeImportBitmap(ArrowType type) {
diff --git a/java/c/src/test/java/org/apache/arrow/c/ArrowArrayUtilityTest.java b/java/c/src/test/java/org/apache/arrow/c/ArrowArrayUtilityTest.java
index 2d31089ca7..4a0a966ba1 100644
--- a/java/c/src/test/java/org/apache/arrow/c/ArrowArrayUtilityTest.java
+++ b/java/c/src/test/java/org/apache/arrow/c/ArrowArrayUtilityTest.java
@@ -56,19 +56,39 @@ class ArrowArrayUtilityTest {
// BufferImportTypeVisitor
@Test
- void getBufferPtr() throws Exception {
+ void importBuffer() throws Exception {
// Note values are all dummy values here
- try (BufferImportTypeVisitor visitor =
- new BufferImportTypeVisitor(allocator, dummyHandle, new ArrowFieldNode(0, 0), new long[]{0})) {
+ try (BufferImportTypeVisitor notEmptyDataVisitor =
+ new BufferImportTypeVisitor(allocator, dummyHandle, new ArrowFieldNode(/* length= */ 1, 0), new long[]{0})) {
// Too few buffers
- assertThrows(IllegalStateException.class, () -> visitor.getBufferPtr(new ArrowType.Bool(), 1));
+ assertThrows(IllegalStateException.class, () -> notEmptyDataVisitor.importBuffer(new ArrowType.Bool(), 1, 1));
// Null where one isn't expected
- assertThrows(IllegalStateException.class, () -> visitor.getBufferPtr(new ArrowType.Bool(), 0));
+ assertThrows(IllegalStateException.class, () -> notEmptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 1));
+
+ // Expected capacity not zero but c array ptr is NULL (zero)
+ assertThrows(IllegalStateException.class, () -> notEmptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 1));
+
+ // Expected capacity is zero and c array ptr is NULL (zero)
+ assertThat(notEmptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 0)).isEqualTo(allocator.getEmpty());
+ }
+
+ try (BufferImportTypeVisitor emptyDataVisitor =
+ new BufferImportTypeVisitor(allocator, dummyHandle, new ArrowFieldNode(/* length= */ 0, 0), new long[]{0})) {
+
+ // Too few buffers
+ assertThrows(IllegalStateException.class, () -> emptyDataVisitor.importBuffer(new ArrowType.Bool(), 1, 1));
+
+ // Expected capacity not zero but c array ptr is NULL (zero)
+ assertThrows(IllegalStateException.class, () -> emptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 1));
+
+ // Expected capacity is zero and c array ptr is NULL (zero)
+ assertThat(emptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 0)).isEqualTo(allocator.getEmpty());
}
}
+
@Test
void cleanupAfterFailure() throws Exception {
// Note values are all dummy values here
diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
index fc73df449b..c1ceb93cb4 100644
--- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
+++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
@@ -534,6 +534,14 @@ public class RoundtripTest {
}
}
+ @Test
+ public void testEmptyListVector() {
+ try (final ListVector vector = ListVector.empty("v", allocator)) {
+ setVector(vector, new ArrayList<Integer>());
+ assertTrue(roundtrip(vector, ListVector.class));
+ }
+ }
+
@Test
public void testLargeListVector() {
try (final LargeListVector vector = LargeListVector.empty("v", allocator)) {
diff --git a/java/c/src/test/python/integration_tests.py b/java/c/src/test/python/integration_tests.py
index c23b4b9b44..eb8bb8c4b2 100644
--- a/java/c/src/test/python/integration_tests.py
+++ b/java/c/src/test/python/integration_tests.py
@@ -24,6 +24,7 @@ import xml.etree.ElementTree as ET
import jpype
import pyarrow as pa
+import pyarrow.ipc as ipc
from pyarrow.cffi import ffi
@@ -119,6 +120,10 @@ class Bridge:
class TestPythonIntegration(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls) -> None:
+ setup_jvm()
+
def setUp(self):
gc.collect()
self.old_allocated_python = pa.total_allocated_bytes()
@@ -195,7 +200,26 @@ class TestPythonIntegration(unittest.TestCase):
# disabled check_metadata since the list internal field name ("item")
# is not preserved during round trips (it becomes "$data$").
), check_metadata=False)
-
+
+ def test_empty_list_array(self):
+ """Validates GH-37056 fix.
+ Empty list of int32 produces a vector with empty child data buffer, however with non-zero capacity.
+ Using streaming forces the c-data array which represent the child data buffer to be NULL (pointer is 0).
+ On Java side, an attempt to import such array triggered an exception described in GH-37056.
+ """
+ with pa.BufferOutputStream() as bos:
+ schema = pa.schema([pa.field("f0", pa.list_(pa.int32()), True)])
+ with ipc.new_stream(bos, schema) as writer:
+ src = pa.RecordBatch.from_arrays([pa.array([[]])], schema=schema)
+ writer.write(src)
+ data_bytes = bos.getvalue()
+
+ def recreate_batch():
+ with pa.input_stream(data_bytes) as ios:
+ with ipc.open_stream(ios) as reader:
+ return reader.read_next_batch()
+
+ self.round_trip_record_batch(recreate_batch)
def test_struct_array(self):
fields = [
@@ -274,5 +298,4 @@ class TestPythonIntegration(unittest.TestCase):
if __name__ == '__main__':
- setup_jvm()
unittest.main(verbosity=2)