You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@arrow.apache.org by "Emilio Lahr-Vivaz (JIRA)" <ji...@apache.org> on 2018/04/23 21:01:00 UTC
[jira] [Created] (ARROW-2500) [Java] IPC Writers/readers are not
always setting validity bits correctly
Emilio Lahr-Vivaz created ARROW-2500:
----------------------------------------
Summary: [Java] IPC Writers/readers are not always setting validity bits correctly
Key: ARROW-2500
URL: https://issues.apache.org/jira/browse/ARROW-2500
Project: Apache Arrow
Issue Type: Bug
Components: Java - Vectors
Affects Versions: 0.9.0, 0.8.0
Reporter: Emilio Lahr-Vivaz
When writing multiple batches with an ArrowStreamWriter or ArrowFileWriter, the first validity bit can get garbled between writing and reading. I couldn't pinpoint the root cause, but I was able to reproduce it with the fairly simple unit tests below.
In TestArrowStream.java:
{code:java}
/**
 * Writes two record batches through a single {@code ArrowStreamWriter} backed by one
 * reused {@code IntVector}, then reads both batches back and checks every slot.
 *
 * <p>Batch 1 holds {@code [null, 1, 2, null, 1]}; batch 2 holds {@code [null, 1, 2]}.
 * Slot 0 is null in both batches, which is the position whose validity bit the
 * report describes as getting garbled.
 *
 * @throws IOException if writing to or reading from the in-memory stream fails
 */
@Test
public void testReadWriteMultipleBatches() throws IOException {
  ByteArrayOutputStream os = new ByteArrayOutputStream();

  try (IntVector vector = new IntVector("foo", allocator)) {
    Schema schema = new Schema(Collections.singletonList(vector.getField()), null);
    try (VectorSchemaRoot root = new VectorSchemaRoot(
             schema, Collections.singletonList((FieldVector) vector), vector.getValueCount());
         ArrowStreamWriter writer = new ArrowStreamWriter(
             root, new MapDictionaryProvider(), Channels.newChannel(os))) {
      writer.start();

      // First batch: five slots, nulls at positions 0 and 3.
      vector.setNull(0);
      vector.setSafe(1, 1);
      vector.setSafe(2, 2);
      vector.setNull(3);
      vector.setSafe(4, 1);
      vector.setValueCount(5);
      root.setRowCount(5);
      writer.writeBatch();

      // Second batch: the same vector is rewritten in place (no clear/reset call in
      // between) and shrunk to three slots, with a null at position 0 again.
      vector.setNull(0);
      vector.setSafe(1, 1);
      vector.setSafe(2, 2);
      vector.setValueCount(3);
      root.setRowCount(3);
      writer.writeBatch();
    }
  }

  ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray());
  try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) {
    IntVector read = (IntVector) reader.getVectorSchemaRoot().getFieldVectors().get(0);

    // Expected values go first so that a failure message labels them correctly.
    reader.loadNextBatch();
    assertEquals(5, read.getValueCount());
    assertNull(read.getObject(0));
    assertEquals(Integer.valueOf(1), read.getObject(1));
    assertEquals(Integer.valueOf(2), read.getObject(2));
    assertNull(read.getObject(3));
    assertEquals(Integer.valueOf(1), read.getObject(4));

    reader.loadNextBatch();
    assertEquals(3, read.getValueCount());
    assertNull(read.getObject(0));
    assertEquals(Integer.valueOf(1), read.getObject(1));
    assertEquals(Integer.valueOf(2), read.getObject(2));
  }
}
{code}
In TestArrowFile.java:
{code:java}
/**
 * Writes two record batches through a single {@code ArrowFileWriter} backed by one
 * reused {@code IntVector}, then reads both batches back from the file and checks
 * every slot.
 *
 * <p>Batch 1 holds {@code [null, 1, 2, null, 1]}; batch 2 holds {@code [null, 1, 2]}.
 * Slot 0 is null in both batches, which is the position whose validity bit the
 * report describes as getting garbled.
 *
 * @throws IOException if writing to or reading from the file fails
 */
@Test
public void testReadWriteMultipleBatches() throws IOException {
  File file = new File("target/mytest_nulls_multibatch.arrow");

  try (IntVector vector = new IntVector("foo", allocator)) {
    Schema schema = new Schema(Collections.singletonList(vector.getField()), null);
    try (FileOutputStream fileOutputStream = new FileOutputStream(file);
         VectorSchemaRoot root = new VectorSchemaRoot(
             schema, Collections.singletonList((FieldVector) vector), vector.getValueCount());
         ArrowFileWriter writer = new ArrowFileWriter(
             root, new MapDictionaryProvider(), fileOutputStream.getChannel())) {
      writer.start();

      // First batch: five slots, nulls at positions 0 and 3.
      vector.setNull(0);
      vector.setSafe(1, 1);
      vector.setSafe(2, 2);
      vector.setNull(3);
      vector.setSafe(4, 1);
      vector.setValueCount(5);
      root.setRowCount(5);
      writer.writeBatch();

      // Second batch: the same vector is rewritten in place (no clear/reset call in
      // between) and shrunk to three slots, with a null at position 0 again.
      vector.setNull(0);
      vector.setSafe(1, 1);
      vector.setSafe(2, 2);
      vector.setValueCount(3);
      root.setRowCount(3);
      writer.writeBatch();
    }
  }

  try (FileInputStream fileInputStream = new FileInputStream(file);
       ArrowFileReader reader = new ArrowFileReader(fileInputStream.getChannel(), allocator)) {
    IntVector read = (IntVector) reader.getVectorSchemaRoot().getFieldVectors().get(0);

    // Expected values go first so that a failure message labels them correctly.
    reader.loadNextBatch();
    assertEquals(5, read.getValueCount());
    assertNull(read.getObject(0));
    assertEquals(Integer.valueOf(1), read.getObject(1));
    assertEquals(Integer.valueOf(2), read.getObject(2));
    assertNull(read.getObject(3));
    assertEquals(Integer.valueOf(1), read.getObject(4));

    reader.loadNextBatch();
    assertEquals(3, read.getValueCount());
    assertNull(read.getObject(0));
    assertEquals(Integer.valueOf(1), read.getObject(1));
    assertEquals(Integer.valueOf(2), read.getObject(2));
  }
}
{code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)