You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/07/10 19:42:05 UTC
[arrow] branch master updated: ARROW-5887: [C#] ArrowStreamWriter writes FieldNodes in wrong order
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new efa3ae7 ARROW-5887: [C#] ArrowStreamWriter writes FieldNodes in wrong order
efa3ae7 is described below
commit efa3ae765baa39d75b98054e12b20dbfaa9424bb
Author: Eric Erhardt <er...@microsoft.com>
AuthorDate: Wed Jul 10 14:41:48 2019 -0500
ARROW-5887: [C#] ArrowStreamWriter writes FieldNodes in wrong order
Write FieldNodes in correct order in ArrowStreamWriter.
Also, fix a small issue with writing BooleanArrays' NullBitmapBuffer.
@pgovind @chutchinson
Author: Eric Erhardt <er...@microsoft.com>
Closes #4836 from eerhardt/Fix5887 and squashes the following commits:
f0835dea8 <Eric Erhardt> Write FieldNodes in correct order in ArrowStreamWriter.
---
csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 13 ++++-----
.../Apache.Arrow.Tests/ArrowStreamWriterTests.cs | 34 ++++++++++++++++++++++
2 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 7696dfa..8488175 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -109,7 +109,7 @@ namespace Apache.Arrow.Ipc
private void CreateBuffers(BooleanArray array)
{
- _buffers.Add(CreateBuffer(ArrowBuffer.Empty));
+ _buffers.Add(CreateBuffer(array.NullBitmapBuffer));
_buffers.Add(CreateBuffer(array.ValueBuffer));
}
@@ -181,28 +181,26 @@ namespace Apache.Arrow.Ipc
HasWrittenSchema = true;
}
- var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder();
-
Builder.Clear();
// Serialize field nodes
var fieldCount = Schema.Fields.Count;
- var fieldNodeOffsets = new Offset<Flatbuf.FieldNode>[fieldCount];
Flatbuf.RecordBatch.StartNodesVector(Builder, fieldCount);
- for (var i = 0; i < fieldCount; i++)
+ // flatbuffer struct vectors have to be created in reverse order
+ for (var i = fieldCount - 1; i >= 0; i--)
{
var fieldArray = recordBatch.Column(i);
- fieldNodeOffsets[i] =
- Flatbuf.FieldNode.CreateFieldNode(Builder, fieldArray.Length, fieldArray.NullCount);
+ Flatbuf.FieldNode.CreateFieldNode(Builder, fieldArray.Length, fieldArray.NullCount);
}
var fieldNodesVectorOffset = Builder.EndVector();
// Serialize buffers
+ var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder();
for (var i = 0; i < fieldCount; i++)
{
var fieldArray = recordBatch.Column(i);
@@ -213,6 +211,7 @@ namespace Apache.Arrow.Ipc
Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count);
+ // flatbuffer struct vectors have to be created in reverse order
for (var i = buffers.Count - 1; i >= 0; i--)
{
Flatbuf.Buffer.CreateBuffer(Builder,
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs
index 3ef747d..83a97f3 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs
@@ -16,6 +16,7 @@
using Apache.Arrow.Ipc;
using System;
using System.IO;
+using System.Linq;
using System.Net;
using System.Net.Sockets;
using System.Threading.Tasks;
@@ -89,6 +90,39 @@ namespace Apache.Arrow.Tests
{
RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 0);
+ await TestRoundTripRecordBatch(originalBatch);
+ }
+
+ [Fact]
+ public async Task WriteBatchWithNulls()
+ {
+ RecordBatch originalBatch = new RecordBatch.Builder()
+ .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
+ .Append("Column2", true, new Int32Array(
+ valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
+ nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
+ length: 10,
+ nullCount: 2,
+ offset: 0))
+ .Append("Column3", true, new Int32Array(
+ valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
+ nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
+ length: 10,
+ nullCount: 10,
+ offset: 0))
+ .Append("NullableBooleanColumn", true, new BooleanArray(
+ valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
+ nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
+ length: 10,
+ nullCount: 3,
+ offset: 0))
+ .Build();
+
+ await TestRoundTripRecordBatch(originalBatch);
+ }
+
+ private static async Task TestRoundTripRecordBatch(RecordBatch originalBatch)
+ {
using (MemoryStream stream = new MemoryStream())
{
using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema, leaveOpen: true))