You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/07/10 19:42:05 UTC

[arrow] branch master updated: ARROW-5887: [C#] ArrowStreamWriter writes FieldNodes in wrong order

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new efa3ae7  ARROW-5887: [C#] ArrowStreamWriter writes FieldNodes in wrong order
efa3ae7 is described below

commit efa3ae765baa39d75b98054e12b20dbfaa9424bb
Author: Eric Erhardt <er...@microsoft.com>
AuthorDate: Wed Jul 10 14:41:48 2019 -0500

    ARROW-5887: [C#] ArrowStreamWriter writes FieldNodes in wrong order
    
    Write FieldNodes in the correct order in ArrowStreamWriter.
    Also fix a small issue with writing a BooleanArray's NullBitmapBuffer:
    the writer emitted an empty buffer in place of the array's null bitmap.
    
    @pgovind @chutchinson
    
    Author: Eric Erhardt <er...@microsoft.com>
    
    Closes #4836 from eerhardt/Fix5887 and squashes the following commits:
    
    f0835dea8 <Eric Erhardt> Write FieldNodes in correct order in ArrowStreamWriter.
---
 csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs   | 13 ++++-----
 .../Apache.Arrow.Tests/ArrowStreamWriterTests.cs   | 34 ++++++++++++++++++++++
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 7696dfa..8488175 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -109,7 +109,7 @@ namespace Apache.Arrow.Ipc
 
             private void CreateBuffers(BooleanArray array)
             {
-                _buffers.Add(CreateBuffer(ArrowBuffer.Empty));
+                _buffers.Add(CreateBuffer(array.NullBitmapBuffer));
                 _buffers.Add(CreateBuffer(array.ValueBuffer));
             }
 
@@ -181,28 +181,26 @@ namespace Apache.Arrow.Ipc
                 HasWrittenSchema = true;
             }
 
-            var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder();
-
             Builder.Clear();
 
             // Serialize field nodes
 
             var fieldCount = Schema.Fields.Count;
-            var fieldNodeOffsets = new Offset<Flatbuf.FieldNode>[fieldCount];
 
             Flatbuf.RecordBatch.StartNodesVector(Builder, fieldCount);
 
-            for (var i = 0; i < fieldCount; i++)
+            // flatbuffer struct vectors have to be created in reverse order
+            for (var i = fieldCount - 1; i >= 0; i--)
             {
                 var fieldArray = recordBatch.Column(i);
-                fieldNodeOffsets[i] =
-                    Flatbuf.FieldNode.CreateFieldNode(Builder, fieldArray.Length, fieldArray.NullCount);
+                Flatbuf.FieldNode.CreateFieldNode(Builder, fieldArray.Length, fieldArray.NullCount);
             }
 
             var fieldNodesVectorOffset = Builder.EndVector();
 
             // Serialize buffers
 
+            var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder();
             for (var i = 0; i < fieldCount; i++)
             {
                 var fieldArray = recordBatch.Column(i);
@@ -213,6 +211,7 @@ namespace Apache.Arrow.Ipc
 
             Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count);
 
+            // flatbuffer struct vectors have to be created in reverse order
             for (var i = buffers.Count - 1; i >= 0; i--)
             {
                 Flatbuf.Buffer.CreateBuffer(Builder,
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs
index 3ef747d..83a97f3 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs
@@ -16,6 +16,7 @@
 using Apache.Arrow.Ipc;
 using System;
 using System.IO;
+using System.Linq;
 using System.Net;
 using System.Net.Sockets;
 using System.Threading.Tasks;
@@ -89,6 +90,39 @@ namespace Apache.Arrow.Tests
         {
             RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 0);
 
+            await TestRoundTripRecordBatch(originalBatch);
+        }
+
+        [Fact]
+        public async Task WriteBatchWithNulls()
+        {
+            RecordBatch originalBatch = new RecordBatch.Builder()
+                .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10))))
+                .Append("Column2", true, new Int32Array(
+                    valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
+                    nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
+                    length: 10,
+                    nullCount: 2,
+                    offset: 0))
+                .Append("Column3", true, new Int32Array(
+                    valueBuffer: new ArrowBuffer.Builder<int>().AppendRange(Enumerable.Range(0, 10)).Build(),
+                    nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0x00).Append(0x00).Build(),
+                    length: 10,
+                    nullCount: 10,
+                    offset: 0))
+                .Append("NullableBooleanColumn", true, new BooleanArray(
+                    valueBuffer: new ArrowBuffer.Builder<byte>().Append(0xfd).Append(0xff).Build(),
+                    nullBitmapBuffer: new ArrowBuffer.Builder<byte>().Append(0xed).Append(0xff).Build(),
+                    length: 10,
+                    nullCount: 3,
+                    offset: 0))
+                .Build();
+
+            await TestRoundTripRecordBatch(originalBatch);
+        }
+
+        private static async Task TestRoundTripRecordBatch(RecordBatch originalBatch)
+        {
             using (MemoryStream stream = new MemoryStream())
             {
                 using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema, leaveOpen: true))