You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/04/30 14:37:47 UTC

[arrow] branch master updated: ARROW-4717 [C#] Consider exposing ValueTask instead of Task

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3514a37  ARROW-4717 [C#] Consider exposing ValueTask instead of Task
3514a37 is described below

commit 3514a37091a9ee06a7d74b7fc57b18caeef8d0cb
Author: Eric Erhardt <er...@microsoft.com>
AuthorDate: Tue Apr 30 09:37:40 2019 -0500

    ARROW-4717 [C#] Consider exposing ValueTask instead of Task
    
    Eliminating allocations in the C# Arrow library:
    
    1. Use ValueTask instead of Task in the public APIs.
    2. Eliminate allocations in ArrayData constructor by taking an array directly instead of calling `ToArray()` every time.
    3. Remove some other minor allocations that provide no benefit.
    4. Also, add a simple `NullCount == 0` check to optimize the case where there are no nulls in the data, but the null bitmap is filled with 0s instead of being empty.
    
    With this change the `ArrowReaderWithMemory` benchmark in the repo goes from:
    
    ## Before
    |                      Method |      Allocated Memory/Op |
    |---------------------------- |--------------------:|
    |       ArrowReaderWithMemory |  5.84 KB |
    
    ## After
    |                      Method |      Allocated Memory/Op |
    |---------------------------- |--------------------:|
    |       ArrowReaderWithMemory |  4.48 KB |
    
    @stephentoub @pgovind @chutchinson
    
    Author: Eric Erhardt <er...@microsoft.com>
    
    Closes #4194 from eerhardt/ValueTask and squashes the following commits:
    
    d2a00afd9 <Eric Erhardt> PR feedback.
    310bb2f3b <Eric Erhardt> Convert all Task usages to ValueTask to save on allocations.
    d1d2687d5 <Eric Erhardt> Minor optimizations
---
 csharp/src/Apache.Arrow/Arrays/Array.cs            |   2 +-
 csharp/src/Apache.Arrow/Arrays/ArrayData.cs        |  13 +++
 .../src/Apache.Arrow/Arrays/ArrowArrayFactory.cs   | 109 +++++++++------------
 .../Apache.Arrow/Extensions/ArrayDataExtensions.cs |  14 +--
 csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs     |   4 +-
 .../Ipc/ArrowFileReaderImplementation.cs           |   4 +-
 csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs     |  17 ++--
 csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs   |   4 +-
 csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs   |  17 ++--
 csharp/src/Apache.Arrow/Ipc/IArrowReader.cs        |   2 +-
 10 files changed, 88 insertions(+), 98 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Arrays/Array.cs b/csharp/src/Apache.Arrow/Arrays/Array.cs
index e795ad9..561547a 100644
--- a/csharp/src/Apache.Arrow/Arrays/Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Array.cs
@@ -41,7 +41,7 @@ namespace Apache.Arrow
         }
 
         public bool IsValid(int index) =>
-            NullBitmapBuffer.IsEmpty || BitUtility.GetBit(NullBitmapBuffer.Span, index);
+            NullCount == 0 || NullBitmapBuffer.IsEmpty || BitUtility.GetBit(NullBitmapBuffer.Span, index);
 
         public bool IsNull(int index) => !IsValid(index);
 
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs
index 2074f12..e2345e0 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs
@@ -40,5 +40,18 @@ namespace Apache.Arrow
             Buffers = buffers?.ToArray();
             Children = children?.ToArray();
         }
+
+        public ArrayData(
+            IArrowType dataType,
+            int length, int nullCount = 0, int offset = 0,
+            ArrowBuffer[] buffers = null, ArrayData[] children = null)
+        {
+            DataType = dataType ?? NullType.Default;
+            Length = length;
+            NullCount = nullCount;
+            Offset = offset;
+            Buffers = buffers;
+            Children = children;
+        }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
index 542871c..ac598af 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
@@ -18,70 +18,59 @@ using System;
 
 namespace Apache.Arrow
 {
-    public class ArrowArrayFactory
+    public static class ArrowArrayFactory
     {
-        private class FactoryTypeVisitor :
-            IArrowTypeVisitor<Int8Type>,
-            IArrowTypeVisitor<Int16Type>,
-            IArrowTypeVisitor<Int32Type>,
-            IArrowTypeVisitor<Int64Type>,
-            IArrowTypeVisitor<UInt8Type>,
-            IArrowTypeVisitor<UInt16Type>,
-            IArrowTypeVisitor<UInt32Type>,
-            IArrowTypeVisitor<UInt64Type>,
-            IArrowTypeVisitor<BooleanType>,
-            IArrowTypeVisitor<FloatType>,
-            IArrowTypeVisitor<DoubleType>,
-            IArrowTypeVisitor<StructType>,
-            IArrowTypeVisitor<UnionType>,
-            IArrowTypeVisitor<ListType>,
-            IArrowTypeVisitor<TimestampType>,
-            IArrowTypeVisitor<StringType>,
-            IArrowTypeVisitor<BinaryType>
+        public static IArrowArray BuildArray(ArrayData data)
         {
-            private readonly ArrayData _data;
-            private IArrowArray _array;
-
-            public FactoryTypeVisitor(ArrayData data)
+            switch (data.DataType.TypeId)
             {
-                _data = data;
+                case ArrowTypeId.Boolean:
+                    return new BooleanArray(data);
+                case ArrowTypeId.UInt8:
+                    return new UInt8Array(data);
+                case ArrowTypeId.Int8:
+                    return new Int8Array(data);
+                case ArrowTypeId.UInt16:
+                    return new UInt16Array(data);
+                case ArrowTypeId.Int16:
+                    return new Int16Array(data);
+                case ArrowTypeId.UInt32:
+                    return new UInt32Array(data);
+                case ArrowTypeId.Int32:
+                    return new Int32Array(data);
+                case ArrowTypeId.UInt64:
+                    return new UInt64Array(data);
+                case ArrowTypeId.Int64:
+                    return new Int64Array(data);
+                case ArrowTypeId.Float:
+                    return new FloatArray(data);
+                case ArrowTypeId.Double:
+                    return new DoubleArray(data);
+                case ArrowTypeId.String:
+                    return new StringArray(data);
+                case ArrowTypeId.Binary:
+                    return new BinaryArray(data);
+                case ArrowTypeId.Timestamp:
+                    return new TimestampArray(data);
+                case ArrowTypeId.List:
+                    return new ListArray(data);
+                case ArrowTypeId.Struct:
+                    return new StructArray(data);
+                case ArrowTypeId.Union:
+                    return new UnionArray(data);
+                case ArrowTypeId.Date64:
+                case ArrowTypeId.Date32:
+                case ArrowTypeId.Decimal:
+                case ArrowTypeId.Dictionary:
+                case ArrowTypeId.FixedSizedBinary:
+                case ArrowTypeId.HalfFloat:
+                case ArrowTypeId.Interval:
+                case ArrowTypeId.Map:
+                case ArrowTypeId.Time32:
+                case ArrowTypeId.Time64:
+                default:
+                    throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}.");
             }
-
-            public IArrowArray CreateArray()
-            {
-                _data.DataType.Accept(this);
-                return _array;
-            }
-
-            public void Visit(Int8Type type) => _array = new Int8Array(_data);
-            public void Visit(Int16Type type) => _array = new Int16Array(_data);
-            public void Visit(Int32Type type) => _array = new Int32Array(_data);
-            public void Visit(Int64Type type) => _array = new Int64Array(_data);
-            public void Visit(UInt8Type type) => _array = new UInt8Array(_data);
-            public void Visit(UInt16Type type) => _array = new UInt16Array(_data);
-            public void Visit(UInt32Type type) => _array = new UInt32Array(_data);
-            public void Visit(UInt64Type type) => _array = new UInt64Array(_data);
-            public void Visit(BooleanType type) => _array = new BooleanArray(_data);
-            public void Visit(FloatType type) => _array = new FloatArray(_data);
-            public void Visit(DoubleType type) => _array = new DoubleArray(_data);
-            public void Visit(StructType type) => _array = new StructArray(_data);
-            public void Visit(UnionType type) => _array = new UnionArray(_data);
-            public void Visit(ListType type) => _array = new ListArray(_data);
-            public void Visit(TimestampType type) => _array = new TimestampArray(_data);
-            public void Visit(BinaryType type) => _array = new BinaryArray(_data);
-            public void Visit(StringType type) => _array = new StringArray(_data);
-
-            public void Visit(IArrowType type)
-            {
-                throw new NotImplementedException();
-            }
-        }
-
-        public static IArrowArray BuildArray(ArrayData data)
-        {
-            var visitor = new FactoryTypeVisitor(data);
-            var array = visitor.CreateArray();
-            return array;
         }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
index 3cc4c13..399d9bf 100644
--- a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
+++ b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
@@ -31,21 +31,13 @@ namespace Apache.Arrow
             }
         }
 
-        public static void EnsureDataType(this ArrayData data, params ArrowTypeId[] ids)
+        public static void EnsureDataType(this ArrayData data, ArrowTypeId id)
         {
-            var valid = true;
-
-            foreach (var id in ids)
-            {
-                if (data.DataType.TypeId != id)
-                    valid = false;
-            }
-
-            if (!valid)
+            if (data.DataType.TypeId != id)
             {
                 // TODO: Use localizable string resource
                 throw new ArgumentException(
-                    $"Specified array type <{data.DataType.TypeId}> does not match expected type(s) <{string.Join(",", ids)}>",
+                    $"Specified array type <{data.DataType.TypeId}> does not match expected type(s) <{id}>",
                     nameof(data.DataType.TypeId));
             }
         }
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs
index da9525e..b61c2d9 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs
@@ -45,12 +45,12 @@ namespace Apache.Arrow.Ipc
             return new ArrowFileReader(stream);
         }
 
-        public Task<int> RecordBatchCountAsync()
+        public ValueTask<int> RecordBatchCountAsync()
         {
             return Implementation.RecordBatchCountAsync();
         }
 
-        public Task<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken = default)
+        public ValueTask<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken = default)
         {
             return Implementation.ReadRecordBatchAsync(index, cancellationToken);
         }
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
index 7a62085..370ac6d 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
@@ -42,7 +42,7 @@ namespace Apache.Arrow.Ipc
         {
         }
 
-        public async Task<int> RecordBatchCountAsync()
+        public async ValueTask<int> RecordBatchCountAsync()
         {
             if (!HasReadSchema)
             {
@@ -142,7 +142,7 @@ namespace Apache.Arrow.Ipc
             Schema = _footer.Schema;
         }
 
-        public async Task<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken)
+        public async ValueTask<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken)
         {
             await ReadSchemaAsync().ConfigureAwait(false);
 
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
index d6e124a..265c31f 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
@@ -112,18 +112,16 @@ namespace Apache.Arrow.Ipc
             await BaseStream.FlushAsync(cancellationToken).ConfigureAwait(false);
         }
 
-        private async ValueTask WriteHeaderAsync(CancellationToken cancellationToken)
+        private async Task WriteHeaderAsync(CancellationToken cancellationToken)
         {
-            cancellationToken.ThrowIfCancellationRequested();
-
             // Write magic number and empty padding up to the 8-byte boundary
 
-            await WriteMagicAsync().ConfigureAwait(false);
+            await WriteMagicAsync(cancellationToken).ConfigureAwait(false);
             await WritePaddingAsync(CalculatePadding(ArrowFileConstants.Magic.Length))
                 .ConfigureAwait(false);
         }
 
-        private async ValueTask WriteFooterAsync(Schema schema, CancellationToken cancellationToken)
+        private async Task WriteFooterAsync(Schema schema, CancellationToken cancellationToken)
         {
             Builder.Clear();
 
@@ -182,15 +180,12 @@ namespace Apache.Arrow.Ipc
 
             // Write magic
 
-            cancellationToken.ThrowIfCancellationRequested();
-
-            await WriteMagicAsync().ConfigureAwait(false);
+            await WriteMagicAsync(cancellationToken).ConfigureAwait(false);
         }
 
-        private Task WriteMagicAsync()
+        private ValueTask WriteMagicAsync(CancellationToken cancellationToken)
         {
-            return BaseStream.WriteAsync(
-                ArrowFileConstants.Magic, 0, ArrowFileConstants.Magic.Length);
+            return BaseStream.WriteAsync(ArrowFileConstants.Magic, cancellationToken);
         }
     }
 }
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
index 0923968..7357a11 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
@@ -66,9 +66,9 @@ namespace Apache.Arrow.Ipc
             }
         }
 
-        public async Task<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken = default)
+        public ValueTask<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken = default)
         {
-            return await _implementation.ReadNextRecordBatchAsync(cancellationToken).ConfigureAwait(false);
+            return _implementation.ReadNextRecordBatchAsync(cancellationToken);
         }
 
         public RecordBatch ReadNextRecordBatch()
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 90cbfd3..f3d0840 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -200,13 +200,12 @@ namespace Apache.Arrow.Ipc
             }
 
             var buffers = recordBatchBuilder.Buffers;
-            var bufferOffsets = new Offset<Flatbuf.Buffer>[buffers.Count];
 
             Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count);
 
             for (var i = buffers.Count - 1; i >= 0; i--)
             {
-                bufferOffsets[i] = Flatbuf.Buffer.CreateBuffer(Builder,
+                Flatbuf.Buffer.CreateBuffer(Builder,
                     buffers[i].Offset, buffers[i].Length);
             }
 
@@ -259,10 +258,9 @@ namespace Apache.Arrow.Ipc
             return WriteRecordBatchInternalAsync(recordBatch, cancellationToken);
         }
 
-        public async Task WriteBufferAsync(ArrowBuffer arrowBuffer, CancellationToken cancellationToken = default)
+        private ValueTask WriteBufferAsync(ArrowBuffer arrowBuffer, CancellationToken cancellationToken = default)
         {
-            await BaseStream.WriteAsync(arrowBuffer.Memory, cancellationToken)
-                .ConfigureAwait(false);
+            return BaseStream.WriteAsync(arrowBuffer.Memory, cancellationToken);
         }
 
         private protected Offset<Flatbuf.Schema> SerializeSchema(Schema schema)
@@ -365,11 +363,14 @@ namespace Apache.Arrow.Ipc
             }
         }
 
-        protected Task WritePaddingAsync(int length)
+        private protected ValueTask WritePaddingAsync(int length)
         {
-            if (length <= 0) return Task.CompletedTask;
+            if (length > 0)
+            {
+                return BaseStream.WriteAsync(Padding.AsMemory(0, Math.Min(Padding.Length, length)));
+            }
 
-            return BaseStream.WriteAsync(Padding, 0, Math.Min(Padding.Length, length));
+            return default;
         }
 
         public virtual void Dispose()
diff --git a/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs b/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs
index 1230613..255ea4f 100644
--- a/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs
+++ b/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs
@@ -20,7 +20,7 @@ namespace Apache.Arrow.Ipc
 {
     public interface IArrowReader
     {
-        Task<RecordBatch> ReadNextRecordBatchAsync(
+        ValueTask<RecordBatch> ReadNextRecordBatchAsync(
             CancellationToken cancellationToken = default);
     }
 }