You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/04/30 14:37:47 UTC
[arrow] branch master updated: ARROW-4717 [C#] Consider exposing
ValueTask instead of Task
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3514a37 ARROW-4717 [C#] Consider exposing ValueTask instead of Task
3514a37 is described below
commit 3514a37091a9ee06a7d74b7fc57b18caeef8d0cb
Author: Eric Erhardt <er...@microsoft.com>
AuthorDate: Tue Apr 30 09:37:40 2019 -0500
ARROW-4717 [C#] Consider exposing ValueTask instead of Task
Eliminating allocations in the C# Arrow library:
1. Use ValueTask instead of Task in the public APIs.
2. Eliminate allocations in ArrayData constructor by taking an array directly instead of calling `ToArray()` every time.
3. Remove some other minor allocations that provide no benefit.
4. Also, add a simple `NullCount == 0` check to optimize the case where there are no nulls in the data, but the null bitmap is filled with 0s instead of being empty.
With this change the `ArrowReaderWithMemory` benchmark in the repo goes from:
## Before
| Method | Allocated Memory/Op |
|---------------------------- |--------------------:|
| ArrowReaderWithMemory | 5.84 KB |
## After
| Method | Allocated Memory/Op |
|---------------------------- |--------------------:|
| ArrowReaderWithMemory | 4.48 KB |
@stephentoub @pgovind @chutchinson
Author: Eric Erhardt <er...@microsoft.com>
Closes #4194 from eerhardt/ValueTask and squashes the following commits:
d2a00afd9 <Eric Erhardt> PR feedback.
310bb2f3b <Eric Erhardt> Convert all Task usages to ValueTask to save on allocations.
d1d2687d5 <Eric Erhardt> Minor optimizations
---
csharp/src/Apache.Arrow/Arrays/Array.cs | 2 +-
csharp/src/Apache.Arrow/Arrays/ArrayData.cs | 13 +++
.../src/Apache.Arrow/Arrays/ArrowArrayFactory.cs | 109 +++++++++------------
.../Apache.Arrow/Extensions/ArrayDataExtensions.cs | 14 +--
csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs | 4 +-
.../Ipc/ArrowFileReaderImplementation.cs | 4 +-
csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs | 17 ++--
csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs | 4 +-
csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 17 ++--
csharp/src/Apache.Arrow/Ipc/IArrowReader.cs | 2 +-
10 files changed, 88 insertions(+), 98 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Arrays/Array.cs b/csharp/src/Apache.Arrow/Arrays/Array.cs
index e795ad9..561547a 100644
--- a/csharp/src/Apache.Arrow/Arrays/Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Array.cs
@@ -41,7 +41,7 @@ namespace Apache.Arrow
}
public bool IsValid(int index) =>
- NullBitmapBuffer.IsEmpty || BitUtility.GetBit(NullBitmapBuffer.Span, index);
+ NullCount == 0 || NullBitmapBuffer.IsEmpty || BitUtility.GetBit(NullBitmapBuffer.Span, index);
public bool IsNull(int index) => !IsValid(index);
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs
index 2074f12..e2345e0 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayData.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayData.cs
@@ -40,5 +40,18 @@ namespace Apache.Arrow
Buffers = buffers?.ToArray();
Children = children?.ToArray();
}
+
+ public ArrayData(
+ IArrowType dataType,
+ int length, int nullCount = 0, int offset = 0,
+ ArrowBuffer[] buffers = null, ArrayData[] children = null)
+ {
+ DataType = dataType ?? NullType.Default;
+ Length = length;
+ NullCount = nullCount;
+ Offset = offset;
+ Buffers = buffers;
+ Children = children;
+ }
}
}
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
index 542871c..ac598af 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
@@ -18,70 +18,59 @@ using System;
namespace Apache.Arrow
{
- public class ArrowArrayFactory
+ public static class ArrowArrayFactory
{
- private class FactoryTypeVisitor :
- IArrowTypeVisitor<Int8Type>,
- IArrowTypeVisitor<Int16Type>,
- IArrowTypeVisitor<Int32Type>,
- IArrowTypeVisitor<Int64Type>,
- IArrowTypeVisitor<UInt8Type>,
- IArrowTypeVisitor<UInt16Type>,
- IArrowTypeVisitor<UInt32Type>,
- IArrowTypeVisitor<UInt64Type>,
- IArrowTypeVisitor<BooleanType>,
- IArrowTypeVisitor<FloatType>,
- IArrowTypeVisitor<DoubleType>,
- IArrowTypeVisitor<StructType>,
- IArrowTypeVisitor<UnionType>,
- IArrowTypeVisitor<ListType>,
- IArrowTypeVisitor<TimestampType>,
- IArrowTypeVisitor<StringType>,
- IArrowTypeVisitor<BinaryType>
+ public static IArrowArray BuildArray(ArrayData data)
{
- private readonly ArrayData _data;
- private IArrowArray _array;
-
- public FactoryTypeVisitor(ArrayData data)
+ switch (data.DataType.TypeId)
{
- _data = data;
+ case ArrowTypeId.Boolean:
+ return new BooleanArray(data);
+ case ArrowTypeId.UInt8:
+ return new UInt8Array(data);
+ case ArrowTypeId.Int8:
+ return new Int8Array(data);
+ case ArrowTypeId.UInt16:
+ return new UInt16Array(data);
+ case ArrowTypeId.Int16:
+ return new Int16Array(data);
+ case ArrowTypeId.UInt32:
+ return new UInt32Array(data);
+ case ArrowTypeId.Int32:
+ return new Int32Array(data);
+ case ArrowTypeId.UInt64:
+ return new UInt64Array(data);
+ case ArrowTypeId.Int64:
+ return new Int64Array(data);
+ case ArrowTypeId.Float:
+ return new FloatArray(data);
+ case ArrowTypeId.Double:
+ return new DoubleArray(data);
+ case ArrowTypeId.String:
+ return new StringArray(data);
+ case ArrowTypeId.Binary:
+ return new BinaryArray(data);
+ case ArrowTypeId.Timestamp:
+ return new TimestampArray(data);
+ case ArrowTypeId.List:
+ return new ListArray(data);
+ case ArrowTypeId.Struct:
+ return new StructArray(data);
+ case ArrowTypeId.Union:
+ return new UnionArray(data);
+ case ArrowTypeId.Date64:
+ case ArrowTypeId.Date32:
+ case ArrowTypeId.Decimal:
+ case ArrowTypeId.Dictionary:
+ case ArrowTypeId.FixedSizedBinary:
+ case ArrowTypeId.HalfFloat:
+ case ArrowTypeId.Interval:
+ case ArrowTypeId.Map:
+ case ArrowTypeId.Time32:
+ case ArrowTypeId.Time64:
+ default:
+ throw new NotSupportedException($"An ArrowArray cannot be built for type {data.DataType.TypeId}.");
}
-
- public IArrowArray CreateArray()
- {
- _data.DataType.Accept(this);
- return _array;
- }
-
- public void Visit(Int8Type type) => _array = new Int8Array(_data);
- public void Visit(Int16Type type) => _array = new Int16Array(_data);
- public void Visit(Int32Type type) => _array = new Int32Array(_data);
- public void Visit(Int64Type type) => _array = new Int64Array(_data);
- public void Visit(UInt8Type type) => _array = new UInt8Array(_data);
- public void Visit(UInt16Type type) => _array = new UInt16Array(_data);
- public void Visit(UInt32Type type) => _array = new UInt32Array(_data);
- public void Visit(UInt64Type type) => _array = new UInt64Array(_data);
- public void Visit(BooleanType type) => _array = new BooleanArray(_data);
- public void Visit(FloatType type) => _array = new FloatArray(_data);
- public void Visit(DoubleType type) => _array = new DoubleArray(_data);
- public void Visit(StructType type) => _array = new StructArray(_data);
- public void Visit(UnionType type) => _array = new UnionArray(_data);
- public void Visit(ListType type) => _array = new ListArray(_data);
- public void Visit(TimestampType type) => _array = new TimestampArray(_data);
- public void Visit(BinaryType type) => _array = new BinaryArray(_data);
- public void Visit(StringType type) => _array = new StringArray(_data);
-
- public void Visit(IArrowType type)
- {
- throw new NotImplementedException();
- }
- }
-
- public static IArrowArray BuildArray(ArrayData data)
- {
- var visitor = new FactoryTypeVisitor(data);
- var array = visitor.CreateArray();
- return array;
}
}
}
diff --git a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
index 3cc4c13..399d9bf 100644
--- a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
+++ b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs
@@ -31,21 +31,13 @@ namespace Apache.Arrow
}
}
- public static void EnsureDataType(this ArrayData data, params ArrowTypeId[] ids)
+ public static void EnsureDataType(this ArrayData data, ArrowTypeId id)
{
- var valid = true;
-
- foreach (var id in ids)
- {
- if (data.DataType.TypeId != id)
- valid = false;
- }
-
- if (!valid)
+ if (data.DataType.TypeId != id)
{
// TODO: Use localizable string resource
throw new ArgumentException(
- $"Specified array type <{data.DataType.TypeId}> does not match expected type(s) <{string.Join(",", ids)}>",
+ $"Specified array type <{data.DataType.TypeId}> does not match expected type(s) <{id}>",
nameof(data.DataType.TypeId));
}
}
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs
index da9525e..b61c2d9 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileReader.cs
@@ -45,12 +45,12 @@ namespace Apache.Arrow.Ipc
return new ArrowFileReader(stream);
}
- public Task<int> RecordBatchCountAsync()
+ public ValueTask<int> RecordBatchCountAsync()
{
return Implementation.RecordBatchCountAsync();
}
- public Task<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken = default)
+ public ValueTask<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken = default)
{
return Implementation.ReadRecordBatchAsync(index, cancellationToken);
}
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
index 7a62085..370ac6d 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
@@ -42,7 +42,7 @@ namespace Apache.Arrow.Ipc
{
}
- public async Task<int> RecordBatchCountAsync()
+ public async ValueTask<int> RecordBatchCountAsync()
{
if (!HasReadSchema)
{
@@ -142,7 +142,7 @@ namespace Apache.Arrow.Ipc
Schema = _footer.Schema;
}
- public async Task<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken)
+ public async ValueTask<RecordBatch> ReadRecordBatchAsync(int index, CancellationToken cancellationToken)
{
await ReadSchemaAsync().ConfigureAwait(false);
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
index d6e124a..265c31f 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowFileWriter.cs
@@ -112,18 +112,16 @@ namespace Apache.Arrow.Ipc
await BaseStream.FlushAsync(cancellationToken).ConfigureAwait(false);
}
- private async ValueTask WriteHeaderAsync(CancellationToken cancellationToken)
+ private async Task WriteHeaderAsync(CancellationToken cancellationToken)
{
- cancellationToken.ThrowIfCancellationRequested();
-
// Write magic number and empty padding up to the 8-byte boundary
- await WriteMagicAsync().ConfigureAwait(false);
+ await WriteMagicAsync(cancellationToken).ConfigureAwait(false);
await WritePaddingAsync(CalculatePadding(ArrowFileConstants.Magic.Length))
.ConfigureAwait(false);
}
- private async ValueTask WriteFooterAsync(Schema schema, CancellationToken cancellationToken)
+ private async Task WriteFooterAsync(Schema schema, CancellationToken cancellationToken)
{
Builder.Clear();
@@ -182,15 +180,12 @@ namespace Apache.Arrow.Ipc
// Write magic
- cancellationToken.ThrowIfCancellationRequested();
-
- await WriteMagicAsync().ConfigureAwait(false);
+ await WriteMagicAsync(cancellationToken).ConfigureAwait(false);
}
- private Task WriteMagicAsync()
+ private ValueTask WriteMagicAsync(CancellationToken cancellationToken)
{
- return BaseStream.WriteAsync(
- ArrowFileConstants.Magic, 0, ArrowFileConstants.Magic.Length);
+ return BaseStream.WriteAsync(ArrowFileConstants.Magic, cancellationToken);
}
}
}
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
index 0923968..7357a11 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamReader.cs
@@ -66,9 +66,9 @@ namespace Apache.Arrow.Ipc
}
}
- public async Task<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken = default)
+ public ValueTask<RecordBatch> ReadNextRecordBatchAsync(CancellationToken cancellationToken = default)
{
- return await _implementation.ReadNextRecordBatchAsync(cancellationToken).ConfigureAwait(false);
+ return _implementation.ReadNextRecordBatchAsync(cancellationToken);
}
public RecordBatch ReadNextRecordBatch()
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index 90cbfd3..f3d0840 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -200,13 +200,12 @@ namespace Apache.Arrow.Ipc
}
var buffers = recordBatchBuilder.Buffers;
- var bufferOffsets = new Offset<Flatbuf.Buffer>[buffers.Count];
Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count);
for (var i = buffers.Count - 1; i >= 0; i--)
{
- bufferOffsets[i] = Flatbuf.Buffer.CreateBuffer(Builder,
+ Flatbuf.Buffer.CreateBuffer(Builder,
buffers[i].Offset, buffers[i].Length);
}
@@ -259,10 +258,9 @@ namespace Apache.Arrow.Ipc
return WriteRecordBatchInternalAsync(recordBatch, cancellationToken);
}
- public async Task WriteBufferAsync(ArrowBuffer arrowBuffer, CancellationToken cancellationToken = default)
+ private ValueTask WriteBufferAsync(ArrowBuffer arrowBuffer, CancellationToken cancellationToken = default)
{
- await BaseStream.WriteAsync(arrowBuffer.Memory, cancellationToken)
- .ConfigureAwait(false);
+ return BaseStream.WriteAsync(arrowBuffer.Memory, cancellationToken);
}
private protected Offset<Flatbuf.Schema> SerializeSchema(Schema schema)
@@ -365,11 +363,14 @@ namespace Apache.Arrow.Ipc
}
}
- protected Task WritePaddingAsync(int length)
+ private protected ValueTask WritePaddingAsync(int length)
{
- if (length <= 0) return Task.CompletedTask;
+ if (length > 0)
+ {
+ return BaseStream.WriteAsync(Padding.AsMemory(0, Math.Min(Padding.Length, length)));
+ }
- return BaseStream.WriteAsync(Padding, 0, Math.Min(Padding.Length, length));
+ return default;
}
public virtual void Dispose()
diff --git a/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs b/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs
index 1230613..255ea4f 100644
--- a/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs
+++ b/csharp/src/Apache.Arrow/Ipc/IArrowReader.cs
@@ -20,7 +20,7 @@ namespace Apache.Arrow.Ipc
{
public interface IArrowReader
{
- Task<RecordBatch> ReadNextRecordBatchAsync(
+ ValueTask<RecordBatch> ReadNextRecordBatchAsync(
CancellationToken cancellationToken = default);
}
}