You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ee...@apache.org on 2023/05/23 15:49:53 UTC
[arrow] branch main updated: GH-32605: [C#] Extend validity buffer api (#35342)
This is an automated email from the ASF dual-hosted git repository.
eerhardt pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new f38943af09 GH-32605: [C#] Extend validity buffer api (#35342)
f38943af09 is described below
commit f38943af09c51d2e50c94f03de14f88cbaa07a3f
Author: Aleksei Smirnov <tl...@inbox.ru>
AuthorDate: Tue May 23 18:49:44 2023 +0300
GH-32605: [C#] Extend validity buffer api (#35342)
Add a method to the ValidityBuffer that adds the same bool value length times without allocating an Enumerable.Repeat object
### Rationale for this change
See more details in the code review comments in https://github.com/apache/arrow/pull/13810
* Closes: #32605
Authored-by: Aleksei Smirnov <tl...@inbox.ru>
Signed-off-by: Eric Erhardt <er...@microsoft.com>
---
.../Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs | 4 +-
.../src/Apache.Arrow/ArrowBuffer.BitmapBuilder.cs | 21 ++++
csharp/src/Apache.Arrow/BitUtility.cs | 109 ++++++++++++++++-----
csharp/src/Apache.Arrow/Properties/AssembyInfo.cs | 1 +
.../test/Apache.Arrow.Tests/ArrayBuilderTests.cs | 2 +-
.../ArrowBufferBitmapBuilderTests.cs | 43 ++++++--
csharp/test/Apache.Arrow.Tests/BitUtilityTests.cs | 98 +++++++++++++++++-
7 files changed, 245 insertions(+), 33 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
index ec1786fdc6..a50d4b52c3 100644
--- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
+++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
@@ -142,7 +142,7 @@ namespace Apache.Arrow
int len = ValueBuffer.Length;
ValueBuffer.Append(span);
int additionalBitsCount = ValueBuffer.Length - len;
- ValidityBuffer.Reserve(additionalBitsCount).AppendRange(Enumerable.Repeat(true, additionalBitsCount));
+ ValidityBuffer.AppendRange(true, additionalBitsCount);
return Instance;
}
@@ -151,7 +151,7 @@ namespace Apache.Arrow
int len = ValueBuffer.Length;
ValueBuffer.AppendRange(values);
var additionalBitsCount = ValueBuffer.Length - len;
- ValidityBuffer.Reserve(additionalBitsCount).AppendRange(Enumerable.Repeat(true, additionalBitsCount));
+ ValidityBuffer.AppendRange(true, additionalBitsCount);
return Instance;
}
diff --git a/csharp/src/Apache.Arrow/ArrowBuffer.BitmapBuilder.cs b/csharp/src/Apache.Arrow/ArrowBuffer.BitmapBuilder.cs
index bdc68e37a9..410c22885a 100644
--- a/csharp/src/Apache.Arrow/ArrowBuffer.BitmapBuilder.cs
+++ b/csharp/src/Apache.Arrow/ArrowBuffer.BitmapBuilder.cs
@@ -138,6 +138,27 @@ namespace Apache.Arrow
return this;
}
+ /// <summary>
+ /// Append multiple bits.
+ /// </summary>
+ /// <param name="value">Value of bits to append.</param>
+ /// <param name="length">Number of times the value should be added.</param>
+ /// <returns>Returns the builder (for fluent-style composition).</returns>
+ public BitmapBuilder AppendRange(bool value, int length)
+ {
+ if (length < 0)
+ throw new ArgumentOutOfRangeException(nameof(length));
+
+ EnsureAdditionalCapacity(length);
+ Span<byte> span = Span;
+ BitUtility.SetBits(span, Length, length, value);
+
+ Length += length;
+ SetBitCount += value ? length : 0;
+
+ return this;
+ }
+
/// <summary>
/// Toggle the bit at a particular index.
/// </summary>
diff --git a/csharp/src/Apache.Arrow/BitUtility.cs b/csharp/src/Apache.Arrow/BitUtility.cs
index 19417bbbe4..d9ad2e6da8 100644
--- a/csharp/src/Apache.Arrow/BitUtility.cs
+++ b/csharp/src/Apache.Arrow/BitUtility.cs
@@ -62,6 +62,67 @@ namespace Apache.Arrow
: (byte)(data[idx] & ~BitMask[mod]);
}
+ /// <summary>
+ /// Set the number of bits in a span of bytes starting
+ /// at a specific index, and limiting to length.
+ /// </summary>
+ /// <param name="data">Span to set bits value.</param>
+ /// <param name="index">Bit index to start counting from.</param>
+ /// <param name="length">Maximum of bits in the span to consider.</param>
+ internal static void SetBits(Span<byte> data, int index, int length, bool value)
+ {
+ if (length == 0)
+ return;
+
+ int endBitIndex = checked(index + length - 1);
+
+ // Use simpler method if there aren't many values
+ if (length < 20)
+ {
+ for (int i = index; i <= endBitIndex; i++)
+ {
+ SetBit(data, i, value);
+ }
+ return;
+ }
+
+ // Otherwise do the work to figure out how to copy whole bytes
+ int startByteIndex = index / 8;
+ int startBitOffset = index % 8;
+ int endByteIndex = endBitIndex / 8;
+ int endBitOffset = endBitIndex % 8;
+
+ // If the starting index and ending index are not byte-aligned,
+ // we'll need to set bits the slow way. If they are
+ // byte-aligned, and for all other bytes in the 'middle', we
+ // can use a faster byte-aligned set.
+ int fullByteStartIndex = startBitOffset == 0 ? startByteIndex : startByteIndex + 1;
+ int fullByteEndIndex = endBitOffset == 7 ? endByteIndex : endByteIndex - 1;
+
+ // Bits we will be using to finish up the first byte
+ if (startBitOffset != 0)
+ {
+ Span<byte> slice = data.Slice(startByteIndex, 1);
+ for (int i = startBitOffset; i <= 7; i++)
+ SetBit(slice, i, value);
+ }
+
+ if (fullByteEndIndex >= fullByteStartIndex)
+ {
+ Span<byte> slice = data.Slice(fullByteStartIndex, fullByteEndIndex - fullByteStartIndex + 1);
+ byte fill = (byte)(value ? 0xFF : 0x00);
+
+ slice.Fill(fill);
+ }
+
+ if (endBitOffset != 7)
+ {
+ Span<byte> slice = data.Slice(endByteIndex, 1);
+ for (int i = 0; i <= endBitOffset; i++)
+ SetBit(slice, i, value);
+ }
+ }
+
public static void ToggleBit(Span<byte> data, int index)
{
data[index / 8] ^= BitMask[index % 8];
@@ -69,29 +130,33 @@ namespace Apache.Arrow
/// <summary>
/// Counts the number of set bits in a span of bytes starting
- /// at a specific bit offset.
+ /// at a specific bit index.
/// </summary>
- /// <param name="data">Span to count bits</param>
- /// <param name="offset">Bit offset to start counting from</param>
- /// <returns>Count of set (one) bits</returns>
- public static int CountBits(ReadOnlySpan<byte> data, int offset) =>
- CountBits(data, offset, data.Length * 8 - offset);
+ /// <param name="data">Span to count bits.</param>
+ /// <param name="index">Bit index to start counting from.</param>
+ /// <returns>Count of set (one) bits.</returns>
+ public static int CountBits(ReadOnlySpan<byte> data, int index) =>
+ CountBits(data, index, data.Length * 8 - index);
/// <summary>
/// Counts the number of set bits in a span of bytes starting
- /// at a specific bit offset, and limiting to a certain number of bits
+ /// at a specific bit index, and limiting to a certain number of bits
/// in the span.
/// </summary>
/// <param name="data">Span to count bits.</param>
- /// <param name="offset">Bit offset to start counting from.</param>
+ /// <param name="index">Bit index to start counting from.</param>
/// <param name="length">Maximum of bits in the span to consider.</param>
- /// <returns>Count of set (one) bits</returns>
- public static int CountBits(ReadOnlySpan<byte> data, int offset, int length)
+ /// <returns>Count of set (one) bits.</returns>
+ public static int CountBits(ReadOnlySpan<byte> data, int index, int length)
{
- int startByteIndex = offset / 8;
- int startBitOffset = offset % 8;
- int endByteIndex = (offset + length - 1) / 8;
- int endBitOffset = (offset + length - 1) % 8;
+ int startByteIndex = index / 8;
+ int startBitOffset = index % 8;
+
+ int endBitIndex = index + length - 1;
+
+ int endByteIndex = endBitIndex / 8;
+ int endBitOffset = endBitIndex % 8;
+
if (startBitOffset < 0)
return 0;
@@ -99,7 +164,7 @@ namespace Apache.Arrow
if (startByteIndex == endByteIndex)
{
// Range starts and ends within the same byte.
- var slice = data.Slice(startByteIndex, 1);
+ ReadOnlySpan<byte> slice = data.Slice(startByteIndex, 1);
for (int i = startBitOffset; i <= endBitOffset; i++)
count += GetBit(slice, i) ? 1 : 0;
@@ -107,7 +172,7 @@ namespace Apache.Arrow
}
// If the starting index and ending index are not byte-aligned,
- // we'll need to count bits the slow way. If they are
+ // we'll need to count bits the slow way. If they are
// byte-aligned, and for all other bytes in the 'middle', we
// can use a faster byte-aligned count.
int fullByteStartIndex = startBitOffset == 0 ? startByteIndex : startByteIndex + 1;
@@ -115,20 +180,20 @@ namespace Apache.Arrow
if (startBitOffset != 0)
{
- var slice = data.Slice(startByteIndex, 1);
+ ReadOnlySpan<byte> slice = data.Slice(startByteIndex, 1);
for (int i = startBitOffset; i <= 7; i++)
count += GetBit(slice, i) ? 1 : 0;
}
if (fullByteEndIndex >= fullByteStartIndex)
{
- var slice = data.Slice(fullByteStartIndex, fullByteEndIndex - fullByteStartIndex + 1);
+ ReadOnlySpan<byte> slice = data.Slice(fullByteStartIndex, fullByteEndIndex - fullByteStartIndex + 1);
count += CountBits(slice);
}
if (endBitOffset != 7)
{
- var slice = data.Slice(endByteIndex, 1);
+ ReadOnlySpan<byte> slice = data.Slice(endByteIndex, 1);
for (int i = 0; i <= endBitOffset; i++)
count += GetBit(slice, i) ? 1 : 0;
}
@@ -139,7 +204,7 @@ namespace Apache.Arrow
/// <summary>
/// Counts the number of set bits in a span of bytes.
/// </summary>
- /// <param name="data">Span to count bits</param>
+ /// <param name="data">Span to count bits.</param>
/// <returns>Count of set (one) bits.</returns>
public static int CountBits(ReadOnlySpan<byte> data)
{
@@ -186,8 +251,8 @@ namespace Apache.Arrow
/// <summary>
/// Calculates the number of bytes required to store n bits.
/// </summary>
- /// <param name="n">number of bits</param>
- /// <returns>number of bytes</returns>
+ /// <param name="n">Number of bits</param>
+ /// <returns>Number of bytes</returns>
public static int ByteCount(int n)
{
Debug.Assert(n >= 0);
diff --git a/csharp/src/Apache.Arrow/Properties/AssembyInfo.cs b/csharp/src/Apache.Arrow/Properties/AssembyInfo.cs
index 415bf89af4..6b2a7aea39 100644
--- a/csharp/src/Apache.Arrow/Properties/AssembyInfo.cs
+++ b/csharp/src/Apache.Arrow/Properties/AssembyInfo.cs
@@ -16,3 +16,4 @@
using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Apache.Arrow.Flight, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
+[assembly: InternalsVisibleTo("Apache.Arrow.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100e504183f6d470d6b67b6d19212be3e1f598f70c246a120194bc38130101d0c1853e4a0f2232cb12e37a7a90e707aabd38511dac4f25fcb0d691b2aa265900bf42de7f70468fc997551a40e1e0679b605aa2088a4a69e07c117e988f5b1738c570ee66997fba02485e7856a49eca5fd0706d09899b8312577cbb9034599fc92d4")]
diff --git a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
index 8c90f78072..9ac2f779a6 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs
@@ -160,7 +160,7 @@ namespace Apache.Arrow.Tests
public void NestedListArrayBuilder()
{
var childListType = new ListType(Int64Type.Default);
- var parentListBuilder = new ListArray.Builder(childListType);
+ var parentListBuilder = new ListArray.Builder((IArrowType)childListType);
var childListBuilder = parentListBuilder.ValueBuilder as ListArray.Builder;
Assert.NotNull(childListBuilder);
var valueBuilder = childListBuilder.ValueBuilder as Int64Array.Builder;
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowBufferBitmapBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowBufferBitmapBuilderTests.cs
index 1e20446766..dec8274962 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowBufferBitmapBuilderTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowBufferBitmapBuilderTests.cs
@@ -83,7 +83,7 @@ namespace Apache.Arrow.Tests
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
int initialCapacity = builder.Capacity;
- builder.AppendRange(Enumerable.Repeat(true, initialCapacity)); // Fill to capacity.
+ builder.AppendRange(true, initialCapacity); // Fill to capacity.
// Act
var actualReturnValue = builder.Append(true);
@@ -136,7 +136,7 @@ namespace Apache.Arrow.Tests
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
- builder.AppendRange(Enumerable.Repeat(true, 8));
+ builder.AppendRange(true, 8);
// Act
var actualReturnValue = builder.Append(new Span<byte>(bytesToAppend), validBits);
@@ -162,7 +162,7 @@ namespace Apache.Arrow.Tests
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
- builder.AppendRange(Enumerable.Repeat(true, 9));
+ builder.AppendRange(true, 9);
// Act
var actualReturnValue = builder.Append(new Span<byte>(bytesToAppend), validBits);
@@ -180,7 +180,7 @@ namespace Apache.Arrow.Tests
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
- builder.AppendRange(Enumerable.Repeat(true, 8));
+ builder.AppendRange(true, 8);
// Act
var actualReturnValue = builder.Append(Span<byte>.Empty, 8);
@@ -198,7 +198,7 @@ namespace Apache.Arrow.Tests
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder();
- builder.AppendRange(Enumerable.Repeat(true, 8));
+ builder.AppendRange(true, 8);
// Act
Assert.Throws<ArgumentException>(() => builder.Append(new byte[] { 0b0010111 }, 9));
@@ -213,7 +213,7 @@ namespace Apache.Arrow.Tests
[InlineData(new bool[] {}, new[] { true, false }, 2, 1, 1)]
[InlineData(new[] { true, false }, new bool[] {}, 2, 1, 1)]
[InlineData(new[] { true, false }, new[] { true, false }, 4, 2, 2)]
- public void IncreasesLength(
+ public void AppendingEnumerableIncreasesLength(
bool[] initialContents,
bool[] toAppend,
int expectedLength,
@@ -234,6 +234,35 @@ namespace Apache.Arrow.Tests
Assert.Equal(expectedSetBitCount, builder.SetBitCount);
Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount);
}
+
+ [Theory]
+ [InlineData(new bool[] { }, true, 0, 0, 0, 0)]
+ [InlineData(new bool[] { }, true, 2, 2, 2, 0)]
+ [InlineData(new[] { true, false }, false, 0, 2, 1, 1)]
+ [InlineData(new[] { true, false }, false, 2, 4, 1, 3)]
+ [InlineData(new[] { true, false }, true, 2, 4, 3, 1)]
+ public void AppendingValueMultipleTimesIncreasesLength(
+ bool[] initialContents,
+ bool valueToAppend,
+ int numberOfTimes,
+ int expectedLength,
+ int expectedSetBitCount,
+ int expectedUnsetBitCount)
+ {
+ // Arrange
+ var builder = new ArrowBuffer.BitmapBuilder();
+ builder.AppendRange(initialContents);
+
+ // Act
+ var actualReturnValue = builder.AppendRange(valueToAppend, numberOfTimes);
+
+ // Assert
+ Assert.Equal(builder, actualReturnValue);
+ Assert.Equal(expectedLength, builder.Length);
+ Assert.True(builder.Capacity >= expectedLength);
+ Assert.Equal(expectedSetBitCount, builder.SetBitCount);
+ Assert.Equal(expectedUnsetBitCount, builder.UnsetBitCount);
+ }
}
public class Build
@@ -329,7 +358,7 @@ namespace Apache.Arrow.Tests
{
// Arrange
var builder = new ArrowBuffer.BitmapBuilder(initialCapacity);
- builder.AppendRange(Enumerable.Repeat(true, numBitsToAppend));
+ builder.AppendRange(true, numBitsToAppend);
// Act
var actualReturnValue = builder.Reserve(additionalCapacity);
diff --git a/csharp/test/Apache.Arrow.Tests/BitUtilityTests.cs b/csharp/test/Apache.Arrow.Tests/BitUtilityTests.cs
index 5e18716a06..528dca1bde 100644
--- a/csharp/test/Apache.Arrow.Tests/BitUtilityTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/BitUtilityTests.cs
@@ -58,7 +58,7 @@ namespace Apache.Arrow.Tests
Assert.Equal(expectedCount,
BitUtility.CountBits(data, offset));
}
-
+
[Theory]
[InlineData(new byte[] { 0b11111111 }, 0, 8, 8)]
[InlineData(new byte[] { 0b11111111 }, 0, 4, 4)]
@@ -127,6 +127,102 @@ namespace Apache.Arrow.Tests
BitUtility.SetBit(data, index);
Assert.Equal(expectedValue, data);
}
+
+ [Theory]
+ [InlineData(new byte[] { 0b10000000 }, 0, true, new byte[] { 0b10000001 })]
+ [InlineData(new byte[] { 0b00000000 }, 2, true, new byte[] { 0b00000100 })]
+ [InlineData(new byte[] { 0b11111111 }, 2, false, new byte[] { 0b11111011 })]
+ [InlineData(new byte[] { 0b01111111 }, 7, true, new byte[] { 0b11111111 })]
+ [InlineData(new byte[] { 0b00000000, 0b00000000 }, 8, true, new byte[] { 0b00000000, 0b00000001 })]
+ [InlineData(new byte[] { 0b11111110, 0b11111110 }, 15, false, new byte[] { 0b11111110, 0b01111110 })]
+ public void SetsBitValueAtIndex(byte[] data, int index, bool value, byte[] expectedValue)
+ {
+ BitUtility.SetBit(data, index, value);
+ Assert.Equal(expectedValue, data);
+ }
+ }
+
+ public class SetBits
+ {
+ [Theory]
+ [InlineData(new byte[] { 0b00000000 }, 0, 0, true, new byte[] { 0b00000000 })]
+ [InlineData(new byte[] { 0b00000000 }, 0, 1, true, new byte[] { 0b00000001 })]
+ [InlineData(new byte[] { 0b00000000 }, 0, 8, true, new byte[] { 0b11111111 })]
+ [InlineData(new byte[] { 0b00000000 }, 2, 2, true, new byte[] { 0b00001100 })]
+ [InlineData(new byte[] { 0b00000000 }, 5, 3, true, new byte[] { 0b11100000 })]
+ [InlineData(new byte[] { 0b00000000, 0b00000000 }, 8, 1, true, new byte[] { 0b00000000, 0b00000001 })]
+ [InlineData(new byte[] { 0b00000000, 0b00000000 }, 15, 1, true, new byte[] { 0b00000000, 0b10000000 })]
+ [InlineData(new byte[] { 0b00000000, 0b00000000 }, 7, 2, true, new byte[] { 0b10000000, 0b00000001 })]
+ [InlineData(new byte[] { 0b11111111, 0b11111111 }, 7, 2, false, new byte[] { 0b01111111, 0b11111110 })]
+ public void SetsBitsInRangeOnSmallRanges(byte[] data, int index, int length, bool value, byte[] expectedValue)
+ {
+ BitUtility.SetBits(data, index, length, value);
+ Assert.Equal(expectedValue, data);
+ }
+
+ [Fact]
+ public void SetsBitsInRangeOnBigRanges()
+ {
+ //Arrange
+ //Allocate 64 bits
+ Span<byte> data = stackalloc byte[8];
+ data.Clear();
+
+ //Act
+ //Sets 56 bits in the middle
+ BitUtility.SetBits(data, 4, 56, true);
+
+ //Assert
+ //Check that 4 bits in the lower and upper boundaries are left untouched
+ Assert.Equal(0b11110000, data[0]);
+ Assert.Equal(0b00001111, data[7]);
+
+ //Check bits in the middle
+ for (int i = 1; i < 7; i++)
+ Assert.Equal(0b11111111, data[i]);
+ }
+
+ [Fact]
+ public void SetsBitsInRangeOnBigRanges_LowerBoundaryCornerCase()
+ {
+ //Arrange
+ //Allocate 64 bits
+ Span<byte> data = stackalloc byte[8];
+ data.Clear();
+
+ //Act
+ //Sets all bits starting from 1
+ BitUtility.SetBits(data, 1, 63, true);
+
+ //Assert
+ //Check lower boundary
+ Assert.Equal(0b11111110, data[0]);
+
+ //Check other bits
+ for (int i = 1; i < 8; i++)
+ Assert.Equal(0b11111111, data[i]);
+ }
+
+ [Fact]
+ public void SetsBitsInRangeOnBigRanges_UpperBoundaryCornerCase()
+ {
+ //Arrange
+ //Allocate 64 bits
+ Span<byte> data = stackalloc byte[8];
+ data.Clear();
+
+ //Act
+ //Sets all bits starting from 0
+ BitUtility.SetBits(data, 0, 63, true);
+
+ //Assert
+ //Check upper boundary
+ Assert.Equal(0b01111111, data[7]);
+
+ //Check other bits
+ for (int i = 0; i < 7; i++)
+ Assert.Equal(0b11111111, data[i]);
+ }
}
public class ClearBit