You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "Platob (via GitHub)" <gi...@apache.org> on 2023/05/06 06:19:22 UTC

[GitHub] [arrow] Platob commented on a diff in pull request #35299: GH-35285: [C#] Need refacto IArrowArrayBuilder for nested types

Platob commented on code in PR #35299:
URL: https://github.com/apache/arrow/pull/35299#discussion_r1186646427


##########
csharp/src/Apache.Arrow/Builder/BufferBuilder.cs:
##########
@@ -0,0 +1,356 @@
+using System;
+using System.Buffers;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Apache.Arrow.Memory;
+
+namespace Apache.Arrow.Builder
+{
+    public class BufferBuilder : IBufferBuilder
+    {
+        public class BitBuffer
+        {
+            private readonly bool[] _bits;
+
+            public int Length { get; private set; }
+            public int AvailableLength => Capacity - Length;
+
+            public int Capacity;
+
+            public bool IsFull => Length == Capacity;
+            public byte ToByte(ref byte data) => BitUtility.ToByte(ref data, _bits);
+
+            public BitBuffer(int capacity = 8)
+            {
+                Capacity = capacity;
+                _bits = new bool[capacity];
+                Length = 0;
+            }
+
+            public void Append(bool bit) => _bits[Length++] = bit;
+            public void Fill(ReadOnlySpan<bool> bits)
+            {
+                bits.CopyTo(_bits.AsSpan().Slice(Length, bits.Length));
+                Length += bits.Length;
+            }
+
+            public void Reset()
+            {
+                for (int i = 0; i < _bits.Length; i++)
+                {
+                    _bits[i] = false;
+                }
+                Length = 0;
+            }
+        }
+
+        private const int DefaultCapacity = 64;
+        public int ByteLength { get; private set; }
+
+        public Memory<byte> Memory { get; private set; }
+        public BitBuffer BitOverhead { get; }

Review Comment:
   During its lifetime, the builder allocates the 8-bit buffer in `BitOverhead` only once.
   
   On every bit (or bits) append, it checks whether all 8 bits have been written (similar to the old bitmap builder's `% 8` check).
   
   If it is full (8 bits are written), it will write a new byte to the Memory buffer and reset all values in the BitOverhead to false, setting its length to 0 so that it is ready to receive more bits.
   
   ```csharp
   /// <summary>
   /// Ad-hoc timing comparison between <c>BufferBuilder</c> and
   /// <c>ArrowBuffer.BitmapBuilder</c>, written as an xUnit test class that
   /// reports its measurements through <see cref="ITestOutputHelper"/>.
   /// </summary>
   public class BenchmarkBits
   {
       /// <summary>
       /// Invokes <paramref name="action"/> <paramref name="repetition"/> times
       /// under a single <see cref="Stopwatch"/> and returns the average cost
       /// of one invocation.
       /// </summary>
       /// <param name="repetition">Number of times to invoke the action; also the divisor of the result.</param>
       /// <param name="action">The work to time.</param>
       /// <returns>
       /// Total elapsed stopwatch ticks divided by <paramref name="repetition"/>
       /// (integer division, so any fractional part is dropped).
       /// </returns>
       public static long ElapsedTicks(int repetition, Action action)
       {
           Stopwatch stopwatch = Stopwatch.StartNew();
   
           // No warm-up pass: the first iterations are included in the measurement.
           for (int i = 0; i < repetition; i++)
               action();
   
           stopwatch.Stop();
   
           return stopwatch.ElapsedTicks / repetition;
       }
   
       // Sink for the benchmark results, supplied through the constructor.
       private readonly ITestOutputHelper output;
   
       // Fixed inputs shared by both scenarios: 100 true values and 100 false values.
       private static readonly bool[] trueBits = Enumerable.Range(0, 100).Select(_ => true).ToArray();
       private static readonly bool[] falseBits = Enumerable.Range(0, 100).Select(_ => false).ToArray();
   
       /// <summary>Stores the output helper used by <see cref="Bench"/> to report results.</summary>
       /// <param name="output">Destination for the benchmark output lines.</param>
       public BenchmarkBits(ITestOutputHelper output)
       {
           this.output = output;
       }
   
       /// <summary>
       /// Times <see cref="MakeOld"/> and then <see cref="MakeNew"/>, one million
       /// repetitions each, and writes the average ticks per repetition for each.
       /// </summary>
       [Fact]
       public void Bench()
       {
           // The old implementation is measured first, so the first output line is the old builder.
           output.WriteLine($"Elapsed {ElapsedTicks(1000000, MakeOld)}");
           output.WriteLine($"Elapsed {ElapsedTicks(1000000, MakeNew)}");
       }
   
       /// <summary>
       /// Scenario for the new builder: two single-bit appends, then the 100 true
       /// bits and the 100 false bits, then <c>Build()</c>. The built result is discarded.
       /// </summary>
       private static void MakeNew()
       {
           // NOTE(review): 64 is presumably the initial capacity; confirm against the BufferBuilder constructor.
           var builder = new BufferBuilder(64);
   
           builder.AppendBit(true).AppendBit(true)
               .AppendBits(trueBits)
               .AppendBits(falseBits)
               .Build();
       }
   
       /// <summary>
       /// Scenario for the old builder: the same sequence of appends as
       /// <see cref="MakeNew"/>, issued through <c>ArrowBuffer.BitmapBuilder</c>.
       /// The built result is discarded.
       /// </summary>
       private static void MakeOld()
       {
           // NOTE(review): 64 is presumably the initial capacity; confirm against the BitmapBuilder constructor.
           var builder = new ArrowBuffer.BitmapBuilder(64);
   
           builder.Append(true).Append(true)
               .AppendRange(trueBits)
               .AppendRange(falseBits)
               .Build();
       }
   }
   ````
   
   Over 1 million iterations, the new implementation averages 168 ticks.
   Over 1 million iterations, the old implementation averages 312 ticks.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org