You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/07/21 23:17:23 UTC

[GitHub] [arrow-rs] HaoYang670 commented on a diff in pull request #2117: Update `GenericBinaryBuilder` to use buffer builders directly.

HaoYang670 commented on code in PR #2117:
URL: https://github.com/apache/arrow-rs/pull/2117#discussion_r927170082


##########
arrow/src/array/builder/generic_binary_builder.rs:
##########
@@ -15,63 +15,72 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::array::{
-    ArrayBuilder, ArrayRef, GenericBinaryArray, GenericListBuilder, OffsetSizeTrait,
-    UInt8Builder,
+use crate::{
+    array::{
+        ArrayBuilder, ArrayDataBuilder, ArrayRef, GenericBinaryArray, OffsetSizeTrait,
+        UInt8BufferBuilder,
+    },
+    datatypes::DataType,
 };
 use std::any::Any;
 use std::sync::Arc;
 
-///  Array builder for `BinaryArray`
+use super::{BooleanBufferBuilder, BufferBuilder};
+
+///  Array builder for [`GenericBinaryArray`]
 #[derive(Debug)]
 pub struct GenericBinaryBuilder<OffsetSize: OffsetSizeTrait> {
-    builder: GenericListBuilder<OffsetSize, UInt8Builder>,
+    value_builder: UInt8BufferBuilder,
+    offsets_builder: BufferBuilder<OffsetSize>,
+    null_buffer_builder: BooleanBufferBuilder,
 }
 
 impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
-    /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values
-    /// array
+    /// Creates a new [`GenericBinaryBuilder`].
+    /// `capacity` is the number of bytes in the values array.
     pub fn new(capacity: usize) -> Self {
-        let values_builder = UInt8Builder::new(capacity);
+        let mut offsets_builder = BufferBuilder::<OffsetSize>::new(1024);
+        offsets_builder.append(OffsetSize::zero());
         Self {
-            builder: GenericListBuilder::new(values_builder),
+            value_builder: UInt8BufferBuilder::new(capacity),
+            offsets_builder,
+            null_buffer_builder: BooleanBufferBuilder::new(1024),
         }
     }
 
-    /// Appends a single byte value into the builder's values array.
-    ///
-    /// Note, when appending individual byte values you must call `append` to delimit each
-    /// distinct list value.
-    #[inline]
-    pub fn append_byte(&mut self, value: u8) {
-        self.builder.values().append_value(value);
-    }
-
     /// Appends a byte slice into the builder.
-    ///
-    /// Automatically calls the `append` method to delimit the slice appended in as a
-    /// distinct array element.
     #[inline]
     pub fn append_value(&mut self, value: impl AsRef<[u8]>) {
-        self.builder.values().append_slice(value.as_ref());
-        self.builder.append(true);
-    }
-
-    /// Finish the current variable-length list array slot.
-    #[inline]
-    pub fn append(&mut self, is_valid: bool) {
-        self.builder.append(is_valid)
+        self.value_builder.append_slice(value.as_ref());
+        self.null_buffer_builder.append(true);
+        self.offsets_builder
+            .append(OffsetSize::from_usize(self.value_builder.len()).unwrap());
     }
 
     /// Append a null value to the array.
     #[inline]
     pub fn append_null(&mut self) {
-        self.append(false)
+        self.null_buffer_builder.append(false);

Review Comment:
   Currently, This optimization is only used on primitive builder and boolean builder. 
   We could do a follow-up to use it on all builders.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org