You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/19 16:29:48 UTC

[arrow-rs] branch master updated: Refactor BooleanBuilder Constructors (#2515)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a90fc64f0 Refactor BooleanBuilder Constructors (#2515)
a90fc64f0 is described below

commit a90fc64f0d568bee350dadcaa2c6c7fcc21e0b97
Author: Palladium <ps...@gmail.com>
AuthorDate: Fri Aug 19 21:59:43 2022 +0530

    Refactor BooleanBuilder Constructors (#2515)
    
    * Refactor boolean builder
    
    * Fix errors
    
    * Fix typo in documentation
    
    * impl default trait for BooleanBuilder
---
 arrow/benches/builder.rs                   |  2 +-
 arrow/src/array/array_boolean.rs           |  2 +-
 arrow/src/array/builder/boolean_builder.rs | 17 ++++++++++++++---
 arrow/src/array/builder/struct_builder.rs  |  6 +++---
 arrow/src/array/data.rs                    |  2 +-
 arrow/src/compute/kernels/cast.rs          |  2 +-
 arrow/src/compute/kernels/filter.rs        |  2 +-
 arrow/src/compute/kernels/take.rs          |  2 +-
 arrow/src/json/reader.rs                   |  2 +-
 arrow/src/util/integration_util.rs         |  2 +-
 10 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs
index 691cd0683..b61423396 100644
--- a/arrow/benches/builder.rs
+++ b/arrow/benches/builder.rs
@@ -80,7 +80,7 @@ fn bench_bool(c: &mut Criterion) {
     ));
     group.bench_function("bench_bool", |b| {
         b.iter(|| {
-            let mut builder = BooleanBuilder::new(64);
+            let mut builder = BooleanBuilder::with_capacity(64);
             for _ in 0..NUM_BATCHES {
                 builder.append_slice(&data[..]);
             }
diff --git a/arrow/src/array/array_boolean.rs b/arrow/src/array/array_boolean.rs
index 5d1e20705..cb1cd11db 100644
--- a/arrow/src/array/array_boolean.rs
+++ b/arrow/src/array/array_boolean.rs
@@ -95,7 +95,7 @@ impl BooleanArray {
 
     // Returns a new boolean array builder
     pub fn builder(capacity: usize) -> BooleanBuilder {
-        BooleanBuilder::new(capacity)
+        BooleanBuilder::with_capacity(capacity)
     }
 
     /// Returns a `Buffer` holding all the values of this array.
diff --git a/arrow/src/array/builder/boolean_builder.rs b/arrow/src/array/builder/boolean_builder.rs
index e28e37bc9..eed14a55f 100644
--- a/arrow/src/array/builder/boolean_builder.rs
+++ b/arrow/src/array/builder/boolean_builder.rs
@@ -39,7 +39,7 @@ use super::NullBufferBuilder;
 /// ```
 ///     use arrow::array::{Array, BooleanArray, BooleanBuilder};
 ///
-///     let mut b = BooleanBuilder::new(4);
+///     let mut b = BooleanBuilder::new();
 ///     b.append_value(true);
 ///     b.append_null();
 ///     b.append_value(false);
@@ -66,9 +66,20 @@ pub struct BooleanBuilder {
     null_buffer_builder: NullBufferBuilder,
 }
 
+impl Default for BooleanBuilder {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl BooleanBuilder {
-    /// Creates a new primitive array builder
-    pub fn new(capacity: usize) -> Self {
+    /// Creates a new boolean builder
+    pub fn new() -> Self {
+        Self::with_capacity(1024)
+    }
+
+    /// Creates a new boolean builder with space for `capacity` elements without re-allocating
+    pub fn with_capacity(capacity: usize) -> Self {
         Self {
             values_builder: BooleanBufferBuilder::new(capacity),
             null_buffer_builder: NullBufferBuilder::new(capacity),
diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs
index a2f8707b3..be7fa1405 100644
--- a/arrow/src/array/builder/struct_builder.rs
+++ b/arrow/src/array/builder/struct_builder.rs
@@ -96,7 +96,7 @@ impl ArrayBuilder for StructBuilder {
 pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilder> {
     match datatype {
         DataType::Null => unimplemented!(),
-        DataType::Boolean => Box::new(BooleanBuilder::new(capacity)),
+        DataType::Boolean => Box::new(BooleanBuilder::with_capacity(capacity)),
         DataType::Int8 => Box::new(Int8Builder::new(capacity)),
         DataType::Int16 => Box::new(Int16Builder::new(capacity)),
         DataType::Int32 => Box::new(Int32Builder::new(capacity)),
@@ -321,7 +321,7 @@ mod tests {
     #[test]
     fn test_struct_array_builder_finish() {
         let int_builder = Int32Builder::new(10);
-        let bool_builder = BooleanBuilder::new(10);
+        let bool_builder = BooleanBuilder::new();
 
         let mut fields = Vec::new();
         let mut field_builders = Vec::new();
@@ -426,7 +426,7 @@ mod tests {
     #[should_panic(expected = "StructBuilder and field_builders are of unequal lengths.")]
     fn test_struct_array_builder_unequal_field_builders_lengths() {
         let mut int_builder = Int32Builder::new(10);
-        let mut bool_builder = BooleanBuilder::new(10);
+        let mut bool_builder = BooleanBuilder::new();
 
         int_builder.append_value(1);
         int_builder.append_value(2);
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 3993d51d9..1a7f991ac 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -2677,7 +2677,7 @@ mod tests {
             ],
             vec![
                 Box::new(Int32Builder::new(5)),
-                Box::new(BooleanBuilder::new(5)),
+                Box::new(BooleanBuilder::with_capacity(5)),
             ],
         );
 
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index 2d9e0b5de..3ec3ea3a8 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -2072,7 +2072,7 @@ fn numeric_to_bool_cast<T>(from: &PrimitiveArray<T>) -> Result<BooleanArray>
 where
     T: ArrowPrimitiveType + ArrowNumericType,
 {
-    let mut b = BooleanBuilder::new(from.len());
+    let mut b = BooleanBuilder::with_capacity(from.len());
 
     for i in 0..from.len() {
         if from.is_null(i) {
diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs
index 0bffee62c..34d2a880a 100644
--- a/arrow/src/compute/kernels/filter.rs
+++ b/arrow/src/compute/kernels/filter.rs
@@ -1039,7 +1039,7 @@ mod tests {
     #[test]
     fn test_filter_string_array_with_negated_boolean_array() {
         let a = StringArray::from(vec!["hello", " ", "world", "!"]);
-        let mut bb = BooleanBuilder::new(2);
+        let mut bb = BooleanBuilder::with_capacity(2);
         bb.append_value(false);
         bb.append_value(true);
         bb.append_value(false);
diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs
index fb8f75651..a209cd444 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -1083,7 +1083,7 @@ mod tests {
                 Field::new("b", DataType::Int32, true),
             ],
             vec![
-                Box::new(BooleanBuilder::new(values.len())),
+                Box::new(BooleanBuilder::with_capacity(values.len())),
                 Box::new(Int32Builder::new(values.len())),
             ],
         );
diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs
index 66fdc6918..ce08492ca 100644
--- a/arrow/src/json/reader.rs
+++ b/arrow/src/json/reader.rs
@@ -950,7 +950,7 @@ impl Decoder {
     }
 
     fn build_boolean_array(&self, rows: &[Value], col_name: &str) -> Result<ArrayRef> {
-        let mut builder = BooleanBuilder::new(rows.len());
+        let mut builder = BooleanBuilder::with_capacity(rows.len());
         for row in rows {
             if let Some(value) = row.get(&col_name) {
                 if let Some(boolean) = value.as_bool() {
diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs
index 4b7d29259..4a3cc2fcb 100644
--- a/arrow/src/util/integration_util.rs
+++ b/arrow/src/util/integration_util.rs
@@ -284,7 +284,7 @@ pub fn array_from_json(
     match field.data_type() {
         DataType::Null => Ok(Arc::new(NullArray::new(json_col.count))),
         DataType::Boolean => {
-            let mut b = BooleanBuilder::new(json_col.count);
+            let mut b = BooleanBuilder::with_capacity(json_col.count);
             for (is_valid, value) in json_col
                 .validity
                 .as_ref()