You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ne...@apache.org on 2020/11/28 20:35:04 UTC

[arrow] branch master updated: ARROW-10755: [Rust] [Parquet] Add support for writing boolean type

This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new fa4c76f  ARROW-10755: [Rust] [Parquet] Add support for writing boolean type
fa4c76f is described below

commit fa4c76fff56df97bce294e62c77942e6fef56b80
Author: Will Jones <wi...@gmail.com>
AuthorDate: Sat Nov 28 22:34:04 2020 +0200

    ARROW-10755: [Rust] [Parquet] Add support for writing boolean type
    
    Built this based on what I saw existing for the numeric implementations. Most of it was already there.
    
    Closes #8790 from wjones1/ARROW-10755
    
    Lead-authored-by: Will Jones <wi...@gmail.com>
    Co-authored-by: EC2 Default User <ec...@ip-172-31-17-159.us-west-2.compute.internal>
    Signed-off-by: Neville Dipale <ne...@gmail.com>
---
 rust/parquet/src/arrow/arrow_writer.rs | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
index 77eb1e8..dc9cf70 100644
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ b/rust/parquet/src/arrow/arrow_writer.rs
@@ -129,6 +129,7 @@ fn write_leaves(
 ) -> Result<()> {
     match array.data_type() {
         ArrowDataType::Null
+        | ArrowDataType::Boolean
         | ArrowDataType::Int8
         | ArrowDataType::Int16
         | ArrowDataType::Int32
@@ -263,7 +264,6 @@ fn write_leaves(
             "Float16 arrays not supported".to_string(),
         )),
         ArrowDataType::FixedSizeList(_, _)
-        | ArrowDataType::Boolean
         | ArrowDataType::FixedSizeBinary(_)
         | ArrowDataType::Decimal(_, _)
         | ArrowDataType::Union(_) => Err(ParquetError::NYI(
@@ -351,8 +351,13 @@ fn write_leaf(
                 levels.repetition.as_deref(),
             )?
         }
-        ColumnWriter::BoolColumnWriter(ref mut _typed) => {
-            unreachable!("Currently unreachable because data type not supported")
+        ColumnWriter::BoolColumnWriter(ref mut typed) => {
+            let array = arrow_array::BooleanArray::from(column.data());
+            typed.write_batch(
+                get_bool_array_slice(&array).as_slice(),
+                Some(levels.definition.as_slice()),
+                levels.repetition.as_deref(),
+            )?
         }
         ColumnWriter::Int64ColumnWriter(ref mut typed) => {
             let array = arrow_array::Int64Array::from(column.data());
@@ -525,8 +530,8 @@ fn get_levels(
                     definition: list_def_levels,
                     repetition: Some(list_rep_levels),
                 }],
-                ArrowDataType::Boolean => unimplemented!(),
-                ArrowDataType::Int8
+                ArrowDataType::Boolean
+                | ArrowDataType::Int8
                 | ArrowDataType::Int16
                 | ArrowDataType::Int32
                 | ArrowDataType::Int64
@@ -665,6 +670,16 @@ where
     values
 }
 
+fn get_bool_array_slice(array: &arrow_array::BooleanArray) -> Vec<bool> {
+    let mut values = Vec::with_capacity(array.len() - array.null_count());
+    for i in 0..array.len() {
+        if array.is_valid(i) {
+            values.push(array.value(i))
+        }
+    }
+    values
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1023,9 +1038,6 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(
-        expected = "Attempting to write an Arrow type that is not yet implemented"
-    )]
     fn bool_single_column() {
         required_and_optional::<BooleanArray, _>(
             [true, false].iter().cycle().copied().take(SMALL_SIZE),