You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ne...@apache.org on 2020/11/28 20:35:04 UTC
[arrow] branch master updated: ARROW-10755: [Rust] [Parquet] Add support for writing boolean type
This is an automated email from the ASF dual-hosted git repository.
nevime pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new fa4c76f ARROW-10755: [Rust] [Parquet] Add support for writing boolean type
fa4c76f is described below
commit fa4c76fff56df97bce294e62c77942e6fef56b80
Author: Will Jones <wi...@gmail.com>
AuthorDate: Sat Nov 28 22:34:04 2020 +0200
ARROW-10755: [Rust] [Parquet] Add support for writing boolean type
Implemented boolean writing by following the pattern of the existing numeric implementations; most of the required plumbing was already in place.
Closes #8790 from wjones1/ARROW-10755
Lead-authored-by: Will Jones <wi...@gmail.com>
Co-authored-by: EC2 Default User <ec...@ip-172-31-17-159.us-west-2.compute.internal>
Signed-off-by: Neville Dipale <ne...@gmail.com>
---
rust/parquet/src/arrow/arrow_writer.rs | 28 ++++++++++++++++++++--------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
index 77eb1e8..dc9cf70 100644
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ b/rust/parquet/src/arrow/arrow_writer.rs
@@ -129,6 +129,7 @@ fn write_leaves(
) -> Result<()> {
match array.data_type() {
ArrowDataType::Null
+ | ArrowDataType::Boolean
| ArrowDataType::Int8
| ArrowDataType::Int16
| ArrowDataType::Int32
@@ -263,7 +264,6 @@ fn write_leaves(
"Float16 arrays not supported".to_string(),
)),
ArrowDataType::FixedSizeList(_, _)
- | ArrowDataType::Boolean
| ArrowDataType::FixedSizeBinary(_)
| ArrowDataType::Decimal(_, _)
| ArrowDataType::Union(_) => Err(ParquetError::NYI(
@@ -351,8 +351,13 @@ fn write_leaf(
levels.repetition.as_deref(),
)?
}
- ColumnWriter::BoolColumnWriter(ref mut _typed) => {
- unreachable!("Currently unreachable because data type not supported")
+ ColumnWriter::BoolColumnWriter(ref mut typed) => {
+ let array = arrow_array::BooleanArray::from(column.data());
+ typed.write_batch(
+ get_bool_array_slice(&array).as_slice(),
+ Some(levels.definition.as_slice()),
+ levels.repetition.as_deref(),
+ )?
}
ColumnWriter::Int64ColumnWriter(ref mut typed) => {
let array = arrow_array::Int64Array::from(column.data());
@@ -525,8 +530,8 @@ fn get_levels(
definition: list_def_levels,
repetition: Some(list_rep_levels),
}],
- ArrowDataType::Boolean => unimplemented!(),
- ArrowDataType::Int8
+ ArrowDataType::Boolean
+ | ArrowDataType::Int8
| ArrowDataType::Int16
| ArrowDataType::Int32
| ArrowDataType::Int64
@@ -665,6 +670,16 @@ where
values
}
+fn get_bool_array_slice(array: &arrow_array::BooleanArray) -> Vec<bool> {
+ let mut values = Vec::with_capacity(array.len() - array.null_count());
+ for i in 0..array.len() {
+ if array.is_valid(i) {
+ values.push(array.value(i))
+ }
+ }
+ values
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -1023,9 +1038,6 @@ mod tests {
}
#[test]
- #[should_panic(
- expected = "Attempting to write an Arrow type that is not yet implemented"
- )]
fn bool_single_column() {
required_and_optional::<BooleanArray, _>(
[true, false].iter().cycle().copied().take(SMALL_SIZE),