You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/01/13 08:28:11 UTC
[arrow-rs] branch master updated: Fix reading null booleans from CSV (#3523)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 0650a3a77 Fix reading null booleans from CSV (#3523)
0650a3a77 is described below
commit 0650a3a7726e992cf9d253165b79fbb00a3d9222
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Jan 13 09:28:06 2023 +0100
Fix reading null booleans from CSV (#3523)
* Fix reading null booleans from CSV
* Clippy
* Review feedback
---
arrow-csv/src/reader/mod.rs | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index bc6b016ec..0c7bfa897 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -866,6 +866,9 @@ fn build_boolean_array(
.enumerate()
.map(|(row_index, row)| {
let s = row.get(col_idx);
+ if s.is_empty() {
+ return Ok(None);
+ }
let parsed = parse_bool(s);
match parsed {
Some(e) => Ok(Some(e)),
@@ -1122,6 +1125,7 @@ mod tests {
use std::io::{Cursor, Write};
use tempfile::NamedTempFile;
+ use arrow_array::cast::as_boolean_array;
use chrono::prelude::*;
#[test]
@@ -2067,4 +2071,32 @@ mod tests {
assert_eq!(b.num_rows(), expected, "{}", idx);
}
}
+
+ #[test]
+ fn test_null_boolean() {
+ let csv = "true,false\nFalse,True\n,True\nFalse,";
+ let b = ReaderBuilder::new()
+ .build_buffered(Cursor::new(csv.as_bytes()))
+ .unwrap()
+ .next()
+ .unwrap()
+ .unwrap();
+
+ assert_eq!(b.num_rows(), 4);
+ assert_eq!(b.num_columns(), 2);
+
+ let c = as_boolean_array(b.column(0));
+ assert_eq!(c.null_count(), 1);
+ assert!(c.value(0));
+ assert!(!c.value(1));
+ assert!(c.is_null(2));
+ assert!(!c.value(3));
+
+ let c = as_boolean_array(b.column(1));
+ assert_eq!(c.null_count(), 1);
+ assert!(!c.value(0));
+ assert!(c.value(1));
+ assert!(c.value(2));
+ assert!(c.is_null(3));
+ }
}