You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/01/13 08:28:11 UTC

[arrow-rs] branch master updated: Fix reading null booleans from CSV (#3523)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 0650a3a77 Fix reading null booleans from CSV (#3523)
0650a3a77 is described below

commit 0650a3a7726e992cf9d253165b79fbb00a3d9222
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Jan 13 09:28:06 2023 +0100

    Fix reading null booleans from CSV (#3523)
    
    * Fix reading null booleans from CSV
    
    * Clippy
    
    * Review feedback
---
 arrow-csv/src/reader/mod.rs | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index bc6b016ec..0c7bfa897 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -866,6 +866,9 @@ fn build_boolean_array(
         .enumerate()
         .map(|(row_index, row)| {
             let s = row.get(col_idx);
+            if s.is_empty() {
+                return Ok(None);
+            }
             let parsed = parse_bool(s);
             match parsed {
                 Some(e) => Ok(Some(e)),
@@ -1122,6 +1125,7 @@ mod tests {
     use std::io::{Cursor, Write};
     use tempfile::NamedTempFile;
 
+    use arrow_array::cast::as_boolean_array;
     use chrono::prelude::*;
 
     #[test]
@@ -2067,4 +2071,32 @@ mod tests {
             assert_eq!(b.num_rows(), expected, "{}", idx);
         }
     }
+
+    #[test]
+    fn test_null_boolean() {
+        let csv = "true,false\nFalse,True\n,True\nFalse,";
+        let b = ReaderBuilder::new()
+            .build_buffered(Cursor::new(csv.as_bytes()))
+            .unwrap()
+            .next()
+            .unwrap()
+            .unwrap();
+
+        assert_eq!(b.num_rows(), 4);
+        assert_eq!(b.num_columns(), 2);
+
+        let c = as_boolean_array(b.column(0));
+        assert_eq!(c.null_count(), 1);
+        assert!(c.value(0));
+        assert!(!c.value(1));
+        assert!(c.is_null(2));
+        assert!(!c.value(3));
+
+        let c = as_boolean_array(b.column(1));
+        assert_eq!(c.null_count(), 1);
+        assert!(!c.value(0));
+        assert!(c.value(1));
+        assert!(c.value(2));
+        assert!(c.is_null(3));
+    }
 }