You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/04/14 20:34:12 UTC

[arrow-rs] branch master updated: Fix timestamp numeric truncation (#4074)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e9fc22d7 Fix timestamp numeric truncation (#4074)
2e9fc22d7 is described below

commit 2e9fc22d787b42e4e77b7f8b5d713d7bf9b9e08b
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Apr 14 21:34:07 2023 +0100

    Fix timestamp numeric truncation (#4074)
---
 arrow-json/src/reader/mod.rs             | 23 +++++++++++++++++++++++
 arrow-json/src/reader/timestamp_array.rs |  9 ++++-----
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 51bba322b..603a0cd7e 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -1370,6 +1370,29 @@ mod tests {
         assert_eq!(u64.values(), &[u64::MAX, u64::MAX, u64::MIN, u64::MIN]);
     }
 
+    #[test]
+    fn test_timestamp_truncation() {
+        let buf = r#"
+        {"time": 9223372036854775807 }
+        {"time": -9223372036854775808 }
+        {"time": 9e5 }
+        "#;
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "time",
+            DataType::Timestamp(TimeUnit::Nanosecond, None),
+            true,
+        )]));
+
+        let batches = do_read(buf, 1024, true, schema);
+        assert_eq!(batches.len(), 1);
+
+        let i64 = batches[0]
+            .column(0)
+            .as_primitive::<TimestampNanosecondType>();
+        assert_eq!(i64.values(), &[i64::MAX, i64::MIN, 900000]);
+    }
+
     fn read_file(path: &str, schema: Option<Schema>) -> Reader<BufReader<File>> {
         let file = File::open(path).unwrap();
         let mut reader = BufReader::new(file);
diff --git a/arrow-json/src/reader/timestamp_array.rs b/arrow-json/src/reader/timestamp_array.rs
index 249613d33..ef69deabc 100644
--- a/arrow-json/src/reader/timestamp_array.rs
+++ b/arrow-json/src/reader/timestamp_array.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 use chrono::TimeZone;
-use num::NumCast;
 use std::marker::PhantomData;
 
 use arrow_array::builder::PrimitiveBuilder;
@@ -78,10 +77,10 @@ where
                 }
                 TapeElement::Number(idx) => {
                     let s = tape.get_string(idx);
-                    let value = lexical_core::parse::<f64>(s.as_bytes())
-                        .ok()
-                        .and_then(NumCast::from)
-                        .ok_or_else(|| {
+                    let b = s.as_bytes();
+                    let value = lexical_core::parse::<i64>(b)
+                        .or_else(|_| lexical_core::parse::<f64>(b).map(|x| x as i64))
+                        .map_err(|_| {
                             ArrowError::JsonError(format!(
                                 "failed to parse {s} as {}",
                                 self.data_type