You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@arrow.apache.org by al...@apache.org on 2023/04/14 20:34:12 UTC
[arrow-rs] branch master updated: Fix timestamp numeric truncation (#4074)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 2e9fc22d7 Fix timestamp numeric truncation (#4074)
2e9fc22d7 is described below
commit 2e9fc22d787b42e4e77b7f8b5d713d7bf9b9e08b
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Fri Apr 14 21:34:07 2023 +0100
Fix timestamp numeric truncation (#4074)
---
arrow-json/src/reader/mod.rs | 23 +++++++++++++++++++++++
arrow-json/src/reader/timestamp_array.rs | 9 ++++-----
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 51bba322b..603a0cd7e 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -1370,6 +1370,29 @@ mod tests {
assert_eq!(u64.values(), &[u64::MAX, u64::MAX, u64::MIN, u64::MIN]);
}
+ #[test]
+ fn test_timestamp_truncation() {
+ let buf = r#"
+ {"time": 9223372036854775807 }
+ {"time": -9223372036854775808 }
+ {"time": 9e5 }
+ "#;
+
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "time",
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ true,
+ )]));
+
+ let batches = do_read(buf, 1024, true, schema);
+ assert_eq!(batches.len(), 1);
+
+ let i64 = batches[0]
+ .column(0)
+ .as_primitive::<TimestampNanosecondType>();
+ assert_eq!(i64.values(), &[i64::MAX, i64::MIN, 900000]);
+ }
+
fn read_file(path: &str, schema: Option<Schema>) -> Reader<BufReader<File>> {
let file = File::open(path).unwrap();
let mut reader = BufReader::new(file);
diff --git a/arrow-json/src/reader/timestamp_array.rs b/arrow-json/src/reader/timestamp_array.rs
index 249613d33..ef69deabc 100644
--- a/arrow-json/src/reader/timestamp_array.rs
+++ b/arrow-json/src/reader/timestamp_array.rs
@@ -16,7 +16,6 @@
// under the License.
use chrono::TimeZone;
-use num::NumCast;
use std::marker::PhantomData;
use arrow_array::builder::PrimitiveBuilder;
@@ -78,10 +77,10 @@ where
}
TapeElement::Number(idx) => {
let s = tape.get_string(idx);
- let value = lexical_core::parse::<f64>(s.as_bytes())
- .ok()
- .and_then(NumCast::from)
- .ok_or_else(|| {
+ let b = s.as_bytes();
+ let value = lexical_core::parse::<i64>(b)
+ .or_else(|_| lexical_core::parse::<f64>(b).map(|x| x as i64))
+ .map_err(|_| {
ArrowError::JsonError(format!(
"failed to parse {s} as {}",
self.data_type