You are viewing a plain text version of this content. The canonical link for it was a hyperlink in the original page and is not preserved in this plain text copy.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/18 13:45:42 UTC
[arrow-rs] branch master updated: Serialize numeric to tape (#4069) (#4073)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 4ec95c297 Serialize numeric to tape (#4069) (#4073)
4ec95c297 is described below
commit 4ec95c2972b2d49421fce66e20dd2df18c559658
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Tue Apr 18 09:45:35 2023 -0400
Serialize numeric to tape (#4069) (#4073)
---
arrow-json/src/reader/mod.rs | 23 +++++++++++++++++++++++
arrow-json/src/reader/serializer.rs | 10 +++++++++-
2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 603a0cd7e..4abcb1ea7 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -634,6 +634,7 @@ fn make_decoder(
#[cfg(test)]
mod tests {
+ use serde_json::json;
use std::fs::File;
use std::io::{BufReader, Cursor, Seek};
use std::sync::Arc;
@@ -1976,4 +1977,26 @@ mod tests {
"Json error: whilst decoding field 'a': whilst decoding field 'child': expected primitive got [123, 3465346]"
);
}
+
+ #[test]
+ fn test_serialize_timestamp() {
+ let json = vec![
+ json!({"timestamp": 1681319393}),
+ json!({"timestamp": "1970-01-01T00:00:00+02:00"}),
+ ];
+ let schema = Schema::new(vec![Field::new(
+ "timestamp",
+ DataType::Timestamp(TimeUnit::Second, None),
+ true,
+ )]);
+ let mut decoder = ReaderBuilder::new(Arc::new(schema))
+ .build_decoder()
+ .unwrap();
+ decoder.serialize(&json).unwrap();
+ let batch = decoder.flush().unwrap().unwrap();
+ assert_eq!(batch.num_rows(), 2);
+ assert_eq!(batch.num_columns(), 1);
+ let values = batch.column(0).as_primitive::<TimestampSecondType>();
+ assert_eq!(values.values(), &[1681319393, -7200]);
+ }
}
diff --git a/arrow-json/src/reader/serializer.rs b/arrow-json/src/reader/serializer.rs
index a68d1d547..2aa72de94 100644
--- a/arrow-json/src/reader/serializer.rs
+++ b/arrow-json/src/reader/serializer.rs
@@ -68,6 +68,13 @@ impl<'a> TapeSerializer<'a> {
offsets,
}
}
+
+ fn serialize_number(&mut self, v: &[u8]) {
+ self.bytes.extend_from_slice(v);
+ let idx = self.offsets.len() - 1;
+ self.elements.push(TapeElement::Number(idx as _));
+ self.offsets.push(self.bytes.len());
+ }
}
/// The tape stores all values as strings, and so must serialize numeric types
@@ -81,7 +88,8 @@ macro_rules! serialize_numeric {
($s:ident, $t:ty, $v:ident) => {{
let mut buffer = [0_u8; <$t>::FORMATTED_SIZE];
let s = lexical_core::write($v, &mut buffer);
- $s.serialize_bytes(s)
+ $s.serialize_number(s);
+ Ok(())
}};
}