You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/18 13:45:42 UTC

[arrow-rs] branch master updated: Serialize numeric to tape (#4069) (#4073)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 4ec95c297 Serialize numeric to tape (#4069) (#4073)
4ec95c297 is described below

commit 4ec95c2972b2d49421fce66e20dd2df18c559658
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Tue Apr 18 09:45:35 2023 -0400

    Serialize numeric to tape (#4069) (#4073)
---
 arrow-json/src/reader/mod.rs        | 23 +++++++++++++++++++++++
 arrow-json/src/reader/serializer.rs | 10 +++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 603a0cd7e..4abcb1ea7 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -634,6 +634,7 @@ fn make_decoder(
 
 #[cfg(test)]
 mod tests {
+    use serde_json::json;
     use std::fs::File;
     use std::io::{BufReader, Cursor, Seek};
     use std::sync::Arc;
@@ -1976,4 +1977,26 @@ mod tests {
             "Json error: whilst decoding field 'a': whilst decoding field 'child': expected primitive got [123, 3465346]"
         );
     }
+
+    #[test]
+    fn test_serialize_timestamp() { // timestamp column fed both a JSON number and a JSON string
+        let json = vec![
+            json!({"timestamp": 1681319393}), // numeric input, expected back unchanged (see final assert)
+            json!({"timestamp": "1970-01-01T00:00:00+02:00"}), // date-time string carrying a +02:00 offset
+        ];
+        let schema = Schema::new(vec![Field::new(
+            "timestamp",
+            DataType::Timestamp(TimeUnit::Second, None), // second resolution, no timezone on the column
+            true,
+        )]);
+        let mut decoder = ReaderBuilder::new(Arc::new(schema))
+            .build_decoder()
+            .unwrap();
+        decoder.serialize(&json).unwrap(); // hand the serde_json values to the decoder
+        let batch = decoder.flush().unwrap().unwrap(); // flush returns a nested wrapper; both layers must succeed
+        assert_eq!(batch.num_rows(), 2);
+        assert_eq!(batch.num_columns(), 1);
+        let values = batch.column(0).as_primitive::<TimestampSecondType>();
+        assert_eq!(values.values(), &[1681319393, -7200]); // midnight at +02:00 is 7200 s before the Unix epoch
+    }
 }
diff --git a/arrow-json/src/reader/serializer.rs b/arrow-json/src/reader/serializer.rs
index a68d1d547..2aa72de94 100644
--- a/arrow-json/src/reader/serializer.rs
+++ b/arrow-json/src/reader/serializer.rs
@@ -68,6 +68,13 @@ impl<'a> TapeSerializer<'a> {
             offsets,
         }
     }
+
+    fn serialize_number(&mut self, v: &[u8]) { // records the bytes `v` on the tape as a `TapeElement::Number`
+        self.bytes.extend_from_slice(v); // append the bytes of `v` to `self.bytes`
+        let idx = self.offsets.len() - 1; // index of the last existing offset; NOTE(review): assumes `offsets` is never empty — confirm in the constructor
+        self.elements.push(TapeElement::Number(idx as _)); // `as _` casts to whatever integer type the variant holds
+        self.offsets.push(self.bytes.len()); // new entry: length of `self.bytes` after the append
+    }
 }
 
 /// The tape stores all values as strings, and so must serialize numeric types
@@ -81,7 +88,8 @@ macro_rules! serialize_numeric {
     ($s:ident, $t:ty, $v:ident) => {{
         let mut buffer = [0_u8; <$t>::FORMATTED_SIZE];
         let s = lexical_core::write($v, &mut buffer);
-        $s.serialize_bytes(s)
+        $s.serialize_number(s);
+        Ok(())
     }};
 }