You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/04 19:58:11 UTC

[arrow-rs] branch master updated: Minor: float16 to json (#4358)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 5976ae45e Minor: float16 to json (#4358)
5976ae45e is described below

commit 5976ae45e7fc3e4bed2ea0916162432d96e97f96
Author: Igor Izvekov <iz...@gmail.com>
AuthorDate: Sun Jun 4 22:58:05 2023 +0300

    Minor: float16 to json (#4358)
    
    * Minor: float16 to json
    
    * feat: Float16 JSON Reader
    
    * fix: clippy
    
    * fix: cargo fmt
---
 arrow-cast/Cargo.toml                    | 1 +
 arrow-cast/src/parse.rs                  | 9 +++++++++
 arrow-json/src/reader/mod.rs             | 5 +++--
 arrow-json/src/reader/primitive_array.rs | 7 +++++++
 arrow-json/src/writer.rs                 | 5 +++++
 arrow-json/test/data/basic.json          | 4 ++--
 6 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
index a999fe517..ebfadeb99 100644
--- a/arrow-cast/Cargo.toml
+++ b/arrow-cast/Cargo.toml
@@ -46,6 +46,7 @@ arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
 arrow-select = { workspace = true }
 chrono = { version = "0.4.23", default-features = false, features = ["clock"] }
+half = { version = "2.1", default-features = false }
 num = { version = "0.4", default-features = false, features = ["std"] }
 lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] }
 comfy-table = { version = "6.0", optional = true, default-features = false }
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index fd248f2be..fa0ed9979 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -21,6 +21,7 @@ use arrow_array::{ArrowNativeTypeOp, ArrowPrimitiveType};
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::ArrowError;
 use chrono::prelude::*;
+use half::f16;
 use std::str::FromStr;
 
 /// Parse nanoseconds from the first `N` values in digits, subtracting the offset `O`
@@ -436,6 +437,14 @@ pub trait Parser: ArrowPrimitiveType {
     }
 }
 
+impl Parser for Float16Type {
+    fn parse(string: &str) -> Option<f16> {
+        lexical_core::parse(string.as_bytes())
+            .ok()
+            .map(f16::from_f32)
+    }
+}
+
 impl Parser for Float32Type {
     fn parse(string: &str) -> Option<f32> {
         lexical_core::parse(string.as_bytes()).ok()
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 5f1a2bb43..dd58e1e1a 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -591,6 +591,7 @@ fn make_decoder(
     downcast_integer! {
         data_type => (primitive_decoder, data_type),
         DataType::Null => Ok(Box::<NullArrayDecoder>::default()),
+        DataType::Float16 => primitive_decoder!(Float16Type, data_type),
         DataType::Float32 => primitive_decoder!(Float32Type, data_type),
         DataType::Float64 => primitive_decoder!(Float64Type, data_type),
         DataType::Timestamp(TimeUnit::Second, None) => {
@@ -1422,7 +1423,7 @@ mod tests {
         let mut reader = read_file("test/data/basic.json", None);
         let batch = reader.next().unwrap().unwrap();
 
-        assert_eq!(7, batch.num_columns());
+        assert_eq!(8, batch.num_columns());
         assert_eq!(12, batch.num_rows());
 
         let schema = reader.schema();
@@ -1941,7 +1942,7 @@ mod tests {
         let mut sum_a = 0;
         for batch in reader {
             let batch = batch.unwrap();
-            assert_eq!(7, batch.num_columns());
+            assert_eq!(8, batch.num_columns());
             sum_num_rows += batch.num_rows();
             num_batches += 1;
             let batch_schema = batch.schema();
diff --git a/arrow-json/src/reader/primitive_array.rs b/arrow-json/src/reader/primitive_array.rs
index cde52391f..c78e4d914 100644
--- a/arrow-json/src/reader/primitive_array.rs
+++ b/arrow-json/src/reader/primitive_array.rs
@@ -23,6 +23,7 @@ use arrow_array::{Array, ArrowPrimitiveType};
 use arrow_cast::parse::Parser;
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
+use half::f16;
 
 use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::ArrayDecoder;
@@ -54,6 +55,12 @@ macro_rules! primitive_parse {
 
 primitive_parse!(i8, i16, i32, i64, u8, u16, u32, u64);
 
+impl ParseJsonNumber for f16 {
+    fn parse(s: &[u8]) -> Option<Self> {
+        lexical_core::parse::<f32>(s).ok().map(f16::from_f32)
+    }
+}
+
 impl ParseJsonNumber for f32 {
     fn parse(s: &[u8]) -> Option<Self> {
         lexical_core::parse::<Self>(s).ok()
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index e6c960aef..d00662a72 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -174,6 +174,7 @@ pub fn array_to_json_array(array: &ArrayRef) -> Result<Vec<Value>, ArrowError> {
         DataType::UInt16 => primitive_array_to_json::<UInt16Type>(array),
         DataType::UInt32 => primitive_array_to_json::<UInt32Type>(array),
         DataType::UInt64 => primitive_array_to_json::<UInt64Type>(array),
+        DataType::Float16 => primitive_array_to_json::<Float16Type>(array),
         DataType::Float32 => primitive_array_to_json::<Float32Type>(array),
         DataType::Float64 => primitive_array_to_json::<Float64Type>(array),
         DataType::List(_) => as_list_array(array)
@@ -264,6 +265,9 @@ fn set_column_for_json_rows(
         DataType::UInt64 => {
             set_column_by_primitive_type::<UInt64Type>(rows, array, col_name);
         }
+        DataType::Float16 => {
+            set_column_by_primitive_type::<Float16Type>(rows, array, col_name);
+        }
         DataType::Float32 => {
             set_column_by_primitive_type::<Float32Type>(rows, array, col_name);
         }
@@ -1452,6 +1456,7 @@ mod tests {
             Field::new("e", DataType::Utf8, true),
             Field::new("f", DataType::Utf8, true),
             Field::new("g", DataType::Timestamp(TimeUnit::Millisecond, None), true),
+            Field::new("h", DataType::Float16, true),
         ]));
 
         let mut reader = ReaderBuilder::new(schema.clone())
diff --git a/arrow-json/test/data/basic.json b/arrow-json/test/data/basic.json
index 598838dfc..a6a8766bf 100644
--- a/arrow-json/test/data/basic.json
+++ b/arrow-json/test/data/basic.json
@@ -1,5 +1,5 @@
-{"a":1, "b":2.0, "c":false, "d":"4", "e":"1970-1-2", "f": "1.02", "g": "2012-04-23T18:25:43.511"}
-{"a":-10, "b":-3.5, "c":true, "d":"4", "e": "1969-12-31", "f": "-0.3", "g": "2016-04-23T18:25:43.511"}
+{"a":1, "b":2.0, "c":false, "d":"4", "e":"1970-1-2", "f": "1.02", "g": "2012-04-23T18:25:43.511", "h": 1.1}
+{"a":-10, "b":-3.5, "c":true, "d":"4", "e": "1969-12-31", "f": "-0.3", "g": "2016-04-23T18:25:43.511", "h": 3.141}
 {"a":2, "b":0.6, "c":false, "d":"text", "e": "1970-01-02 11:11:11", "f": "1377.223"}
 {"a":1, "b":2.0, "c":false, "d":"4", "f": "1337.009"}
 {"a":7, "b":-3.5, "c":true, "d":"4", "f": "1"}