You are viewing a plain text version of this content. The canonical link for it was a hyperlink in the original page and is not preserved in this plain text version.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/06/04 19:58:11 UTC
[arrow-rs] branch master updated: Minor: float16 to json (#4358)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 5976ae45e Minor: float16 to json (#4358)
5976ae45e is described below
commit 5976ae45e7fc3e4bed2ea0916162432d96e97f96
Author: Igor Izvekov <iz...@gmail.com>
AuthorDate: Sun Jun 4 22:58:05 2023 +0300
Minor: float16 to json (#4358)
* Minor: float16 to json
* feat: Float16 JSON Reader
* fix: clippy
* fix: cargo fmt
---
arrow-cast/Cargo.toml | 1 +
arrow-cast/src/parse.rs | 9 +++++++++
arrow-json/src/reader/mod.rs | 5 +++--
arrow-json/src/reader/primitive_array.rs | 7 +++++++
arrow-json/src/writer.rs | 5 +++++
arrow-json/test/data/basic.json | 4 ++--
6 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
index a999fe517..ebfadeb99 100644
--- a/arrow-cast/Cargo.toml
+++ b/arrow-cast/Cargo.toml
@@ -46,6 +46,7 @@ arrow-data = { workspace = true }
arrow-schema = { workspace = true }
arrow-select = { workspace = true }
chrono = { version = "0.4.23", default-features = false, features = ["clock"] }
+half = { version = "2.1", default-features = false }
num = { version = "0.4", default-features = false, features = ["std"] }
lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] }
comfy-table = { version = "6.0", optional = true, default-features = false }
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index fd248f2be..fa0ed9979 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -21,6 +21,7 @@ use arrow_array::{ArrowNativeTypeOp, ArrowPrimitiveType};
use arrow_buffer::ArrowNativeType;
use arrow_schema::ArrowError;
use chrono::prelude::*;
+use half::f16;
use std::str::FromStr;
/// Parse nanoseconds from the first `N` values in digits, subtracting the offset `O`
@@ -436,6 +437,14 @@ pub trait Parser: ArrowPrimitiveType {
}
}
+impl Parser for Float16Type {
+ fn parse(string: &str) -> Option<f16> {
+ lexical_core::parse(string.as_bytes())
+ .ok()
+ .map(f16::from_f32)
+ }
+}
+
impl Parser for Float32Type {
fn parse(string: &str) -> Option<f32> {
lexical_core::parse(string.as_bytes()).ok()
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 5f1a2bb43..dd58e1e1a 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -591,6 +591,7 @@ fn make_decoder(
downcast_integer! {
data_type => (primitive_decoder, data_type),
DataType::Null => Ok(Box::<NullArrayDecoder>::default()),
+ DataType::Float16 => primitive_decoder!(Float16Type, data_type),
DataType::Float32 => primitive_decoder!(Float32Type, data_type),
DataType::Float64 => primitive_decoder!(Float64Type, data_type),
DataType::Timestamp(TimeUnit::Second, None) => {
@@ -1422,7 +1423,7 @@ mod tests {
let mut reader = read_file("test/data/basic.json", None);
let batch = reader.next().unwrap().unwrap();
- assert_eq!(7, batch.num_columns());
+ assert_eq!(8, batch.num_columns());
assert_eq!(12, batch.num_rows());
let schema = reader.schema();
@@ -1941,7 +1942,7 @@ mod tests {
let mut sum_a = 0;
for batch in reader {
let batch = batch.unwrap();
- assert_eq!(7, batch.num_columns());
+ assert_eq!(8, batch.num_columns());
sum_num_rows += batch.num_rows();
num_batches += 1;
let batch_schema = batch.schema();
diff --git a/arrow-json/src/reader/primitive_array.rs b/arrow-json/src/reader/primitive_array.rs
index cde52391f..c78e4d914 100644
--- a/arrow-json/src/reader/primitive_array.rs
+++ b/arrow-json/src/reader/primitive_array.rs
@@ -23,6 +23,7 @@ use arrow_array::{Array, ArrowPrimitiveType};
use arrow_cast::parse::Parser;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
+use half::f16;
use crate::reader::tape::{Tape, TapeElement};
use crate::reader::ArrayDecoder;
@@ -54,6 +55,12 @@ macro_rules! primitive_parse {
primitive_parse!(i8, i16, i32, i64, u8, u16, u32, u64);
+impl ParseJsonNumber for f16 {
+ fn parse(s: &[u8]) -> Option<Self> {
+ lexical_core::parse::<f32>(s).ok().map(f16::from_f32)
+ }
+}
+
impl ParseJsonNumber for f32 {
fn parse(s: &[u8]) -> Option<Self> {
lexical_core::parse::<Self>(s).ok()
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index e6c960aef..d00662a72 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -174,6 +174,7 @@ pub fn array_to_json_array(array: &ArrayRef) -> Result<Vec<Value>, ArrowError> {
DataType::UInt16 => primitive_array_to_json::<UInt16Type>(array),
DataType::UInt32 => primitive_array_to_json::<UInt32Type>(array),
DataType::UInt64 => primitive_array_to_json::<UInt64Type>(array),
+ DataType::Float16 => primitive_array_to_json::<Float16Type>(array),
DataType::Float32 => primitive_array_to_json::<Float32Type>(array),
DataType::Float64 => primitive_array_to_json::<Float64Type>(array),
DataType::List(_) => as_list_array(array)
@@ -264,6 +265,9 @@ fn set_column_for_json_rows(
DataType::UInt64 => {
set_column_by_primitive_type::<UInt64Type>(rows, array, col_name);
}
+ DataType::Float16 => {
+ set_column_by_primitive_type::<Float16Type>(rows, array, col_name);
+ }
DataType::Float32 => {
set_column_by_primitive_type::<Float32Type>(rows, array, col_name);
}
@@ -1452,6 +1456,7 @@ mod tests {
Field::new("e", DataType::Utf8, true),
Field::new("f", DataType::Utf8, true),
Field::new("g", DataType::Timestamp(TimeUnit::Millisecond, None), true),
+ Field::new("h", DataType::Float16, true),
]));
let mut reader = ReaderBuilder::new(schema.clone())
diff --git a/arrow-json/test/data/basic.json b/arrow-json/test/data/basic.json
index 598838dfc..a6a8766bf 100644
--- a/arrow-json/test/data/basic.json
+++ b/arrow-json/test/data/basic.json
@@ -1,5 +1,5 @@
-{"a":1, "b":2.0, "c":false, "d":"4", "e":"1970-1-2", "f": "1.02", "g": "2012-04-23T18:25:43.511"}
-{"a":-10, "b":-3.5, "c":true, "d":"4", "e": "1969-12-31", "f": "-0.3", "g": "2016-04-23T18:25:43.511"}
+{"a":1, "b":2.0, "c":false, "d":"4", "e":"1970-1-2", "f": "1.02", "g": "2012-04-23T18:25:43.511", "h": 1.1}
+{"a":-10, "b":-3.5, "c":true, "d":"4", "e": "1969-12-31", "f": "-0.3", "g": "2016-04-23T18:25:43.511", "h": 3.141}
{"a":2, "b":0.6, "c":false, "d":"text", "e": "1970-01-02 11:11:11", "f": "1377.223"}
{"a":1, "b":2.0, "c":false, "d":"4", "f": "1337.009"}
{"a":7, "b":-3.5, "c":true, "d":"4", "f": "1"}