You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/04 14:20:41 UTC
[arrow] branch master updated: ARROW-2385: [Rust] implement to_json
for DataType and Field
This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 7081752 ARROW-2385: [Rust] implement to_json for DataType and Field
7081752 is described below
commit 708175268d498a85ef81ae7438374a2c6fb00b6c
Author: Andy Grove <an...@gmail.com>
AuthorDate: Wed Apr 4 16:20:16 2018 +0200
ARROW-2385: [Rust] implement to_json for DataType and Field
Note that this PR also moves some tests for comparing arrays from Array to Buffer<T> and removes some redundant code that was implemented before it was possible to get a type-safe Iterator from Buffer<T>.
This change was made in this PR because the serde_json crate's macros pretty much forced me to address this now.
Author: Andy Grove <an...@gmail.com>
Closes #1829 from andygrove/schema_json and squashes the following commits:
6b5281f <Andy Grove> fix issues that stopped code compiling with Rust 1.25.0
6af8963 <Andy Grove> rustfmt
ce2e56d <Andy Grove> remove commented out code
0ba3a77 <Andy Grove> can parse types and fields from json
c9ace3f <Andy Grove> implement to_json for DataType and Field
---
rust/Cargo.toml | 3 +-
rust/src/array.rs | 122 ++--------------------------
rust/src/buffer.rs | 55 +++++++++++++
rust/src/datatypes.rs | 218 +++++++++++++++++++++++++++++++++++++++++++++++++-
rust/src/lib.rs | 3 +
5 files changed, 281 insertions(+), 120 deletions(-)
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 9b7b310..c3120cf 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -35,4 +35,5 @@ path = "src/lib.rs"
[dependencies]
bytes = "0.4"
-libc = "0.2"
\ No newline at end of file
+libc = "0.2"
+serde_json = "1.0.13"
\ No newline at end of file
diff --git a/rust/src/array.rs b/rust/src/array.rs
index 960f33e..7fd3433 100644
--- a/rust/src/array.rs
+++ b/rust/src/array.rs
@@ -23,7 +23,6 @@ use std::string::String;
use super::bitmap::Bitmap;
use super::buffer::Buffer;
-use super::error::*;
use super::list::List;
pub enum ArrayData {
@@ -91,74 +90,6 @@ impl Array {
}
}
-/// type-safe array operations
-trait ArrayOps<T> {
- /// Get one element from an array. Note that this is an expensive call since it
- /// will pattern match the type of the array on every invocation. We should add
- /// other efficient iterator and map methods so we can perform columnar operations
- /// instead.
- fn get(&self, i: usize) -> Result<T, Error>;
-
- /// Compare two same-typed arrays using a boolean closure e.g. eq, gt, lt, and so on
- fn compare(&self, other: &Array, f: &Fn(T, T) -> bool) -> Result<Vec<bool>, Error>;
-
- /// Perform a computation on two same-typed arrays and produce a result of the same type e.g. c = a + b
- fn compute(&self, other: &Array, f: &Fn(T, T) -> T) -> Result<Vec<T>, Error>;
-}
-
-macro_rules! array_ops {
- ($DT:ty, $AT:ident) => {
- impl ArrayOps<$DT> for Array {
- fn get(&self, i: usize) -> Result<$DT, Error> {
- match self.data() {
- &ArrayData::$AT(ref buf) => Ok(unsafe { *buf.data().offset(i as isize) }),
- _ => Err(Error::from("Request for $DT but array is not $DT")),
- }
- }
- fn compare(&self, other: &Array, f: &Fn($DT, $DT) -> bool) -> Result<Vec<bool>, Error> {
- match (&self.data, &other.data) {
- (&ArrayData::$AT(ref l), &ArrayData::$AT(ref r)) => {
- let mut b: Vec<bool> = Vec::with_capacity(self.len as usize);
- for i in 0..self.len as isize {
- let lv: $DT = unsafe { *l.data().offset(i) };
- let rv: $DT = unsafe { *r.data().offset(i) };
- b.push(f(lv, rv));
- }
- Ok(b)
- }
- _ => Err(Error::from("Cannot compare arrays of this type")),
- }
- }
- fn compute(&self, other: &Array, f: &Fn($DT, $DT) -> $DT) -> Result<Vec<$DT>, Error> {
- match (&self.data, &other.data) {
- (&ArrayData::$AT(ref l), &ArrayData::$AT(ref r)) => {
- let mut b: Vec<$DT> = Vec::with_capacity(self.len as usize);
- for i in 0..self.len as isize {
- let lv: $DT = unsafe { *l.data().offset(i) };
- let rv: $DT = unsafe { *r.data().offset(i) };
- b.push(f(lv, rv));
- }
- Ok(b)
- }
- _ => Err(Error::from("Cannot compare arrays of this type")),
- }
- }
- }
- };
-}
-
-array_ops!(bool, Boolean);
-array_ops!(f64, Float64);
-array_ops!(f32, Float32);
-array_ops!(u8, UInt8);
-array_ops!(u16, UInt16);
-array_ops!(u32, UInt32);
-array_ops!(u64, UInt64);
-array_ops!(i8, Int8);
-array_ops!(i16, Int16);
-array_ops!(i32, Int32);
-array_ops!(i64, Int64);
-
macro_rules! array_from_primitive {
($DT:ty) => {
impl From<Vec<$DT>> for Array {
@@ -307,12 +238,12 @@ mod tests {
fn test_from_i32() {
let a = Array::from(vec![15, 14, 13, 12, 11]);
assert_eq!(5, a.len());
-
- assert_eq!(15, a.get(0).unwrap());
- assert_eq!(14, a.get(1).unwrap());
- assert_eq!(13, a.get(2).unwrap());
- assert_eq!(12, a.get(3).unwrap());
- assert_eq!(11, a.get(4).unwrap());
+ match a.data() {
+ &ArrayData::Int32(ref b) => {
+ assert_eq!(vec![15, 14, 13, 12, 11], b.iter().collect::<Vec<i32>>());
+ }
+ _ => panic!(),
+ }
}
#[test]
@@ -330,7 +261,7 @@ mod tests {
#[test]
fn test_struct() {
- let _schema = Schema::new(vec![
+ let _schema = DataType::Struct(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Float32, false),
]);
@@ -340,43 +271,4 @@ mod tests {
let _ = Rc::new(Array::from(vec![a, b]));
}
- #[test]
- fn test_array_eq() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compare(&b, &|a: i32, b: i32| a == b).unwrap();
- assert_eq!(c, vec![false, false, true, false, false]);
- }
-
- #[test]
- fn test_array_lt() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compare(&b, &|a: i32, b: i32| a < b).unwrap();
- assert_eq!(c, vec![true, true, false, false, false]);
- }
-
- #[test]
- fn test_array_gt() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compare(&b, &|a: i32, b: i32| a > b).unwrap();
- assert_eq!(c, vec![false, false, false, true, true]);
- }
-
- #[test]
- fn test_array_add() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compute(&b, &|a: i32, b: i32| a + b).unwrap();
- assert_eq!(c, vec![6, 6, 6, 6, 6]);
- }
-
- #[test]
- fn test_array_multiply() {
- let a = Array::from(vec![1, 2, 3, 4, 5]);
- let b = Array::from(vec![5, 4, 3, 2, 1]);
- let c = a.compute(&b, &|a: i32, b: i32| a * b).unwrap();
- assert_eq!(c, vec![5, 8, 9, 8, 5]);
- }
}
diff --git a/rust/src/buffer.rs b/rust/src/buffer.rs
index 6c4f1c7..7d5cc7c 100644
--- a/rust/src/buffer.rs
+++ b/rust/src/buffer.rs
@@ -166,4 +166,59 @@ mod tests {
let v: Vec<i32> = it.map(|n| n + 1).collect();
assert_eq!(vec![2, 3, 4, 5, 6], v);
}
+
+    /// Element-wise equality of two equally sized buffers.
+    #[test]
+    fn test_buffer_eq() {
+        let lhs = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let rhs = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let pairs = lhs.iter().zip(rhs.iter());
+        let equal: Vec<bool> = pairs.map(|(l, r)| l == r).collect();
+        assert_eq!(equal, vec![false, false, true, false, false]);
+    }
+
+    /// Element-wise `<` comparison of two equally sized buffers.
+    #[test]
+    fn test_buffer_lt() {
+        let lhs = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let rhs = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let pairs = lhs.iter().zip(rhs.iter());
+        let less: Vec<bool> = pairs.map(|(l, r)| l < r).collect();
+        assert_eq!(less, vec![true, true, false, false, false]);
+    }
+
+    /// Element-wise `>` comparison of two equally sized buffers.
+    #[test]
+    fn test_buffer_gt() {
+        let lhs = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let rhs = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let pairs = lhs.iter().zip(rhs.iter());
+        let greater: Vec<bool> = pairs.map(|(l, r)| l > r).collect();
+        assert_eq!(greater, vec![false, false, false, true, true]);
+    }
+
+    /// Element-wise sum of two equally sized buffers.
+    #[test]
+    fn test_buffer_add() {
+        let lhs = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let rhs = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let pairs = lhs.iter().zip(rhs.iter());
+        let sums: Vec<i32> = pairs.map(|(l, r)| l + r).collect();
+        assert_eq!(sums, vec![6, 6, 6, 6, 6]);
+    }
+
+    /// Element-wise product of two equally sized buffers.
+    #[test]
+    fn test_buffer_multiply() {
+        let lhs = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let rhs = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let pairs = lhs.iter().zip(rhs.iter());
+        let products: Vec<i32> = pairs.map(|(l, r)| l * r).collect();
+        assert_eq!(products, vec![5, 8, 9, 8, 5]);
+    }
}
diff --git a/rust/src/datatypes.rs b/rust/src/datatypes.rs
index a812f32..4f022ba 100644
--- a/rust/src/datatypes.rs
+++ b/rust/src/datatypes.rs
@@ -15,7 +15,15 @@
// specific language governing permissions and limitations
// under the License.
-#[derive(Debug, Clone)]
+use serde_json;
+use serde_json::Value;
+
+/// Error type returned by the JSON parsing functions `DataType::from` and
+/// `Field::from`.
+#[derive(Debug, Clone, PartialEq)]
+pub enum ArrowError {
+ /// A JSON value could not be interpreted; the payload is a human-readable
+ /// message describing what was missing or invalid.
+ ParseError(String),
+}
+
+#[derive(Debug, Clone, PartialEq)]
pub enum DataType {
Boolean,
Int8,
@@ -26,13 +34,105 @@ pub enum DataType {
UInt16,
UInt32,
UInt64,
+ Float16,
Float32,
Float64,
Utf8,
Struct(Vec<Field>),
}
-#[derive(Debug, Clone)]
+impl DataType {
+    /// Builds a `DataType` from its JSON description.
+    ///
+    /// Accepted shapes:
+    ///
+    /// * `{"name": "bool"}` and `{"name": "utf8"}`
+    /// * `{"name": "floatingpoint", "precision": "HALF" | "SINGLE" | "DOUBLE"}`
+    /// * `{"name": "int", "isSigned": <bool>, "bitWidth": 8 | 16 | 32 | 64}`
+    /// * an object without a `"name"` key but with a `"fields"` array, which is
+    ///   parsed as a struct whose members are read with `Field::from`
+    ///
+    /// # Errors
+    ///
+    /// Returns `ArrowError::ParseError` when `json` is not an object, the type
+    /// name is not recognised, a required attribute is missing or has an
+    /// unexpected value, or any nested field fails to parse.
+    fn from(json: &Value) -> Result<DataType, ArrowError> {
+        match json {
+            &Value::Object(ref map) => match map.get("name") {
+                Some(s) if s == "bool" => Ok(DataType::Boolean),
+                Some(s) if s == "utf8" => Ok(DataType::Utf8),
+                Some(s) if s == "floatingpoint" => match map.get("precision") {
+                    Some(p) if p == "HALF" => Ok(DataType::Float16),
+                    Some(p) if p == "SINGLE" => Ok(DataType::Float32),
+                    Some(p) if p == "DOUBLE" => Ok(DataType::Float64),
+                    _ => Err(ArrowError::ParseError(
+                        "floatingpoint precision missing or invalid".to_string(),
+                    )),
+                },
+                Some(s) if s == "int" => {
+                    // Signedness is validated first so that its error message
+                    // takes precedence over a bad or missing bit width.
+                    let signed = match map.get("isSigned") {
+                        Some(&Value::Bool(b)) => b,
+                        _ => {
+                            return Err(ArrowError::ParseError(
+                                "int signed missing or invalid".to_string(),
+                            ))
+                        }
+                    };
+                    // Anything that is not an unsigned JSON integer is treated
+                    // the same as a missing width.
+                    let bit_width = match map.get("bitWidth") {
+                        Some(&Value::Number(ref n)) => n.as_u64(),
+                        _ => None,
+                    };
+                    match (signed, bit_width) {
+                        (true, Some(8)) => Ok(DataType::Int8),
+                        (true, Some(16)) => Ok(DataType::Int16),
+                        (true, Some(32)) => Ok(DataType::Int32),
+                        // Previously mapped to Int32, silently narrowing 64-bit
+                        // signed columns.
+                        (true, Some(64)) => Ok(DataType::Int64),
+                        (false, Some(8)) => Ok(DataType::UInt8),
+                        (false, Some(16)) => Ok(DataType::UInt16),
+                        (false, Some(32)) => Ok(DataType::UInt32),
+                        (false, Some(64)) => Ok(DataType::UInt64),
+                        _ => Err(ArrowError::ParseError(
+                            "int bitWidth missing or invalid".to_string(),
+                        )),
+                    }
+                }
+                Some(other) => Err(ArrowError::ParseError(format!(
+                    "invalid type name: {}",
+                    other
+                ))),
+                // No "name" key: the only remaining valid shape is a struct.
+                None => match map.get("fields") {
+                    Some(&Value::Array(ref fields_array)) => {
+                        let fields = fields_array
+                            .iter()
+                            .map(Field::from)
+                            .collect::<Result<Vec<Field>, ArrowError>>()?;
+                        Ok(DataType::Struct(fields))
+                    }
+                    _ => Err(ArrowError::ParseError("empty type".to_string())),
+                },
+            },
+            _ => Err(ArrowError::ParseError(
+                "invalid json value type".to_string(),
+            )),
+        }
+    }
+
+    /// Renders this data type as a JSON value.
+    ///
+    /// Primitive types become an object with a `"name"` key plus, for integers,
+    /// `"bitWidth"` and `"isSigned"`, and for floats, `"precision"`. A struct
+    /// becomes an object with a single `"fields"` array holding the JSON form of
+    /// each member field.
+    pub fn to_json(&self) -> Value {
+        // Shared builders for the two parameterised families of primitives.
+        let int = |bits: i32, signed: bool| {
+            json!({"name": "int", "bitWidth": bits, "isSigned": signed})
+        };
+        let float = |precision: &str| json!({"name": "floatingpoint", "precision": precision});
+
+        match *self {
+            DataType::Boolean => json!({"name": "bool"}),
+            DataType::Int8 => int(8, true),
+            DataType::Int16 => int(16, true),
+            DataType::Int32 => int(32, true),
+            DataType::Int64 => int(64, true),
+            DataType::UInt8 => int(8, false),
+            DataType::UInt16 => int(16, false),
+            DataType::UInt32 => int(32, false),
+            DataType::UInt64 => int(64, false),
+            DataType::Float16 => float("HALF"),
+            DataType::Float32 => float("SINGLE"),
+            DataType::Float64 => float("DOUBLE"),
+            DataType::Utf8 => json!({"name": "utf8"}),
+            DataType::Struct(ref fields) => {
+                let members: Vec<Value> = fields.iter().map(|field| field.to_json()).collect();
+                let field_values = Value::Array(members);
+                json!({ "fields": field_values })
+            }
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq)]
pub struct Field {
pub name: String,
pub data_type: DataType,
@@ -48,6 +148,54 @@ impl Field {
}
}
+    /// Parses a `Field` from a JSON object.
+    ///
+    /// The object must carry a string `"name"`, a boolean `"nullable"` and a
+    /// `"type"` entry that `DataType::from` can parse. The attributes are
+    /// checked in that order and the first failure is reported.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ArrowError::ParseError` when `json` is not an object, when one
+    /// of the attributes is missing or has the wrong JSON type, or when the
+    /// nested type fails to parse.
+    pub fn from(json: &Value) -> Result<Self, ArrowError> {
+        let attributes = match *json {
+            Value::Object(ref attributes) => attributes,
+            _ => {
+                return Err(ArrowError::ParseError(format!(
+                    "Invalid json value type for field"
+                )))
+            }
+        };
+
+        let name = match attributes.get("name") {
+            Some(&Value::String(ref text)) => text.to_string(),
+            _ => {
+                return Err(ArrowError::ParseError(format!(
+                    "Field missing 'name' attribute"
+                )))
+            }
+        };
+
+        let nullable = match attributes.get("nullable") {
+            Some(&Value::Bool(flag)) => flag,
+            _ => {
+                return Err(ArrowError::ParseError(format!(
+                    "Field missing 'nullable' attribute"
+                )))
+            }
+        };
+
+        let data_type = match attributes.get("type") {
+            Some(type_json) => DataType::from(type_json)?,
+            None => {
+                return Err(ArrowError::ParseError(format!(
+                    "Field missing 'type' attribute"
+                )))
+            }
+        };
+
+        Ok(Field {
+            name,
+            nullable,
+            data_type,
+        })
+    }
+
+    /// Renders this field as a JSON object with `"name"`, `"nullable"` and
+    /// `"type"` entries, the last being the JSON form of the field's data type.
+    pub fn to_json(&self) -> Value {
+        let type_json = self.data_type.to_json();
+        json!({
+            "name": self.name,
+            "nullable": self.nullable,
+            "type": type_json,
+        })
+    }
+
pub fn to_string(&self) -> String {
format!("{}: {:?}", self.name, self.data_type)
}
@@ -87,8 +235,8 @@ mod tests {
use super::*;
#[test]
- fn test_define_schema() {
- let _person = Schema::new(vec![
+ fn create_struct_type() {
+ let _person = DataType::Struct(vec![
Field::new("first_name", DataType::Utf8, false),
Field::new("last_name", DataType::Utf8, false),
Field::new(
@@ -101,4 +249,66 @@ mod tests {
),
]);
}
+
+    /// A struct-typed field serialises its members under `type.fields`.
+    #[test]
+    fn struct_field_to_json() {
+        let members = vec![
+            Field::new("street", DataType::Utf8, false),
+            Field::new("zip", DataType::UInt16, false),
+        ];
+        let address = Field::new("address", DataType::Struct(members), false);
+        let actual = address.to_json().to_string();
+        assert_eq!(
+            "{\"name\":\"address\",\"nullable\":false,\"type\":{\"fields\":[\
+            {\"name\":\"street\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}},\
+            {\"name\":\"zip\",\"nullable\":false,\"type\":{\"bitWidth\":16,\"isSigned\":false,\"name\":\"int\"}}]}}",
+            actual
+        );
+    }
+
+    /// A primitive-typed field serialises to a flat three-key object.
+    #[test]
+    fn primitive_field_to_json() {
+        let first_name = Field::new("first_name", DataType::Utf8, false);
+        let actual = first_name.to_json().to_string();
+        assert_eq!(
+            "{\"name\":\"first_name\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}}",
+            actual
+        );
+    }
+    /// A field with a nested struct type is rebuilt from its JSON text.
+    #[test]
+    fn parse_struct_from_json() {
+        let text = "{\"name\":\"address\",\"nullable\":false,\"type\":{\"fields\":[\
+        {\"name\":\"street\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}},\
+        {\"name\":\"zip\",\"nullable\":false,\"type\":{\"bitWidth\":16,\"isSigned\":false,\"name\":\"int\"}}]}}";
+        let parsed_json: Value = serde_json::from_str(text).unwrap();
+        let parsed = Field::from(&parsed_json).unwrap();
+
+        let members = vec![
+            Field::new("street", DataType::Utf8, false),
+            Field::new("zip", DataType::UInt16, false),
+        ];
+        let expected = Field::new("address", DataType::Struct(members), false);
+
+        assert_eq!(expected, parsed);
+    }
+
+    /// `{"name":"utf8"}` parses to `DataType::Utf8`.
+    #[test]
+    fn parse_utf8_from_json() {
+        let parsed_json: Value = serde_json::from_str("{\"name\":\"utf8\"}").unwrap();
+        let parsed = DataType::from(&parsed_json).unwrap();
+        assert_eq!(DataType::Utf8, parsed);
+    }
+
+    /// A signed 32-bit `int` description parses to `DataType::Int32`.
+    #[test]
+    fn parse_int32_from_json() {
+        let text = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}";
+        let parsed_json: Value = serde_json::from_str(text).unwrap();
+        let parsed = DataType::from(&parsed_json).unwrap();
+        assert_eq!(DataType::Int32, parsed);
+    }
}
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index 8eeb340..bfcd83d 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -18,6 +18,9 @@
extern crate bytes;
extern crate libc;
+#[macro_use]
+extern crate serde_json;
+
pub mod array;
pub mod bitmap;
pub mod buffer;
--
To stop receiving notification emails like this one, please contact
uwe@apache.org.