You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/04 14:20:41 UTC

[arrow] branch master updated: ARROW-2385: [Rust] implement to_json for DataType and Field

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 7081752  ARROW-2385: [Rust] implement to_json for DataType and Field
7081752 is described below

commit 708175268d498a85ef81ae7438374a2c6fb00b6c
Author: Andy Grove <an...@gmail.com>
AuthorDate: Wed Apr 4 16:20:16 2018 +0200

    ARROW-2385: [Rust] implement to_json for DataType and Field
    
    Note that this PR also moves some tests for comparing arrays from Array to Buffer<T> and removes some redundant code that was implemented before it was possible to get a type-safe Iterator from Buffer<T>.
    
    This change was made in this PR because the serde_json crate's macros pretty much forced me to address this now.
    
    Author: Andy Grove <an...@gmail.com>
    
    Closes #1829 from andygrove/schema_json and squashes the following commits:
    
    6b5281f <Andy Grove> fix issues that stopped code compiling with Rust 1.25.0
    6af8963 <Andy Grove> rustfmt
    ce2e56d <Andy Grove> remove commented out code
    0ba3a77 <Andy Grove> can parse types and fields from json
    c9ace3f <Andy Grove> implement to_json for DataType and Field
---
 rust/Cargo.toml       |   3 +-
 rust/src/array.rs     | 122 ++--------------------------
 rust/src/buffer.rs    |  55 +++++++++++++
 rust/src/datatypes.rs | 218 +++++++++++++++++++++++++++++++++++++++++++++++++-
 rust/src/lib.rs       |   3 +
 5 files changed, 281 insertions(+), 120 deletions(-)

diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 9b7b310..c3120cf 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -35,4 +35,5 @@ path = "src/lib.rs"
 
 [dependencies]
 bytes = "0.4"
-libc = "0.2"
\ No newline at end of file
+libc = "0.2"
+serde_json = "1.0.13"
\ No newline at end of file
diff --git a/rust/src/array.rs b/rust/src/array.rs
index 960f33e..7fd3433 100644
--- a/rust/src/array.rs
+++ b/rust/src/array.rs
@@ -23,7 +23,6 @@ use std::string::String;
 
 use super::bitmap::Bitmap;
 use super::buffer::Buffer;
-use super::error::*;
 use super::list::List;
 
 pub enum ArrayData {
@@ -91,74 +90,6 @@ impl Array {
     }
 }
 
-/// type-safe array operations
-trait ArrayOps<T> {
-    /// Get one element from an array. Note that this is an expensive call since it
-    /// will pattern match the type of the array on every invocation. We should add
-    /// other efficient iterator and map methods so we can perform columnar operations
-    /// instead.
-    fn get(&self, i: usize) -> Result<T, Error>;
-
-    /// Compare two same-typed arrays using a boolean closure e.g. eq, gt, lt, and so on
-    fn compare(&self, other: &Array, f: &Fn(T, T) -> bool) -> Result<Vec<bool>, Error>;
-
-    /// Perform a computation on two same-typed arrays and produce a result of the same type e.g. c = a + b
-    fn compute(&self, other: &Array, f: &Fn(T, T) -> T) -> Result<Vec<T>, Error>;
-}
-
-macro_rules! array_ops {
-    ($DT:ty, $AT:ident) => {
-        impl ArrayOps<$DT> for Array {
-            fn get(&self, i: usize) -> Result<$DT, Error> {
-                match self.data() {
-                    &ArrayData::$AT(ref buf) => Ok(unsafe { *buf.data().offset(i as isize) }),
-                    _ => Err(Error::from("Request for $DT but array is not $DT")),
-                }
-            }
-            fn compare(&self, other: &Array, f: &Fn($DT, $DT) -> bool) -> Result<Vec<bool>, Error> {
-                match (&self.data, &other.data) {
-                    (&ArrayData::$AT(ref l), &ArrayData::$AT(ref r)) => {
-                        let mut b: Vec<bool> = Vec::with_capacity(self.len as usize);
-                        for i in 0..self.len as isize {
-                            let lv: $DT = unsafe { *l.data().offset(i) };
-                            let rv: $DT = unsafe { *r.data().offset(i) };
-                            b.push(f(lv, rv));
-                        }
-                        Ok(b)
-                    }
-                    _ => Err(Error::from("Cannot compare arrays of this type")),
-                }
-            }
-            fn compute(&self, other: &Array, f: &Fn($DT, $DT) -> $DT) -> Result<Vec<$DT>, Error> {
-                match (&self.data, &other.data) {
-                    (&ArrayData::$AT(ref l), &ArrayData::$AT(ref r)) => {
-                        let mut b: Vec<$DT> = Vec::with_capacity(self.len as usize);
-                        for i in 0..self.len as isize {
-                            let lv: $DT = unsafe { *l.data().offset(i) };
-                            let rv: $DT = unsafe { *r.data().offset(i) };
-                            b.push(f(lv, rv));
-                        }
-                        Ok(b)
-                    }
-                    _ => Err(Error::from("Cannot compare arrays of this type")),
-                }
-            }
-        }
-    };
-}
-
-array_ops!(bool, Boolean);
-array_ops!(f64, Float64);
-array_ops!(f32, Float32);
-array_ops!(u8, UInt8);
-array_ops!(u16, UInt16);
-array_ops!(u32, UInt32);
-array_ops!(u64, UInt64);
-array_ops!(i8, Int8);
-array_ops!(i16, Int16);
-array_ops!(i32, Int32);
-array_ops!(i64, Int64);
-
 macro_rules! array_from_primitive {
     ($DT:ty) => {
         impl From<Vec<$DT>> for Array {
@@ -307,12 +238,12 @@ mod tests {
     fn test_from_i32() {
         let a = Array::from(vec![15, 14, 13, 12, 11]);
         assert_eq!(5, a.len());
-
-        assert_eq!(15, a.get(0).unwrap());
-        assert_eq!(14, a.get(1).unwrap());
-        assert_eq!(13, a.get(2).unwrap());
-        assert_eq!(12, a.get(3).unwrap());
-        assert_eq!(11, a.get(4).unwrap());
+        match a.data() {
+            &ArrayData::Int32(ref b) => {
+                assert_eq!(vec![15, 14, 13, 12, 11], b.iter().collect::<Vec<i32>>());
+            }
+            _ => panic!(),
+        }
     }
 
     #[test]
@@ -330,7 +261,7 @@ mod tests {
 
     #[test]
     fn test_struct() {
-        let _schema = Schema::new(vec![
+        let _schema = DataType::Struct(vec![
             Field::new("a", DataType::Int32, false),
             Field::new("b", DataType::Float32, false),
         ]);
@@ -340,43 +271,4 @@ mod tests {
         let _ = Rc::new(Array::from(vec![a, b]));
     }
 
-    #[test]
-    fn test_array_eq() {
-        let a = Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Array::from(vec![5, 4, 3, 2, 1]);
-        let c = a.compare(&b, &|a: i32, b: i32| a == b).unwrap();
-        assert_eq!(c, vec![false, false, true, false, false]);
-    }
-
-    #[test]
-    fn test_array_lt() {
-        let a = Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Array::from(vec![5, 4, 3, 2, 1]);
-        let c = a.compare(&b, &|a: i32, b: i32| a < b).unwrap();
-        assert_eq!(c, vec![true, true, false, false, false]);
-    }
-
-    #[test]
-    fn test_array_gt() {
-        let a = Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Array::from(vec![5, 4, 3, 2, 1]);
-        let c = a.compare(&b, &|a: i32, b: i32| a > b).unwrap();
-        assert_eq!(c, vec![false, false, false, true, true]);
-    }
-
-    #[test]
-    fn test_array_add() {
-        let a = Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Array::from(vec![5, 4, 3, 2, 1]);
-        let c = a.compute(&b, &|a: i32, b: i32| a + b).unwrap();
-        assert_eq!(c, vec![6, 6, 6, 6, 6]);
-    }
-
-    #[test]
-    fn test_array_multiply() {
-        let a = Array::from(vec![1, 2, 3, 4, 5]);
-        let b = Array::from(vec![5, 4, 3, 2, 1]);
-        let c = a.compute(&b, &|a: i32, b: i32| a * b).unwrap();
-        assert_eq!(c, vec![5, 8, 9, 8, 5]);
-    }
 }
diff --git a/rust/src/buffer.rs b/rust/src/buffer.rs
index 6c4f1c7..7d5cc7c 100644
--- a/rust/src/buffer.rs
+++ b/rust/src/buffer.rs
@@ -166,4 +166,59 @@ mod tests {
         let v: Vec<i32> = it.map(|n| n + 1).collect();
         assert_eq!(vec![2, 3, 4, 5, 6], v);
     }
+
+    #[test]
+    fn test_buffer_eq() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let c = a.iter()
+            .zip(b.iter())
+            .map(|(a, b)| a == b)
+            .collect::<Vec<bool>>();
+        assert_eq!(c, vec![false, false, true, false, false]);
+    }
+
+    #[test]
+    fn test_buffer_lt() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let c = a.iter()
+            .zip(b.iter())
+            .map(|(a, b)| a < b)
+            .collect::<Vec<bool>>();
+        assert_eq!(c, vec![true, true, false, false, false]);
+    }
+
+    #[test]
+    fn test_buffer_gt() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let c = a.iter()
+            .zip(b.iter())
+            .map(|(a, b)| a > b)
+            .collect::<Vec<bool>>();
+        assert_eq!(c, vec![false, false, false, true, true]);
+    }
+
+    #[test]
+    fn test_buffer_add() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let c = a.iter()
+            .zip(b.iter())
+            .map(|(a, b)| a + b)
+            .collect::<Vec<i32>>();
+        assert_eq!(c, vec![6, 6, 6, 6, 6]);
+    }
+
+    #[test]
+    fn test_buffer_multiply() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let b = Buffer::from(vec![5, 4, 3, 2, 1]);
+        let c = a.iter()
+            .zip(b.iter())
+            .map(|(a, b)| a * b)
+            .collect::<Vec<i32>>();
+        assert_eq!(c, vec![5, 8, 9, 8, 5]);
+    }
 }
diff --git a/rust/src/datatypes.rs b/rust/src/datatypes.rs
index a812f32..4f022ba 100644
--- a/rust/src/datatypes.rs
+++ b/rust/src/datatypes.rs
@@ -15,7 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#[derive(Debug, Clone)]
+use serde_json;
+use serde_json::Value;
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum ArrowError {
+    ParseError(String),
+}
+
+#[derive(Debug, Clone, PartialEq)]
 pub enum DataType {
     Boolean,
     Int8,
@@ -26,13 +34,105 @@ pub enum DataType {
     UInt16,
     UInt32,
     UInt64,
+    Float16,
     Float32,
     Float64,
     Utf8,
     Struct(Vec<Field>),
 }
 
-#[derive(Debug, Clone)]
+impl DataType {
+    fn from(json: &Value) -> Result<DataType, ArrowError> {
+        // Parse a type object: dispatch on its "name" key; with no "name", a "fields" array denotes a struct.
+        match json {
+            &Value::Object(ref map) => match map.get("name") {
+                Some(s) if s == "bool" => Ok(DataType::Boolean),
+                Some(s) if s == "utf8" => Ok(DataType::Utf8),
+                Some(s) if s == "floatingpoint" => match map.get("precision") {
+                    Some(p) if p == "HALF" => Ok(DataType::Float16),
+                    Some(p) if p == "SINGLE" => Ok(DataType::Float32),
+                    Some(p) if p == "DOUBLE" => Ok(DataType::Float64),
+                    _ => Err(ArrowError::ParseError(format!(
+                        "floatingpoint precision missing or invalid"
+                    ))),
+                },
+                Some(s) if s == "int" => match map.get("isSigned") {
+                    Some(&Value::Bool(true)) => match map.get("bitWidth") {
+                        Some(&Value::Number(ref n)) => match n.as_u64() {
+                            Some(8) => Ok(DataType::Int8),
+                            Some(16) => Ok(DataType::Int16),
+                            Some(32) => Ok(DataType::Int32),
+                            Some(64) => Ok(DataType::Int64),
+                            _ => Err(ArrowError::ParseError(format!(
+                                "int bitWidth missing or invalid"
+                            ))),
+                        },
+                        _ => Err(ArrowError::ParseError(format!(
+                            "int bitWidth missing or invalid"
+                        ))),
+                    },
+                    Some(&Value::Bool(false)) => match map.get("bitWidth") {
+                        Some(&Value::Number(ref n)) => match n.as_u64() {
+                            Some(8) => Ok(DataType::UInt8),
+                            Some(16) => Ok(DataType::UInt16),
+                            Some(32) => Ok(DataType::UInt32),
+                            Some(64) => Ok(DataType::UInt64),
+                            _ => Err(ArrowError::ParseError(format!(
+                                "int bitWidth missing or invalid"
+                            ))),
+                        },
+                        _ => Err(ArrowError::ParseError(format!(
+                            "int bitWidth missing or invalid"
+                        ))),
+                    },
+                    _ => Err(ArrowError::ParseError(format!(
+                        "int signed missing or invalid"
+                    ))),
+                },
+                Some(other) => Err(ArrowError::ParseError(format!(
+                    "invalid type name: {}",
+                    other
+                ))),
+                None => match map.get("fields") {
+                    Some(&Value::Array(ref fields_array)) => {
+                        let fields = fields_array
+                            .iter()
+                            .map(|f| Field::from(f))
+                            .collect::<Result<Vec<Field>, ArrowError>>();
+                        Ok(DataType::Struct(fields?))
+                    }
+                    _ => Err(ArrowError::ParseError(format!("empty type"))),
+                },
+            },
+            _ => Err(ArrowError::ParseError(format!("invalid json value type"))),
+        }
+    }
+
+    pub fn to_json(&self) -> Value {
+        match self {
+            &DataType::Boolean => json!({"name": "bool"}),
+            &DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned": true}),
+            &DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned": true}),
+            &DataType::Int32 => json!({"name": "int", "bitWidth": 32, "isSigned": true}),
+            &DataType::Int64 => json!({"name": "int", "bitWidth": 64, "isSigned": true}),
+            &DataType::UInt8 => json!({"name": "int", "bitWidth": 8, "isSigned": false}),
+            &DataType::UInt16 => json!({"name": "int", "bitWidth": 16, "isSigned": false}),
+            &DataType::UInt32 => json!({"name": "int", "bitWidth": 32, "isSigned": false}),
+            &DataType::UInt64 => json!({"name": "int", "bitWidth": 64, "isSigned": false}),
+            &DataType::Float16 => json!({"name": "floatingpoint", "precision": "HALF"}),
+            &DataType::Float32 => json!({"name": "floatingpoint", "precision": "SINGLE"}),
+            &DataType::Float64 => json!({"name": "floatingpoint", "precision": "DOUBLE"}),
+            &DataType::Utf8 => json!({"name": "utf8"}),
+            &DataType::Struct(ref fields) => {
+                let field_json_array =
+                    Value::Array(fields.iter().map(|f| f.to_json()).collect::<Vec<Value>>());
+                json!({ "fields": field_json_array })
+            }
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq)]
 pub struct Field {
     pub name: String,
     pub data_type: DataType,
@@ -48,6 +148,54 @@ impl Field {
         }
     }
 
+    pub fn from(json: &Value) -> Result<Self, ArrowError> {
+        //println!("Field::from({:?}", json);
+        match json {
+            &Value::Object(ref map) => {
+                let name = match map.get("name") {
+                    Some(&Value::String(ref name)) => name.to_string(),
+                    _ => {
+                        return Err(ArrowError::ParseError(format!(
+                            "Field missing 'name' attribute"
+                        )))
+                    }
+                };
+                let nullable = match map.get("nullable") {
+                    Some(&Value::Bool(b)) => b,
+                    _ => {
+                        return Err(ArrowError::ParseError(format!(
+                            "Field missing 'nullable' attribute"
+                        )))
+                    }
+                };
+                let data_type = match map.get("type") {
+                    Some(t) => DataType::from(t)?,
+                    _ => {
+                        return Err(ArrowError::ParseError(format!(
+                            "Field missing 'type' attribute"
+                        )))
+                    }
+                };
+                Ok(Field {
+                    name,
+                    nullable,
+                    data_type,
+                })
+            }
+            _ => Err(ArrowError::ParseError(format!(
+                "Invalid json value type for field"
+            ))),
+        }
+    }
+
+    pub fn to_json(&self) -> Value {
+        json!({
+            "name": self.name,
+            "nullable": self.nullable,
+            "type": self.data_type.to_json(),
+        })
+    }
+
     pub fn to_string(&self) -> String {
         format!("{}: {:?}", self.name, self.data_type)
     }
@@ -87,8 +235,8 @@ mod tests {
     use super::*;
 
     #[test]
-    fn test_define_schema() {
-        let _person = Schema::new(vec![
+    fn create_struct_type() {
+        let _person = DataType::Struct(vec![
             Field::new("first_name", DataType::Utf8, false),
             Field::new("last_name", DataType::Utf8, false),
             Field::new(
@@ -101,4 +249,66 @@ mod tests {
             ),
         ]);
     }
+
+    #[test]
+    fn struct_field_to_json() {
+        let f = Field::new(
+            "address",
+            DataType::Struct(vec![
+                Field::new("street", DataType::Utf8, false),
+                Field::new("zip", DataType::UInt16, false),
+            ]),
+            false,
+        );
+        assert_eq!(
+            "{\"name\":\"address\",\"nullable\":false,\"type\":{\"fields\":[\
+            {\"name\":\"street\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}},\
+            {\"name\":\"zip\",\"nullable\":false,\"type\":{\"bitWidth\":16,\"isSigned\":false,\"name\":\"int\"}}]}}",
+            f.to_json().to_string()
+        );
+    }
+
+    #[test]
+    fn primitive_field_to_json() {
+        let f = Field::new("first_name", DataType::Utf8, false);
+        assert_eq!(
+            "{\"name\":\"first_name\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}}",
+            f.to_json().to_string()
+        );
+    }
+    #[test]
+    fn parse_struct_from_json() {
+        let json = "{\"name\":\"address\",\"nullable\":false,\"type\":{\"fields\":[\
+        {\"name\":\"street\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}},\
+        {\"name\":\"zip\",\"nullable\":false,\"type\":{\"bitWidth\":16,\"isSigned\":false,\"name\":\"int\"}}]}}";
+        let value: Value = serde_json::from_str(json).unwrap();
+        let dt = Field::from(&value).unwrap();
+
+        let expected = Field::new(
+            "address",
+            DataType::Struct(vec![
+                Field::new("street", DataType::Utf8, false),
+                Field::new("zip", DataType::UInt16, false),
+            ]),
+            false,
+        );
+
+        assert_eq!(expected, dt);
+    }
+
+    #[test]
+    fn parse_utf8_from_json() {
+        let json = "{\"name\":\"utf8\"}";
+        let value: Value = serde_json::from_str(json).unwrap();
+        let dt = DataType::from(&value).unwrap();
+        assert_eq!(DataType::Utf8, dt);
+    }
+
+    #[test]
+    fn parse_int32_from_json() {
+        let json = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}";
+        let value: Value = serde_json::from_str(json).unwrap();
+        let dt = DataType::from(&value).unwrap();
+        assert_eq!(DataType::Int32, dt);
+    }
 }
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index 8eeb340..bfcd83d 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -18,6 +18,9 @@
 extern crate bytes;
 extern crate libc;
 
+#[macro_use]
+extern crate serde_json;
+
 pub mod array;
 pub mod bitmap;
 pub mod buffer;

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.