You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2019/01/09 23:49:14 UTC

[arrow] branch master updated: ARROW-3959: [Rust] Add date/time data types

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 87ceb3c  ARROW-3959: [Rust] Add date/time data types
87ceb3c is described below

commit 87ceb3ca904c9e9a839ff1cc724d3139c1958047
Author: Andy Grove <an...@gmail.com>
AuthorDate: Wed Jan 9 16:49:04 2019 -0700

    ARROW-3959: [Rust] Add date/time data types
    
    This only adds the date/time types to the `DataType` enum, as well as JSON serialization for their metadata.
    
    This PR also implements `Schema::to_json`
    
    Author: Andy Grove <an...@gmail.com>
    
    Closes #3340 from andygrove/ARROW-3959 and squashes the following commits:
    
    945498e <Andy Grove> merge from master and implement Hash for DateUnit, TimeUnit, etc.
    b05d6a0 <Andy Grove> Merge branch 'master' into ARROW-3959
    312885e <Andy Grove> Timestamp now uses TimeUnit
    c3e092b <Andy Grove> Merge branch 'master' into ARROW-3959
    d289cbb <Andy Grove> improve test
    2d36927 <Andy Grove> update unit test
    d51bc82 <Andy Grove> fix mistake
    f4bbf10 <Andy Grove> Add date/time data types
---
 rust/arrow/src/datatypes.rs | 146 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 145 insertions(+), 1 deletion(-)

diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 05db6ce..5008a97 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -56,11 +56,36 @@ pub enum DataType {
     Float16,
     Float32,
     Float64,
+    Timestamp(TimeUnit),
+    Date(DateUnit),
+    Time32(TimeUnit),
+    Time64(TimeUnit),
+    Interval(IntervalUnit),
     Utf8,
     List(Box<DataType>),
     Struct(Vec<Field>),
 }
 
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
+pub enum DateUnit { // unit carried by `DataType::Date`
+    Day, // spelled "DAY" in the JSON type metadata
+    Millisecond, // spelled "MILLISECOND" in the JSON type metadata
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
+pub enum TimeUnit { // unit carried by `DataType::Timestamp`, `DataType::Time32` and `DataType::Time64`
+    Second, // spelled "SECOND" in the JSON type metadata
+    Millisecond, // spelled "MILLISECOND" in the JSON type metadata
+    Microsecond, // spelled "MICROSECOND" in the JSON type metadata
+    Nanosecond, // spelled "NANOSECOND" in the JSON type metadata
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
+pub enum IntervalUnit { // unit carried by `DataType::Interval`
+    YearMonth, // spelled "YEAR_MONTH" in the JSON type metadata
+    DayTime, // spelled "DAY_TIME" in the JSON type metadata
+}
+
 /// Contains the meta-data for a single relative type.
 ///
 /// The `Schema` object is an ordered collection of `Field` objects.
@@ -175,6 +200,47 @@ impl DataType {
                         "floatingpoint precision missing or invalid".to_string(),
                     )),
                 },
+                Some(s) if s == "timestamp" => match map.get("unit") {
+                    Some(p) if p == "SECOND" => Ok(DataType::Timestamp(TimeUnit::Second)),
+                    Some(p) if p == "MILLISECOND" => Ok(DataType::Timestamp(TimeUnit::Millisecond)),
+                    Some(p) if p == "MICROSECOND" => Ok(DataType::Timestamp(TimeUnit::Microsecond)),
+                    Some(p) if p == "NANOSECOND" => Ok(DataType::Timestamp(TimeUnit::Nanosecond)),
+                    _ => Err(ArrowError::ParseError(
+                        "timestamp unit missing or invalid".to_string(),
+                    )),
+                },
+                Some(s) if s == "date" => match map.get("unit") {
+                    Some(p) if p == "DAY" => Ok(DataType::Date(DateUnit::Day)),
+                    Some(p) if p == "MILLISECOND" => Ok(DataType::Date(DateUnit::Millisecond)),
+                    _ => Err(ArrowError::ParseError(
+                        "date unit missing or invalid".to_string(),
+                    )),
+                },
+                Some(s) if s == "time" => { // "time" needs a unit plus a bitWidth that selects Time32 vs Time64
+                    let unit = match map.get("unit") { // kept as a Result; only unwrapped with `?` once bitWidth is valid
+                        Some(p) if p == "SECOND" => Ok(TimeUnit::Second),
+                        Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond),
+                        Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond),
+                        Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond),
+                        _ => Err(ArrowError::ParseError(
+                            "time unit missing or invalid".to_string(),
+                        )),
+                    };
+                    match map.get("bitWidth") { // compared as a JSON string, matching what `to_json` emits for time types
+                        Some(p) if p == "32" => Ok(DataType::Time32(unit?)),
+                        Some(p) if p == "64" => Ok(DataType::Time64(unit?)), // 64-bit width must yield Time64, not Time32
+                        _ => Err(ArrowError::ParseError(
+                            "time bitWidth missing or invalid".to_string(),
+                        )),
+                    }
+                }
+                Some(s) if s == "interval" => match map.get("unit") {
+                    Some(p) if p == "DAY_TIME" => Ok(DataType::Interval(IntervalUnit::DayTime)),
+                    Some(p) if p == "YEAR_MONTH" => Ok(DataType::Interval(IntervalUnit::YearMonth)),
+                    _ => Err(ArrowError::ParseError(
+                        "interval unit missing or invalid".to_string(),
+                    )),
+                },
                 Some(s) if s == "int" => match map.get("isSigned") {
                     Some(&Value::Bool(true)) => match map.get("bitWidth") {
                         Some(&Value::Number(ref n)) => match n.as_u64() {
@@ -231,7 +297,7 @@ impl DataType {
 
     /// Generate a JSON representation of the data type
     pub fn to_json(&self) -> Value {
-        match *self {
+        match self {
             DataType::Boolean => json!({"name": "bool"}),
             DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned": true}),
             DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned": true}),
@@ -254,6 +320,32 @@ impl DataType {
                 let child_json = t.to_json();
                 json!({ "name": "list", "children": child_json })
             }
+            DataType::Time32(unit) => json!({"name": "time", "bitWidth": "32", "unit": match unit {
+                TimeUnit::Second => "SECOND",
+                TimeUnit::Millisecond => "MILLISECOND",
+                TimeUnit::Microsecond => "MICROSECOND",
+                TimeUnit::Nanosecond => "NANOSECOND",
+            }}),
+            DataType::Time64(unit) => json!({"name": "time", "bitWidth": "64", "unit": match unit {
+                TimeUnit::Second => "SECOND",
+                TimeUnit::Millisecond => "MILLISECOND",
+                TimeUnit::Microsecond => "MICROSECOND",
+                TimeUnit::Nanosecond => "NANOSECOND",
+            }}),
+            DataType::Date(unit) => json!({"name": "date", "unit": match unit {
+                DateUnit::Day => "DAY",
+                DateUnit::Millisecond => "MILLISECOND",
+            }}),
+            DataType::Timestamp(unit) => json!({"name": "timestamp", "unit": match unit {
+                TimeUnit::Second => "SECOND",
+                TimeUnit::Millisecond => "MILLISECOND",
+                TimeUnit::Microsecond => "MICROSECOND",
+                TimeUnit::Nanosecond => "NANOSECOND",
+            }}),
+            DataType::Interval(unit) => json!({"name": "interval", "unit": match unit {
+                IntervalUnit::YearMonth => "YEAR_MONTH",
+                IntervalUnit::DayTime => "DAY_TIME",
+            }}),
         }
     }
 }
@@ -394,6 +486,13 @@ impl Schema {
             .enumerate()
             .find(|&(_, c)| c.name == name)
     }
+
+    /// Generate a JSON representation of the `Schema`: an object whose `"fields"` array holds each field's JSON
+    pub fn to_json(&self) -> Value {
+        json!({
+            "fields": self.fields.iter().map(|field| field.to_json()).collect::<Vec<Value>>(),
+        })
+    }
 }
 
 impl fmt::Display for Schema {
@@ -529,6 +628,51 @@ mod tests {
     }
 
     #[test]
+    fn schema_json() {
+        let schema = Schema::new(vec![
+            Field::new("c1", DataType::Utf8, false),
+            Field::new("c2", DataType::Date(DateUnit::Day), false),
+            Field::new("c3", DataType::Date(DateUnit::Millisecond), false),
+            Field::new("c7", DataType::Time32(TimeUnit::Second), false),
+            Field::new("c8", DataType::Time32(TimeUnit::Millisecond), false),
+            Field::new("c9", DataType::Time32(TimeUnit::Microsecond), false),
+            Field::new("c10", DataType::Time32(TimeUnit::Nanosecond), false),
+            Field::new("c11", DataType::Time64(TimeUnit::Second), false),
+            Field::new("c12", DataType::Time64(TimeUnit::Millisecond), false),
+            Field::new("c13", DataType::Time64(TimeUnit::Microsecond), false),
+            Field::new("c14", DataType::Time64(TimeUnit::Nanosecond), false),
+            Field::new("c15", DataType::Timestamp(TimeUnit::Second), false),
+            Field::new("c16", DataType::Timestamp(TimeUnit::Millisecond), false),
+            Field::new("c17", DataType::Timestamp(TimeUnit::Microsecond), false),
+            Field::new("c18", DataType::Timestamp(TimeUnit::Nanosecond), false),
+            Field::new("c19", DataType::Interval(IntervalUnit::DayTime), false),
+            Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false),
+            Field::new(
+                "c21",
+                DataType::Struct(vec![
+                    Field::new("a", DataType::Utf8, false),
+                    Field::new("b", DataType::UInt16, false),
+                ]),
+                false,
+            ),
+        ]);
+
+        let json = schema.to_json().to_string();
+        assert_eq!(json, "{\"fields\":[{\"name\":\"c1\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}},{\"name\":\"c2\",\"nullable\":false,\"type\":{\"name\":\"date\",\"unit\":\"DAY\"}},{\"name\":\"c3\",\"nullable\":false,\"type\":{\"name\":\"date\",\"unit\":\"MILLISECOND\"}},{\"name\":\"c7\",\"nullable\":false,\"type\":{\"bitWidth\":\"32\",\"name\":\"time\",\"unit\":\"SECOND\"}},{\"name\":\"c8\",\"nullable\":false,\"type\":{\"bitWidth\":\"32\",\"name\":\"time\",\"unit\":\"MILLISECOND\ [...]
+
+        // convert back to a schema
+        let value: Value = serde_json::from_str(&json).unwrap();
+        let schema2 = DataType::from(&value).unwrap();
+
+        match schema2 {
+            DataType::Struct(fields) => {
+                assert_eq!(schema.fields().len(), fields.len());
+            }
+            _ => panic!(),
+        }
+    }
+
+    #[test]
     fn create_schema_string() {
         let _person = Schema::new(vec![
             Field::new("first_name", DataType::Utf8, false),