You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2019/01/09 23:49:14 UTC
[arrow] branch master updated: ARROW-3959: [Rust] Add date/time
data types
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 87ceb3c ARROW-3959: [Rust] Add date/time data types
87ceb3c is described below
commit 87ceb3ca904c9e9a839ff1cc724d3139c1958047
Author: Andy Grove <an...@gmail.com>
AuthorDate: Wed Jan 9 16:49:04 2019 -0700
ARROW-3959: [Rust] Add date/time data types
This only adds the date/time types to the `DataType` enum, along with JSON serialization of their metadata.
This PR also implements `Schema::to_json`.
Author: Andy Grove <an...@gmail.com>
Closes #3340 from andygrove/ARROW-3959 and squashes the following commits:
945498e <Andy Grove> merge from master and implement Hash for DateUnit, TimeUnit, etc.
b05d6a0 <Andy Grove> Merge branch 'master' into ARROW-3959
312885e <Andy Grove> Timestamp now uses TimeUnit
c3e092b <Andy Grove> Merge branch 'master' into ARROW-3959
d289cbb <Andy Grove> improve test
2d36927 <Andy Grove> update unit test
d51bc82 <Andy Grove> fix mistake
f4bbf10 <Andy Grove> Add date/time data types
---
rust/arrow/src/datatypes.rs | 146 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 145 insertions(+), 1 deletion(-)
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 05db6ce..5008a97 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -56,11 +56,36 @@ pub enum DataType {
Float16,
Float32,
Float64,
+ Timestamp(TimeUnit),
+ Date(DateUnit),
+ Time32(TimeUnit),
+ Time64(TimeUnit),
+ Interval(IntervalUnit),
Utf8,
List(Box<DataType>),
Struct(Vec<Field>),
}
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
+pub enum DateUnit { // unit carried by `DataType::Date`
+ Day, // appears as "DAY" in the JSON type metadata
+ Millisecond, // appears as "MILLISECOND" in the JSON type metadata
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
+pub enum TimeUnit { // unit carried by `DataType::Timestamp`, `DataType::Time32` and `DataType::Time64`
+ Second, // appears as "SECOND" in the JSON type metadata
+ Millisecond, // appears as "MILLISECOND" in the JSON type metadata
+ Microsecond, // appears as "MICROSECOND" in the JSON type metadata
+ Nanosecond, // appears as "NANOSECOND" in the JSON type metadata
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)]
+pub enum IntervalUnit { // unit carried by `DataType::Interval`
+ YearMonth, // appears as "YEAR_MONTH" in the JSON type metadata
+ DayTime, // appears as "DAY_TIME" in the JSON type metadata
+}
+
/// Contains the meta-data for a single relative type.
///
/// The `Schema` object is an ordered collection of `Field` objects.
@@ -175,6 +200,47 @@ impl DataType {
"floatingpoint precision missing or invalid".to_string(),
)),
},
+ Some(s) if s == "timestamp" => match map.get("unit") {
+ Some(p) if p == "SECOND" => Ok(DataType::Timestamp(TimeUnit::Second)),
+ Some(p) if p == "MILLISECOND" => Ok(DataType::Timestamp(TimeUnit::Millisecond)),
+ Some(p) if p == "MICROSECOND" => Ok(DataType::Timestamp(TimeUnit::Microsecond)),
+ Some(p) if p == "NANOSECOND" => Ok(DataType::Timestamp(TimeUnit::Nanosecond)),
+ _ => Err(ArrowError::ParseError(
+ "timestamp unit missing or invalid".to_string(),
+ )),
+ },
+ Some(s) if s == "date" => match map.get("unit") {
+ Some(p) if p == "DAY" => Ok(DataType::Date(DateUnit::Day)),
+ Some(p) if p == "MILLISECOND" => Ok(DataType::Date(DateUnit::Millisecond)),
+ _ => Err(ArrowError::ParseError(
+ "date unit missing or invalid".to_string(),
+ )),
+ },
+ Some(s) if s == "time" => { // a "time" type is described by a "unit" plus a "bitWidth"
+ let unit = match map.get("unit") { // kept as a Result so a bad bitWidth is reported before a bad unit
+ Some(p) if p == "SECOND" => Ok(TimeUnit::Second),
+ Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond),
+ Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond),
+ Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond),
+ _ => Err(ArrowError::ParseError(
+ "time unit missing or invalid".to_string(),
+ )),
+ };
+ match map.get("bitWidth") { // compared against string literals, the form `to_json` writes for time types
+ Some(p) if p == "32" => Ok(DataType::Time32(unit?)),
+ Some(p) if p == "64" => Ok(DataType::Time64(unit?)), // fixed: previously produced Time32, so Time64 never round-tripped
+ _ => Err(ArrowError::ParseError(
+ "time bitWidth missing or invalid".to_string(),
+ )),
+ }
+ }
+ Some(s) if s == "interval" => match map.get("unit") {
+ Some(p) if p == "DAY_TIME" => Ok(DataType::Interval(IntervalUnit::DayTime)),
+ Some(p) if p == "YEAR_MONTH" => Ok(DataType::Interval(IntervalUnit::YearMonth)),
+ _ => Err(ArrowError::ParseError(
+ "interval unit missing or invalid".to_string(),
+ )),
+ },
Some(s) if s == "int" => match map.get("isSigned") {
Some(&Value::Bool(true)) => match map.get("bitWidth") {
Some(&Value::Number(ref n)) => match n.as_u64() {
@@ -231,7 +297,7 @@ impl DataType {
/// Generate a JSON representation of the data type
pub fn to_json(&self) -> Value {
- match *self {
+ match self {
DataType::Boolean => json!({"name": "bool"}),
DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned": true}),
DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned": true}),
@@ -254,6 +320,32 @@ impl DataType {
let child_json = t.to_json();
json!({ "name": "list", "children": child_json })
}
+ DataType::Time32(unit) => json!({"name": "time", "bitWidth": "32", "unit": match unit {
+ TimeUnit::Second => "SECOND",
+ TimeUnit::Millisecond => "MILLISECOND",
+ TimeUnit::Microsecond => "MICROSECOND",
+ TimeUnit::Nanosecond => "NANOSECOND",
+ }}),
+ DataType::Time64(unit) => json!({"name": "time", "bitWidth": "64", "unit": match unit {
+ TimeUnit::Second => "SECOND",
+ TimeUnit::Millisecond => "MILLISECOND",
+ TimeUnit::Microsecond => "MICROSECOND",
+ TimeUnit::Nanosecond => "NANOSECOND",
+ }}),
+ DataType::Date(unit) => json!({"name": "date", "unit": match unit {
+ DateUnit::Day => "DAY",
+ DateUnit::Millisecond => "MILLISECOND",
+ }}),
+ DataType::Timestamp(unit) => json!({"name": "timestamp", "unit": match unit {
+ TimeUnit::Second => "SECOND",
+ TimeUnit::Millisecond => "MILLISECOND",
+ TimeUnit::Microsecond => "MICROSECOND",
+ TimeUnit::Nanosecond => "NANOSECOND",
+ }}),
+ DataType::Interval(unit) => json!({"name": "interval", "unit": match unit {
+ IntervalUnit::YearMonth => "YEAR_MONTH",
+ IntervalUnit::DayTime => "DAY_TIME",
+ }}),
}
}
}
@@ -394,6 +486,13 @@ impl Schema {
.enumerate()
.find(|&(_, c)| c.name == name)
}
+
+ /// Generate a JSON representation of the `Schema`: an object whose "fields" array holds each field's `to_json()` value, in order
+ pub fn to_json(&self) -> Value {
+ json!({
+ "fields": self.fields.iter().map(|field| field.to_json()).collect::<Vec<Value>>(),
+ })
+ }
}
impl fmt::Display for Schema {
@@ -529,6 +628,51 @@ mod tests {
}
#[test]
+ fn schema_json() {
+ let schema = Schema::new(vec![
+ Field::new("c1", DataType::Utf8, false),
+ Field::new("c2", DataType::Date(DateUnit::Day), false),
+ Field::new("c3", DataType::Date(DateUnit::Millisecond), false),
+ Field::new("c7", DataType::Time32(TimeUnit::Second), false),
+ Field::new("c8", DataType::Time32(TimeUnit::Millisecond), false),
+ Field::new("c9", DataType::Time32(TimeUnit::Microsecond), false),
+ Field::new("c10", DataType::Time32(TimeUnit::Nanosecond), false),
+ Field::new("c11", DataType::Time64(TimeUnit::Second), false),
+ Field::new("c12", DataType::Time64(TimeUnit::Millisecond), false),
+ Field::new("c13", DataType::Time64(TimeUnit::Microsecond), false),
+ Field::new("c14", DataType::Time64(TimeUnit::Nanosecond), false),
+ Field::new("c15", DataType::Timestamp(TimeUnit::Second), false),
+ Field::new("c16", DataType::Timestamp(TimeUnit::Millisecond), false),
+ Field::new("c17", DataType::Timestamp(TimeUnit::Microsecond), false),
+ Field::new("c18", DataType::Timestamp(TimeUnit::Nanosecond), false),
+ Field::new("c19", DataType::Interval(IntervalUnit::DayTime), false),
+ Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false),
+ Field::new(
+ "c21",
+ DataType::Struct(vec![
+ Field::new("a", DataType::Utf8, false),
+ Field::new("b", DataType::UInt16, false),
+ ]),
+ false,
+ ),
+ ]);
+
+ let json = schema.to_json().to_string();
+ assert_eq!(json, "{\"fields\":[{\"name\":\"c1\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}},{\"name\":\"c2\",\"nullable\":false,\"type\":{\"name\":\"date\",\"unit\":\"DAY\"}},{\"name\":\"c3\",\"nullable\":false,\"type\":{\"name\":\"date\",\"unit\":\"MILLISECOND\"}},{\"name\":\"c7\",\"nullable\":false,\"type\":{\"bitWidth\":\"32\",\"name\":\"time\",\"unit\":\"SECOND\"}},{\"name\":\"c8\",\"nullable\":false,\"type\":{\"bitWidth\":\"32\",\"name\":\"time\",\"unit\":\"MILLISECOND\ [...]
+
+ // convert back to a schema
+ let value: Value = serde_json::from_str(&json).unwrap();
+ let schema2 = DataType::from(&value).unwrap();
+
+ match schema2 {
+ DataType::Struct(fields) => {
+ assert_eq!(schema.fields().len(), fields.len());
+ }
+ _ => panic!(),
+ }
+ }
+
+ #[test]
fn create_schema_string() {
let _person = Schema::new(vec![
Field::new("first_name", DataType::Utf8, false),