You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/08/15 18:53:17 UTC

[arrow-datafusion] branch master updated: feat: Add support for TIME literal values (#3010)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 15a9a4bec feat: Add support for TIME literal values (#3010)
15a9a4bec is described below

commit 15a9a4becc4bb41262b10f1d5395fc1d026de753
Author: Stuart Carnie <st...@gmail.com>
AuthorDate: Tue Aug 16 04:53:13 2022 +1000

    feat: Add support for TIME literal values (#3010)
    
    * feat: Initial work to add TIME literal value support
    
    * chore: Improve unit test coverage
    
    * chore: Improve test names and simplify test
---
 datafusion/common/src/scalar.rs        | 32 ++++++++++++++++++++++++++++
 datafusion/core/tests/sql/timestamp.rs | 38 ++++++++++++++++++++++++++++++++++
 datafusion/sql/src/planner.rs          | 18 ++++++++++++----
 3 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs
index 3069a54f4..dff97d2f9 100644
--- a/datafusion/common/src/scalar.rs
+++ b/datafusion/common/src/scalar.rs
@@ -83,6 +83,8 @@ pub enum ScalarValue {
     Date32(Option<i32>),
     /// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01
     Date64(Option<i64>),
+    /// Time stored as a signed 64bit int as nanoseconds since midnight
+    Time64(Option<i64>),
     /// Timestamp Second
     TimestampSecond(Option<i64>, Option<String>),
     /// Timestamp Milliseconds
@@ -163,6 +165,8 @@ impl PartialEq for ScalarValue {
             (Date32(_), _) => false,
             (Date64(v1), Date64(v2)) => v1.eq(v2),
             (Date64(_), _) => false,
+            (Time64(v1), Time64(v2)) => v1.eq(v2),
+            (Time64(_), _) => false,
             (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
             (TimestampSecond(_, _), _) => false,
             (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
@@ -255,6 +259,8 @@ impl PartialOrd for ScalarValue {
             (Date32(_), _) => None,
             (Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
             (Date64(_), _) => None,
+            (Time64(v1), Time64(v2)) => v1.partial_cmp(v2),
+            (Time64(_), _) => None,
             (TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
             (TimestampSecond(_, _), _) => None,
             (TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
@@ -338,6 +344,7 @@ impl std::hash::Hash for ScalarValue {
             }
             Date32(v) => v.hash(state),
             Date64(v) => v.hash(state),
+            Time64(v) => v.hash(state),
             TimestampSecond(v, _) => v.hash(state),
             TimestampMillisecond(v, _) => v.hash(state),
             TimestampMicrosecond(v, _) => v.hash(state),
@@ -681,6 +688,7 @@ impl ScalarValue {
             ))),
             ScalarValue::Date32(_) => DataType::Date32,
             ScalarValue::Date64(_) => DataType::Date64,
+            ScalarValue::Time64(_) => DataType::Time64(TimeUnit::Nanosecond),
             ScalarValue::IntervalYearMonth(_) => {
                 DataType::Interval(IntervalUnit::YearMonth)
             }
@@ -741,6 +749,7 @@ impl ScalarValue {
             ScalarValue::List(v, _) => v.is_none(),
             ScalarValue::Date32(v) => v.is_none(),
             ScalarValue::Date64(v) => v.is_none(),
+            ScalarValue::Time64(v) => v.is_none(),
             ScalarValue::TimestampSecond(v, _) => v.is_none(),
             ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
             ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
@@ -963,6 +972,9 @@ impl ScalarValue {
             DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
             DataType::Date32 => build_array_primitive!(Date32Array, Date32),
             DataType::Date64 => build_array_primitive!(Date64Array, Date64),
+            DataType::Time64(TimeUnit::Nanosecond) => {
+                build_array_primitive!(Time64NanosecondArray, Time64)
+            }
             DataType::Timestamp(TimeUnit::Second, _) => {
                 build_array_primitive_tz!(TimestampSecondArray, TimestampSecond)
             }
@@ -1357,6 +1369,15 @@ impl ScalarValue {
             ScalarValue::Date64(e) => {
                 build_array_from_option!(Date64, Date64Array, e, size)
             }
+            ScalarValue::Time64(e) => {
+                build_array_from_option!(
+                    Time64,
+                    TimeUnit::Nanosecond,
+                    Time64NanosecondArray,
+                    e,
+                    size
+                )
+            }
             ScalarValue::IntervalDayTime(e) => build_array_from_option!(
                 Interval,
                 IntervalUnit::DayTime,
@@ -1496,6 +1517,9 @@ impl ScalarValue {
             DataType::Date64 => {
                 typed_cast!(array, index, Date64Array, Date64)
             }
+            DataType::Time64(TimeUnit::Nanosecond) => {
+                typed_cast!(array, index, Time64NanosecondArray, Time64)
+            }
             DataType::Timestamp(TimeUnit::Second, tz_opt) => {
                 typed_cast_tz!(
                     array,
@@ -1691,6 +1715,9 @@ impl ScalarValue {
             ScalarValue::Date64(val) => {
                 eq_array_primitive!(array, index, Date64Array, val)
             }
+            ScalarValue::Time64(val) => {
+                eq_array_primitive!(array, index, Time64NanosecondArray, val)
+            }
             ScalarValue::TimestampSecond(val, _) => {
                 eq_array_primitive!(array, index, TimestampSecondArray, val)
             }
@@ -1845,6 +1872,7 @@ impl TryFrom<ScalarValue> for i64 {
         match value {
             ScalarValue::Int64(Some(inner_value))
             | ScalarValue::Date64(Some(inner_value))
+            | ScalarValue::Time64(Some(inner_value))
             | ScalarValue::TimestampNanosecond(Some(inner_value), _)
             | ScalarValue::TimestampMicrosecond(Some(inner_value), _)
             | ScalarValue::TimestampMillisecond(Some(inner_value), _)
@@ -1906,6 +1934,7 @@ impl TryFrom<&DataType> for ScalarValue {
             DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
             DataType::Date32 => ScalarValue::Date32(None),
             DataType::Date64 => ScalarValue::Date64(None),
+            DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64(None),
             DataType::Timestamp(TimeUnit::Second, tz_opt) => {
                 ScalarValue::TimestampSecond(None, tz_opt.clone())
             }
@@ -2007,6 +2036,7 @@ impl fmt::Display for ScalarValue {
             },
             ScalarValue::Date32(e) => format_option!(f, e)?,
             ScalarValue::Date64(e) => format_option!(f, e)?,
+            ScalarValue::Time64(e) => format_option!(f, e)?,
             ScalarValue::IntervalDayTime(e) => format_option!(f, e)?,
             ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
             ScalarValue::IntervalMonthDayNano(e) => format_option!(f, e)?,
@@ -2067,6 +2097,7 @@ impl fmt::Debug for ScalarValue {
             ScalarValue::List(_, _) => write!(f, "List([{}])", self),
             ScalarValue::Date32(_) => write!(f, "Date32(\"{}\")", self),
             ScalarValue::Date64(_) => write!(f, "Date64(\"{}\")", self),
+            ScalarValue::Time64(_) => write!(f, "Time64(\"{}\")", self),
             ScalarValue::IntervalDayTime(_) => {
                 write!(f, "IntervalDayTime(\"{}\")", self)
             }
@@ -2665,6 +2696,7 @@ mod tests {
             make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
             make_test_case!(i32_vals, Date32Array, Date32),
             make_test_case!(i64_vals, Date64Array, Date64),
+            make_test_case!(i64_vals, Time64NanosecondArray, Time64),
             make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
             make_test_case!(
                 i64_vals,
diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs
index ba916a658..4ae81bfc6 100644
--- a/datafusion/core/tests/sql/timestamp.rs
+++ b/datafusion/core/tests/sql/timestamp.rs
@@ -986,6 +986,44 @@ async fn sub_interval_day() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn cast_string_to_time() {
+    let ctx = SessionContext::new();
+
+    let sql = "select \
+        time '08:09:10.123456789' as time_nano, \
+        time '13:14:15.123456'    as time_micro,\
+        time '13:14:15.123'       as time_milli,\
+        time '13:14:15'           as time;";
+    let results = execute_to_batches(&ctx, sql).await;
+
+    let expected = vec![
+        "+--------------------+-----------------+--------------+----------+",
+        "| time_nano          | time_micro      | time_milli   | time     |",
+        "+--------------------+-----------------+--------------+----------+",
+        "| 08:09:10.123456789 | 13:14:15.123456 | 13:14:15.123 | 13:14:15 |",
+        "+--------------------+-----------------+--------------+----------+",
+    ];
+    assert_batches_eq!(expected, &results);
+
+    // Fallible cases
+
+    let sql = "SELECT TIME 'not a time' as time;";
+    let result = try_execute_to_batches(&ctx, sql).await;
+    assert_eq!(
+        result.err().unwrap().to_string(),
+        "Arrow error: Cast error: Cannot cast string 'not a time' to value of Time64(Nanosecond) type"
+    );
+
+    // An invalid time
+    let sql = "SELECT TIME '24:01:02' as time;";
+    let result = try_execute_to_batches(&ctx, sql).await;
+    assert_eq!(
+        result.err().unwrap().to_string(),
+        "Arrow error: Cast error: Cannot cast string '24:01:02' to value of Time64(Nanosecond) type"
+    );
+}
+
 #[tokio::test]
 async fn cast_to_timestamp_twice() -> Result<()> {
     let ctx = SessionContext::new();
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index ddf9198e5..47223a6a8 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -496,7 +496,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             SQLDataType::Double => Ok(DataType::Float64),
             SQLDataType::Boolean => Ok(DataType::Boolean),
             SQLDataType::Date => Ok(DataType::Date32),
-            SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Millisecond)),
+            SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Nanosecond)),
             SQLDataType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
             _ => Err(DataFusionError::NotImplemented(format!(
                 "The SQL data type {:?} is not implemented",
@@ -2563,6 +2563,7 @@ pub fn convert_simple_data_type(sql_type: &SQLDataType) -> Result<DataType> {
         | SQLDataType::String => Ok(DataType::Utf8),
         SQLDataType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
         SQLDataType::Date => Ok(DataType::Date32),
+        SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Nanosecond)),
         SQLDataType::Decimal(precision, scale) => make_decimal_type(*precision, *scale),
         SQLDataType::Binary(_) => Ok(DataType::Binary),
         SQLDataType::Bytea => Ok(DataType::Binary),
@@ -4360,10 +4361,19 @@ mod tests {
     }
 
     #[test]
-    fn select_typedstring() {
-        let sql = "SELECT date '2020-12-10' AS date FROM person";
+    fn select_typed_date_string() {
+        let sql = "SELECT date '2020-12-10' AS date";
         let expected = "Projection: CAST(Utf8(\"2020-12-10\") AS Date32) AS date\
-            \n  TableScan: person";
+            \n  EmptyRelation";
+        quick_test(sql, expected);
+    }
+
+    #[test]
+    fn select_typed_time_string() {
+        let sql = "SELECT TIME '08:09:10.123' AS time";
+        let expected =
+            "Projection: CAST(Utf8(\"08:09:10.123\") AS Time64(Nanosecond)) AS time\
+            \n  EmptyRelation";
         quick_test(sql, expected);
     }