You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/08/15 18:53:17 UTC
[arrow-datafusion] branch master updated: feat: Add support for TIME literal values (#3010)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 15a9a4bec feat: Add support for TIME literal values (#3010)
15a9a4bec is described below
commit 15a9a4becc4bb41262b10f1d5395fc1d026de753
Author: Stuart Carnie <st...@gmail.com>
AuthorDate: Tue Aug 16 04:53:13 2022 +1000
feat: Add support for TIME literal values (#3010)
* feat: Initial work to add TIME literal value support
* chore: Improve unit test coverage
* chore: Improve test names and simplify test
---
datafusion/common/src/scalar.rs | 32 ++++++++++++++++++++++++++++
datafusion/core/tests/sql/timestamp.rs | 38 ++++++++++++++++++++++++++++++++++
datafusion/sql/src/planner.rs | 18 ++++++++++++----
3 files changed, 84 insertions(+), 4 deletions(-)
diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs
index 3069a54f4..dff97d2f9 100644
--- a/datafusion/common/src/scalar.rs
+++ b/datafusion/common/src/scalar.rs
@@ -83,6 +83,8 @@ pub enum ScalarValue {
Date32(Option<i32>),
/// Date stored as a signed 64bit int milliseconds since UNIX epoch 1970-01-01
Date64(Option<i64>),
+ /// Time stored as a signed 64bit int as nanoseconds since midnight
+ Time64(Option<i64>),
/// Timestamp Second
TimestampSecond(Option<i64>, Option<String>),
/// Timestamp Milliseconds
@@ -163,6 +165,8 @@ impl PartialEq for ScalarValue {
(Date32(_), _) => false,
(Date64(v1), Date64(v2)) => v1.eq(v2),
(Date64(_), _) => false,
+ (Time64(v1), Time64(v2)) => v1.eq(v2),
+ (Time64(_), _) => false,
(TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.eq(v2),
(TimestampSecond(_, _), _) => false,
(TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => v1.eq(v2),
@@ -255,6 +259,8 @@ impl PartialOrd for ScalarValue {
(Date32(_), _) => None,
(Date64(v1), Date64(v2)) => v1.partial_cmp(v2),
(Date64(_), _) => None,
+ (Time64(v1), Time64(v2)) => v1.partial_cmp(v2),
+ (Time64(_), _) => None,
(TimestampSecond(v1, _), TimestampSecond(v2, _)) => v1.partial_cmp(v2),
(TimestampSecond(_, _), _) => None,
(TimestampMillisecond(v1, _), TimestampMillisecond(v2, _)) => {
@@ -338,6 +344,7 @@ impl std::hash::Hash for ScalarValue {
}
Date32(v) => v.hash(state),
Date64(v) => v.hash(state),
+ Time64(v) => v.hash(state),
TimestampSecond(v, _) => v.hash(state),
TimestampMillisecond(v, _) => v.hash(state),
TimestampMicrosecond(v, _) => v.hash(state),
@@ -681,6 +688,7 @@ impl ScalarValue {
))),
ScalarValue::Date32(_) => DataType::Date32,
ScalarValue::Date64(_) => DataType::Date64,
+ ScalarValue::Time64(_) => DataType::Time64(TimeUnit::Nanosecond),
ScalarValue::IntervalYearMonth(_) => {
DataType::Interval(IntervalUnit::YearMonth)
}
@@ -741,6 +749,7 @@ impl ScalarValue {
ScalarValue::List(v, _) => v.is_none(),
ScalarValue::Date32(v) => v.is_none(),
ScalarValue::Date64(v) => v.is_none(),
+ ScalarValue::Time64(v) => v.is_none(),
ScalarValue::TimestampSecond(v, _) => v.is_none(),
ScalarValue::TimestampMillisecond(v, _) => v.is_none(),
ScalarValue::TimestampMicrosecond(v, _) => v.is_none(),
@@ -963,6 +972,9 @@ impl ScalarValue {
DataType::LargeBinary => build_array_string!(LargeBinaryArray, LargeBinary),
DataType::Date32 => build_array_primitive!(Date32Array, Date32),
DataType::Date64 => build_array_primitive!(Date64Array, Date64),
+ DataType::Time64(TimeUnit::Nanosecond) => {
+ build_array_primitive!(Time64NanosecondArray, Time64)
+ }
DataType::Timestamp(TimeUnit::Second, _) => {
build_array_primitive_tz!(TimestampSecondArray, TimestampSecond)
}
@@ -1357,6 +1369,15 @@ impl ScalarValue {
ScalarValue::Date64(e) => {
build_array_from_option!(Date64, Date64Array, e, size)
}
+ ScalarValue::Time64(e) => {
+ build_array_from_option!(
+ Time64,
+ TimeUnit::Nanosecond,
+ Time64NanosecondArray,
+ e,
+ size
+ )
+ }
ScalarValue::IntervalDayTime(e) => build_array_from_option!(
Interval,
IntervalUnit::DayTime,
@@ -1496,6 +1517,9 @@ impl ScalarValue {
DataType::Date64 => {
typed_cast!(array, index, Date64Array, Date64)
}
+ DataType::Time64(TimeUnit::Nanosecond) => {
+ typed_cast!(array, index, Time64NanosecondArray, Time64)
+ }
DataType::Timestamp(TimeUnit::Second, tz_opt) => {
typed_cast_tz!(
array,
@@ -1691,6 +1715,9 @@ impl ScalarValue {
ScalarValue::Date64(val) => {
eq_array_primitive!(array, index, Date64Array, val)
}
+ ScalarValue::Time64(val) => {
+ eq_array_primitive!(array, index, Time64NanosecondArray, val)
+ }
ScalarValue::TimestampSecond(val, _) => {
eq_array_primitive!(array, index, TimestampSecondArray, val)
}
@@ -1845,6 +1872,7 @@ impl TryFrom<ScalarValue> for i64 {
match value {
ScalarValue::Int64(Some(inner_value))
| ScalarValue::Date64(Some(inner_value))
+ | ScalarValue::Time64(Some(inner_value))
| ScalarValue::TimestampNanosecond(Some(inner_value), _)
| ScalarValue::TimestampMicrosecond(Some(inner_value), _)
| ScalarValue::TimestampMillisecond(Some(inner_value), _)
@@ -1906,6 +1934,7 @@ impl TryFrom<&DataType> for ScalarValue {
DataType::LargeUtf8 => ScalarValue::LargeUtf8(None),
DataType::Date32 => ScalarValue::Date32(None),
DataType::Date64 => ScalarValue::Date64(None),
+ DataType::Time64(TimeUnit::Nanosecond) => ScalarValue::Time64(None),
DataType::Timestamp(TimeUnit::Second, tz_opt) => {
ScalarValue::TimestampSecond(None, tz_opt.clone())
}
@@ -2007,6 +2036,7 @@ impl fmt::Display for ScalarValue {
},
ScalarValue::Date32(e) => format_option!(f, e)?,
ScalarValue::Date64(e) => format_option!(f, e)?,
+ ScalarValue::Time64(e) => format_option!(f, e)?,
ScalarValue::IntervalDayTime(e) => format_option!(f, e)?,
ScalarValue::IntervalYearMonth(e) => format_option!(f, e)?,
ScalarValue::IntervalMonthDayNano(e) => format_option!(f, e)?,
@@ -2067,6 +2097,7 @@ impl fmt::Debug for ScalarValue {
ScalarValue::List(_, _) => write!(f, "List([{}])", self),
ScalarValue::Date32(_) => write!(f, "Date32(\"{}\")", self),
ScalarValue::Date64(_) => write!(f, "Date64(\"{}\")", self),
+ ScalarValue::Time64(_) => write!(f, "Time64(\"{}\")", self),
ScalarValue::IntervalDayTime(_) => {
write!(f, "IntervalDayTime(\"{}\")", self)
}
@@ -2665,6 +2696,7 @@ mod tests {
make_binary_test_case!(str_vals, LargeBinaryArray, LargeBinary),
make_test_case!(i32_vals, Date32Array, Date32),
make_test_case!(i64_vals, Date64Array, Date64),
+ make_test_case!(i64_vals, Time64NanosecondArray, Time64),
make_test_case!(i64_vals, TimestampSecondArray, TimestampSecond, None),
make_test_case!(
i64_vals,
diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs
index ba916a658..4ae81bfc6 100644
--- a/datafusion/core/tests/sql/timestamp.rs
+++ b/datafusion/core/tests/sql/timestamp.rs
@@ -986,6 +986,44 @@ async fn sub_interval_day() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn cast_string_to_time() {
+ let ctx = SessionContext::new();
+
+ let sql = "select \
+ time '08:09:10.123456789' as time_nano, \
+ time '13:14:15.123456' as time_micro,\
+ time '13:14:15.123' as time_milli,\
+ time '13:14:15' as time;";
+ let results = execute_to_batches(&ctx, sql).await;
+
+ let expected = vec![
+ "+--------------------+-----------------+--------------+----------+",
+ "| time_nano | time_micro | time_milli | time |",
+ "+--------------------+-----------------+--------------+----------+",
+ "| 08:09:10.123456789 | 13:14:15.123456 | 13:14:15.123 | 13:14:15 |",
+ "+--------------------+-----------------+--------------+----------+",
+ ];
+ assert_batches_eq!(expected, &results);
+
+ // Fallible cases
+
+ let sql = "SELECT TIME 'not a time' as time;";
+ let result = try_execute_to_batches(&ctx, sql).await;
+ assert_eq!(
+ result.err().unwrap().to_string(),
+ "Arrow error: Cast error: Cannot cast string 'not a time' to value of Time64(Nanosecond) type"
+ );
+
+ // An invalid time
+ let sql = "SELECT TIME '24:01:02' as time;";
+ let result = try_execute_to_batches(&ctx, sql).await;
+ assert_eq!(
+ result.err().unwrap().to_string(),
+ "Arrow error: Cast error: Cannot cast string '24:01:02' to value of Time64(Nanosecond) type"
+ );
+}
+
#[tokio::test]
async fn cast_to_timestamp_twice() -> Result<()> {
let ctx = SessionContext::new();
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index ddf9198e5..47223a6a8 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -496,7 +496,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::Double => Ok(DataType::Float64),
SQLDataType::Boolean => Ok(DataType::Boolean),
SQLDataType::Date => Ok(DataType::Date32),
- SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Millisecond)),
+ SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Nanosecond)),
SQLDataType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
_ => Err(DataFusionError::NotImplemented(format!(
"The SQL data type {:?} is not implemented",
@@ -2563,6 +2563,7 @@ pub fn convert_simple_data_type(sql_type: &SQLDataType) -> Result<DataType> {
| SQLDataType::String => Ok(DataType::Utf8),
SQLDataType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Nanosecond, None)),
SQLDataType::Date => Ok(DataType::Date32),
+ SQLDataType::Time => Ok(DataType::Time64(TimeUnit::Nanosecond)),
SQLDataType::Decimal(precision, scale) => make_decimal_type(*precision, *scale),
SQLDataType::Binary(_) => Ok(DataType::Binary),
SQLDataType::Bytea => Ok(DataType::Binary),
@@ -4360,10 +4361,19 @@ mod tests {
}
#[test]
- fn select_typedstring() {
- let sql = "SELECT date '2020-12-10' AS date FROM person";
+ fn select_typed_date_string() {
+ let sql = "SELECT date '2020-12-10' AS date";
let expected = "Projection: CAST(Utf8(\"2020-12-10\") AS Date32) AS date\
- \n TableScan: person";
+ \n EmptyRelation";
+ quick_test(sql, expected);
+ }
+
+ #[test]
+ fn select_typed_time_string() {
+ let sql = "SELECT TIME '08:09:10.123' AS time";
+ let expected =
+ "Projection: CAST(Utf8(\"08:09:10.123\") AS Time64(Nanosecond)) AS time\
+ \n EmptyRelation";
quick_test(sql, expected);
}