You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "comphead (via GitHub)" <gi...@apache.org> on 2023/03/07 22:24:00 UTC

[GitHub] [arrow-datafusion] comphead commented on a diff in pull request #5166: feat: add `arrow_cast` function to support supports arbitrary arrow types

comphead commented on code in PR #5166:
URL: https://github.com/apache/arrow-datafusion/pull/5166#discussion_r1128660828


##########
docs/source/user-guide/sql/data_types.md:
##########
@@ -37,6 +37,18 @@ the `arrow_typeof` function. For example:
 +-------------------------------------+
 ```
 
+You can cast a SQL expression to a specific Arrow type using the `arrow_cast` function
+For example, to cast the output of `now()` to a `Timestamp` with second precision rather:
+
+```sql

Review Comment:
   what if `select arrow_cast(now(), 'Timestamp(Second, Some("+00:00"))')`



##########
datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt:
##########
@@ -52,31 +52,203 @@ SELECT arrow_typeof(1.0::float)
 Float32
 
 # arrow_typeof_decimal
-# query T
-# SELECT arrow_typeof(1::Decimal)
-# ----
-# Decimal128(38, 10)
-
-# # arrow_typeof_timestamp
-# query T
-# SELECT arrow_typeof(now()::timestamp)
-# ----
-# Timestamp(Nanosecond, None)
-
-# # arrow_typeof_timestamp_utc
-# query T
-# SELECT arrow_typeof(now())
-# ----
-# Timestamp(Nanosecond, Some(\"+00:00\"))
-
-# # arrow_typeof_timestamp_date32(
-# query T
-# SELECT arrow_typeof(now()::date)
-# ----
-# Date32
-
-# # arrow_typeof_utf8
-# query T
-# SELECT arrow_typeof('1')
-# ----
-# Utf8
+query T
+SELECT arrow_typeof(1::Decimal)
+----
+Decimal128(38, 10)
+
+# arrow_typeof_timestamp
+query T
+SELECT arrow_typeof(now()::timestamp)
+----
+Timestamp(Nanosecond, None)
+
+# arrow_typeof_timestamp_utc
+query T
+SELECT arrow_typeof(now())
+----
+Timestamp(Nanosecond, Some("+00:00"))
+
+# arrow_typeof_timestamp_date32(
+query T
+SELECT arrow_typeof(now()::date)
+----
+Date32
+
+# arrow_typeof_utf8
+query T
+SELECT arrow_typeof('1')
+----
+Utf8
+
+
+#### arrow_cast (in some ways opposite of arrow_typeof)
+
+
+query I
+SELECT arrow_cast('1', 'Int16')
+----
+1
+
+query error Error during planning: arrow_cast needs 2 arguments, 1 provided
+SELECT arrow_cast('1')
+
+query error Error during planning: arrow_cast requires its second argument to be a constant string, got Int64\(43\)
+SELECT arrow_cast('1', 43)
+
+query error Error unrecognized word: unknown
+SELECT arrow_cast('1', 'unknown')
+
+
+## Basic types
+
+statement ok
+create table foo as select
+  arrow_cast(1, 'Int8') as col_i8,
+  arrow_cast(1, 'Int16') as col_i16,
+  arrow_cast(1, 'Int32') as col_i32,
+  arrow_cast(1, 'Int64') as col_i64,
+  arrow_cast(1, 'UInt8') as col_u8,
+  arrow_cast(1, 'UInt16') as col_u16,
+  arrow_cast(1, 'UInt32') as col_u32,
+  arrow_cast(1, 'UInt64') as col_u64,
+  -- can't seem to cast to Float16 for some reason
+  arrow_cast(1.0, 'Float32') as col_f32,
+  arrow_cast(1.0, 'Float64') as col_f64
+;
+
+
+query TTTTTTTTTT
+SELECT
+  arrow_typeof(col_i8),
+  arrow_typeof(col_i16),
+  arrow_typeof(col_i32),
+  arrow_typeof(col_i64),
+  arrow_typeof(col_u8),
+  arrow_typeof(col_u16),
+  arrow_typeof(col_u32),
+  arrow_typeof(col_u64),
+  arrow_typeof(col_f32),
+  arrow_typeof(col_f64)
+  FROM foo;

Review Comment:
   nit: you can remove `from foo`



##########
docs/source/user-guide/sql/data_types.md:
##########
@@ -100,3 +112,43 @@ the `arrow_typeof` function. For example:
 | `ENUM`        | _Not yet supported_ |
 | `SET`         | _Not yet supported_ |
 | `DATETIME`    | _Not yet supported_ |
+
+## Supported Arrow Types
+
+The following types are supported by the `arrow_typeof` function:
+
+| Arrow Type                                                  |
+| ----------------------------------------------------------- |
+| `Null`                                                      |
+| `Boolean`                                                   |
+| `Int8`                                                      |
+| `Int16`                                                     |
+| `Int32`                                                     |
+| `Int64`                                                     |
+| `UInt8`                                                     |
+| `UInt16`                                                    |
+| `UInt32`                                                    |
+| `UInt64`                                                    |
+| `Float16`                                                   |
+| `Float32`                                                   |
+| `Float64`                                                   |
+| `Utf8`                                                      |
+| `LargeUtf8`                                                 |
+| `Binary`                                                    |
+| `Timestamp(Second, None)`                                   |
+| `Timestamp(Millisecond, None)`                              |
+| `Timestamp(Microsecond, None)`                              |
+| `Timestamp(Nanosecond, None)`                               |

Review Comment:
   No support for timestamptz?



##########
datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt:
##########
@@ -52,31 +52,203 @@ SELECT arrow_typeof(1.0::float)
 Float32
 
 # arrow_typeof_decimal
-# query T
-# SELECT arrow_typeof(1::Decimal)
-# ----
-# Decimal128(38, 10)
-
-# # arrow_typeof_timestamp
-# query T
-# SELECT arrow_typeof(now()::timestamp)
-# ----
-# Timestamp(Nanosecond, None)
-
-# # arrow_typeof_timestamp_utc
-# query T
-# SELECT arrow_typeof(now())
-# ----
-# Timestamp(Nanosecond, Some(\"+00:00\"))
-
-# # arrow_typeof_timestamp_date32(
-# query T
-# SELECT arrow_typeof(now()::date)
-# ----
-# Date32
-
-# # arrow_typeof_utf8
-# query T
-# SELECT arrow_typeof('1')
-# ----
-# Utf8
+query T
+SELECT arrow_typeof(1::Decimal)
+----
+Decimal128(38, 10)
+
+# arrow_typeof_timestamp
+query T
+SELECT arrow_typeof(now()::timestamp)
+----
+Timestamp(Nanosecond, None)
+
+# arrow_typeof_timestamp_utc
+query T
+SELECT arrow_typeof(now())
+----
+Timestamp(Nanosecond, Some("+00:00"))
+
+# arrow_typeof_timestamp_date32(
+query T
+SELECT arrow_typeof(now()::date)
+----
+Date32
+
+# arrow_typeof_utf8
+query T
+SELECT arrow_typeof('1')
+----
+Utf8
+
+
+#### arrow_cast (in some ways opposite of arrow_typeof)
+
+
+query I
+SELECT arrow_cast('1', 'Int16')
+----
+1
+
+query error Error during planning: arrow_cast needs 2 arguments, 1 provided
+SELECT arrow_cast('1')
+
+query error Error during planning: arrow_cast requires its second argument to be a constant string, got Int64\(43\)
+SELECT arrow_cast('1', 43)
+
+query error Error unrecognized word: unknown
+SELECT arrow_cast('1', 'unknown')
+
+
+## Basic types
+
+statement ok
+create table foo as select
+  arrow_cast(1, 'Int8') as col_i8,
+  arrow_cast(1, 'Int16') as col_i16,
+  arrow_cast(1, 'Int32') as col_i32,
+  arrow_cast(1, 'Int64') as col_i64,
+  arrow_cast(1, 'UInt8') as col_u8,
+  arrow_cast(1, 'UInt16') as col_u16,
+  arrow_cast(1, 'UInt32') as col_u32,
+  arrow_cast(1, 'UInt64') as col_u64,
+  -- can't seem to cast to Float16 for some reason
+  arrow_cast(1.0, 'Float32') as col_f32,
+  arrow_cast(1.0, 'Float64') as col_f64
+;
+
+
+query TTTTTTTTTT
+SELECT
+  arrow_typeof(col_i8),
+  arrow_typeof(col_i16),
+  arrow_typeof(col_i32),
+  arrow_typeof(col_i64),
+  arrow_typeof(col_u8),
+  arrow_typeof(col_u16),
+  arrow_typeof(col_u32),
+  arrow_typeof(col_u64),
+  arrow_typeof(col_f32),
+  arrow_typeof(col_f64)
+  FROM foo;
+----
+Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64
+
+
+
+statement ok
+drop table foo
+
+## Decimals
+
+statement ok
+create table foo as select
+  arrow_cast(100, 'Decimal128(3,2)') as col_d128
+  -- Can't make a decimal 156:
+  -- This feature is not implemented: Can't create a scalar from array of type "Decimal256(3, 2)"
+  --arrow_cast(100, 'Decimal256(3,2)') as col_d256
+;
+
+
+query T
+SELECT
+  arrow_typeof(col_d128)
+  -- arrow_typeof(col_d256),
+  FROM foo;
+----
+Decimal128(3, 2)
+
+
+statement ok
+drop table foo
+
+## strings, large strings
+
+statement ok
+create table foo as select
+  arrow_cast('foo', 'Utf8') as col_utf8,
+  arrow_cast('foo', 'LargeUtf8') as col_large_utf8,
+  arrow_cast('foo', 'Binary') as col_binary,
+  arrow_cast('foo', 'LargeBinary') as col_large_binary
+;
+
+
+query TTTT
+SELECT
+  arrow_typeof(col_utf8),
+  arrow_typeof(col_large_utf8),
+  arrow_typeof(col_binary),
+  arrow_typeof(col_large_binary)
+  FROM foo;
+----
+Utf8 LargeUtf8 Binary LargeBinary
+
+
+statement ok
+drop table foo
+
+
+## timestamps
+
+statement ok
+create table foo as select
+  arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Second, None)') as col_ts_s,
+  arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Millisecond, None)') as col_ts_ms,
+  arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Microsecond, None)') as col_ts_us,
+  arrow_cast(to_timestamp('2020-01-02 01:01:11.1234567890Z'), 'Timestamp(Nanosecond, None)') as col_ts_ns
+;
+
+
+query TTTT
+SELECT
+  arrow_typeof(col_ts_s),

Review Comment:
   Would be also great to have tests `select arrow_type_of(arrow_cast(.....))`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org