You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/08/13 10:04:41 UTC

[arrow-datafusion] branch master updated: add arrow_typeof (#3120)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 9e0bc50c4 add arrow_typeof (#3120)
9e0bc50c4 is described below

commit 9e0bc50c418e1eb5a068aa38515b15b7190dd17c
Author: Wei-Ting Kuo <wa...@gmail.com>
AuthorDate: Sat Aug 13 18:04:35 2022 +0800

    add arrow_typeof (#3120)
    
    * add arrow_typeof
    
    * update fmt
    
    * add test cases
    
    * fix test case
---
 datafusion/core/tests/sql/arrow_typeof.rs | 139 ++++++++++++++++++++++++++++++
 datafusion/core/tests/sql/mod.rs          |   1 +
 datafusion/expr/src/built_in_function.rs  |   6 +-
 datafusion/expr/src/expr_fn.rs            |   4 +
 datafusion/expr/src/function.rs           |   3 +
 datafusion/physical-expr/src/functions.rs |   9 ++
 datafusion/proto/proto/datafusion.proto   |   1 +
 datafusion/proto/src/from_proto.rs        |   1 +
 datafusion/proto/src/to_proto.rs          |   1 +
 9 files changed, 164 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/tests/sql/arrow_typeof.rs b/datafusion/core/tests/sql/arrow_typeof.rs
new file mode 100644
index 000000000..9f971f27b
--- /dev/null
+++ b/datafusion/core/tests/sql/arrow_typeof.rs
@@ -0,0 +1,139 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use super::*;
+
+/// Checks that `arrow_typeof(null)` yields `Null` in the first result cell.
+#[tokio::test]
+async fn arrow_typeof_null() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(null)").await;
+
+    assert_eq!("Null", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that `arrow_typeof(true)` yields `Boolean` in the first result cell.
+#[tokio::test]
+async fn arrow_typeof_boolean() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(true)").await;
+
+    assert_eq!("Boolean", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that the bare integer literal `1` is reported as `Int64`.
+#[tokio::test]
+async fn arrow_typeof_i64() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(1)").await;
+
+    assert_eq!("Int64", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that `1::int` (a cast to `int`) is reported as `Int32`.
+#[tokio::test]
+async fn arrow_typeof_i32() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(1::int)").await;
+
+    assert_eq!("Int32", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that the bare literal `1.0` is reported as `Float64`.
+#[tokio::test]
+async fn arrow_typeof_f64() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(1.0)").await;
+
+    assert_eq!("Float64", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that `1.0::float` (a cast to `float`) is reported as `Float32`.
+#[tokio::test]
+async fn arrow_typeof_f32() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(1.0::float)").await;
+
+    assert_eq!("Float32", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that `1::Decimal` is reported as `Decimal128(38, 10)`.
+#[tokio::test]
+async fn arrow_typeof_decimal() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(1::Decimal)").await;
+
+    assert_eq!("Decimal128(38, 10)", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that `now()::timestamp` is reported as `Timestamp(Nanosecond, None)`.
+#[tokio::test]
+async fn arrow_typeof_timestamp() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(now()::timestamp)").await;
+
+    assert_eq!("Timestamp(Nanosecond, None)", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that an uncast `now()` is reported as a nanosecond timestamp tagged "UTC".
+#[tokio::test]
+async fn arrow_typeof_timestamp_utc() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(now())").await;
+
+    assert_eq!("Timestamp(Nanosecond, Some(\"UTC\"))", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that `now()::date` (a cast to `date`) is reported as `Date32`.
+#[tokio::test]
+async fn arrow_typeof_timestamp_date32() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof(now()::date)").await;
+
+    assert_eq!("Date32", &rows[0][0]);
+
+    Ok(())
+}
+
+/// Checks that the string literal `'1'` is reported as `Utf8`.
+#[tokio::test]
+async fn arrow_typeof_utf8() -> Result<()> {
+    let session = SessionContext::new();
+    let rows = execute(&session, "SELECT arrow_typeof('1')").await;
+
+    assert_eq!("Utf8", &rows[0][0]);
+
+    Ok(())
+}
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index 6f1ae52ae..724bdc90d 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -105,6 +105,7 @@ pub mod union;
 pub mod wildcard;
 pub mod window;
 
+pub mod arrow_typeof;
 pub mod decimal;
 mod explain;
 mod idenfifers;
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index 79926446e..532699a37 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -164,8 +164,10 @@ pub enum BuiltinScalarFunction {
     Upper,
     /// regexp_match
     RegexpMatch,
-    ///struct
+    /// struct
     Struct,
+    /// arrow_typeof
+    ArrowTypeof,
 }
 
 impl BuiltinScalarFunction {
@@ -248,6 +250,7 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::RegexpMatch => Volatility::Immutable,
             BuiltinScalarFunction::Struct => Volatility::Immutable,
             BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
+            BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
 
             // Stable builtin functions
             BuiltinScalarFunction::Now => Volatility::Stable,
@@ -345,6 +348,7 @@ impl FromStr for BuiltinScalarFunction {
             "regexp_match" => BuiltinScalarFunction::RegexpMatch,
             "struct" => BuiltinScalarFunction::Struct,
             "from_unixtime" => BuiltinScalarFunction::FromUnixtime,
+            "arrow_typeof" => BuiltinScalarFunction::ArrowTypeof,
             _ => {
                 return Err(DataFusionError::Plan(format!(
                     "There is no built-in function named {}",
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 75abe44f9..09ac0c287 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -379,6 +379,8 @@ scalar_expr!(ToTimestampMicros, to_timestamp_micros, date);
 scalar_expr!(ToTimestampSeconds, to_timestamp_seconds, date);
 scalar_expr!(FromUnixtime, from_unixtime, unixtime);
 
+unary_scalar_expr!(ArrowTypeof, arrow_typeof, "data type");
+
 /// Returns an array of fixed size with each argument on it.
 pub fn array(args: Vec<Expr>) -> Expr {
     Expr::ScalarFunction {
@@ -630,6 +632,8 @@ mod test {
         test_scalar_expr!(DateTrunc, date_trunc, part, date);
         test_scalar_expr!(DateBin, date_bin, stride, source, origin);
         test_scalar_expr!(FromUnixtime, from_unixtime, unixtime);
+
+        test_unary_scalar_expr!(ArrowTypeof, arrow_typeof);
     }
 
     #[test]
diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs
index 20a55381d..5cf42fbd2 100644
--- a/datafusion/expr/src/function.rs
+++ b/datafusion/expr/src/function.rs
@@ -237,6 +237,8 @@ pub fn return_type(
             _ => Ok(DataType::Float64),
         },
 
+        BuiltinScalarFunction::ArrowTypeof => Ok(DataType::Utf8),
+
         BuiltinScalarFunction::Abs
         | BuiltinScalarFunction::Acos
         | BuiltinScalarFunction::Asin
@@ -567,6 +569,7 @@ pub fn signature(fun: &BuiltinScalarFunction) -> Signature {
             ],
             fun.volatility(),
         ),
+        BuiltinScalarFunction::ArrowTypeof => Signature::any(1, fun.volatility()),
         // math expressions expect 1 argument of type f64 or f32
         // priority is given to f64 because e.g. `sqrt(1i32)` is in IR (real numbers) and thus we
         // return the best approximation for it (in f64).
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index 913a2c384..dde0ee0a0 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -169,6 +169,15 @@ pub fn create_physical_expr(
                 }
             }
         }),
+        BuiltinScalarFunction::ArrowTypeof => {
+            let input_data_type = coerced_phy_exprs[0].data_type(input_schema)?;
+            Arc::new(move |_| {
+                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(format!(
+                    "{}",
+                    input_data_type
+                )))))
+            })
+        }
         // These don't need args and input schema
         _ => create_physical_fun(fun, execution_props)?,
     };
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index 7296466d4..7b08e4f40 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -441,6 +441,7 @@ enum ScalarFunction {
   FromUnixtime=66;
   Atan2=67;
   DateBin=68;
+  ArrowTypeof=69;
 }
 
 message ScalarFunctionNode {
diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs
index c1c88e70d..524b03bd6 100644
--- a/datafusion/proto/src/from_proto.rs
+++ b/datafusion/proto/src/from_proto.rs
@@ -476,6 +476,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
             ScalarFunction::StructFun => Self::Struct,
             ScalarFunction::FromUnixtime => Self::FromUnixtime,
             ScalarFunction::Atan2 => Self::Atan2,
+            ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
         }
     }
 }
diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs
index e78706e52..045b97a31 100644
--- a/datafusion/proto/src/to_proto.rs
+++ b/datafusion/proto/src/to_proto.rs
@@ -1128,6 +1128,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
             BuiltinScalarFunction::Struct => Self::StructFun,
             BuiltinScalarFunction::FromUnixtime => Self::FromUnixtime,
             BuiltinScalarFunction::Atan2 => Self::Atan2,
+            BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
         };
 
         Ok(scalar_function)