You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2023/06/02 07:53:05 UTC

[arrow-datafusion] branch add-greatest-least created (now 5214393d1f)

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a change to branch add-greatest-least
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


      at 5214393d1f [built-in function] add greatest and least

This branch includes the following new commits:

     new 5214393d1f [built-in function] add greatest and least

The 1 revision listed above as "new" is entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow-datafusion] 01/01: [built-in function] add greatest and least

Posted by ji...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch add-greatest-least
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 5214393d1fbf80fc7438fada6ff6563b949aeaf4
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Fri Jun 2 15:52:38 2023 +0800

    [built-in function] add greatest and least
---
 datafusion/core/tests/sql/expr.rs                  |   6 +
 datafusion/expr/src/built_in_function.rs           |  10 ++
 datafusion/expr/src/comparison_expressions.rs      |  35 ++++++
 datafusion/expr/src/expr_fn.rs                     |  24 +++-
 datafusion/expr/src/function.rs                    |  15 ++-
 datafusion/expr/src/lib.rs                         |   1 +
 .../physical-expr/src/comparison_expressions.rs    | 133 +++++++++++++++++++++
 datafusion/physical-expr/src/functions.rs          |   5 +-
 datafusion/physical-expr/src/lib.rs                |   1 +
 datafusion/proto/proto/datafusion.proto            |   2 +
 datafusion/proto/proto/proto_descriptor.bin        | Bin 0 -> 85877 bytes
 .../src/{generated/prost.rs => datafusion.rs}      |   6 +
 .../{generated/pbjson.rs => datafusion.serde.rs}   |   6 +
 datafusion/proto/src/generated/pbjson.rs           |   6 +
 datafusion/proto/src/generated/prost.rs            |   6 +
 datafusion/proto/src/logical_plan/from_proto.rs    |  17 ++-
 datafusion/proto/src/logical_plan/to_proto.rs      |   2 +
 docs/source/user-guide/sql/sql_status.md           |   3 +
 18 files changed, 273 insertions(+), 5 deletions(-)

diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs
index 6783670545..c432f62572 100644
--- a/datafusion/core/tests/sql/expr.rs
+++ b/datafusion/core/tests/sql/expr.rs
@@ -200,6 +200,12 @@ async fn binary_bitwise_shift() -> Result<()> {
     Ok(())
 }
 
+/// Checks that `greatest` and `least` evaluate over integer literals.
+#[tokio::test]
+async fn test_comparison_func_expressions() -> Result<()> {
+    test_expression!("greatest(1,2,3)", "3");
+    test_expression!("least(1,2,3)", "1");
+    // The signature promises a `Result<()>`, so the body has to end in a value.
+    Ok(())
+}
+
 #[tokio::test]
 async fn test_interval_expressions() -> Result<()> {
     // day nano intervals
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index 3911939b4c..d4ca93ba24 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -205,6 +205,10 @@ pub enum BuiltinScalarFunction {
     Struct,
     /// arrow_typeof
     ArrowTypeof,
+    /// greatest
+    Greatest,
+    /// least
+    Least,
 }
 
 lazy_static! {
@@ -328,6 +332,8 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Struct => Volatility::Immutable,
             BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
             BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
+            BuiltinScalarFunction::Greatest => Volatility::Immutable,
+            BuiltinScalarFunction::Least => Volatility::Immutable,
 
             // Stable builtin functions
             BuiltinScalarFunction::Now => Volatility::Stable,
@@ -414,6 +420,10 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
         BuiltinScalarFunction::Upper => &["upper"],
         BuiltinScalarFunction::Uuid => &["uuid"],
 
+        // comparison functions
+        BuiltinScalarFunction::Greatest => &["greatest"],
+        BuiltinScalarFunction::Least => &["least"],
+
         // regex functions
         BuiltinScalarFunction::RegexpMatch => &["regexp_match"],
         BuiltinScalarFunction::RegexpReplace => &["regexp_replace"],
diff --git a/datafusion/expr/src/comparison_expressions.rs b/datafusion/expr/src/comparison_expressions.rs
new file mode 100644
index 0000000000..c7f13f04f0
--- /dev/null
+++ b/datafusion/expr/src/comparison_expressions.rs
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::DataType;
+
+/// Argument data types accepted by the variadic `greatest` and `least` signature.
+pub static SUPPORTED_COMPARISON_TYPES: &[DataType] = &[
+    DataType::Boolean,
+    DataType::UInt8,
+    DataType::UInt16,
+    DataType::UInt32,
+    DataType::UInt64,
+    DataType::Int8,
+    DataType::Int16,
+    DataType::Int32,
+    DataType::Int64,
+    DataType::Float32,
+    DataType::Float64,
+    DataType::Utf8,
+    DataType::LargeUtf8,
+];
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 6b0a09baf9..4f236f1720 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -237,6 +237,22 @@ pub fn concat_ws(sep: Expr, values: Vec<Expr>) -> Expr {
     ))
 }
 
+/// Creates an expression that calls the `greatest` built-in scalar function
+/// with a copy of `args` as its argument list.
+pub fn greatest(args: &[Expr]) -> Expr {
+    let call = ScalarFunction::new(BuiltinScalarFunction::Greatest, args.to_vec());
+    Expr::ScalarFunction(call)
+}
+
+/// Creates an expression that calls the `least` built-in scalar function
+/// with a copy of `args` as its argument list.
+pub fn least(args: &[Expr]) -> Expr {
+    let call = ScalarFunction::new(BuiltinScalarFunction::Least, args.to_vec());
+    Expr::ScalarFunction(call)
+}
+
 /// Returns an approximate value of π
 pub fn pi() -> Expr {
     Expr::ScalarFunction(ScalarFunction::new(BuiltinScalarFunction::Pi, vec![]))
@@ -620,9 +636,15 @@ nary_scalar_expr!(Coalesce, coalesce, "returns `coalesce(args...)`, which evalua
 nary_scalar_expr!(
     ConcatWithSeparator,
     concat_ws_expr,
-    "concatenates several strings, placing a seperator between each one"
+    "concatenates several strings, placing a separator between each one"
 );
 nary_scalar_expr!(Concat, concat_expr, "concatenates several strings");
+nary_scalar_expr!(
+    Greatest,
+    greatest_expr,
+    "gets the largest value of the list"
+);
+nary_scalar_expr!(Least, least_expr, "gets the smallest value of the list");
 
 // date functions
 scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date");
diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs
index 5ba6852248..1729a6f88d 100644
--- a/datafusion/expr/src/function.rs
+++ b/datafusion/expr/src/function.rs
@@ -22,8 +22,8 @@ use crate::nullif::SUPPORTED_NULLIF_TYPES;
 use crate::type_coercion::functions::data_types;
 use crate::ColumnarValue;
 use crate::{
-    array_expressions, conditional_expressions, struct_expressions, Accumulator,
-    BuiltinScalarFunction, Signature, TypeSignature,
+    array_expressions, comparison_expressions, conditional_expressions,
+    struct_expressions, Accumulator, BuiltinScalarFunction, Signature, TypeSignature,
 };
 use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
 use datafusion_common::{DataFusionError, Result};
@@ -168,6 +168,11 @@ pub fn return_type(
             let coerced_types = data_types(input_expr_types, &signature(fun));
             coerced_types.map(|typs| typs[0].clone())
         }
+        BuiltinScalarFunction::Greatest | BuiltinScalarFunction::Least => {
+            // GREATEST and LEAST have multiple args and they might get coerced, get a preview of this
+            let coerced_types = data_types(input_expr_types, &signature(fun));
+            coerced_types.map(|typs| typs[0].clone())
+        }
         BuiltinScalarFunction::OctetLength => {
             utf8_to_int_type(&input_expr_types[0], "octet_length")
         }
@@ -376,6 +381,12 @@ pub fn signature(fun: &BuiltinScalarFunction) -> Signature {
         BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => {
             Signature::uniform(1, vec![DataType::Int64], fun.volatility())
         }
+        BuiltinScalarFunction::Greatest | BuiltinScalarFunction::Least => {
+            Signature::variadic(
+                comparison_expressions::SUPPORTED_COMPARISON_TYPES.to_vec(),
+                fun.volatility(),
+            )
+        }
         BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => Signature::one_of(
             vec![
                 TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64]),
diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs
index 45777e09d4..c717cd3f19 100644
--- a/datafusion/expr/src/lib.rs
+++ b/datafusion/expr/src/lib.rs
@@ -30,6 +30,7 @@ pub mod aggregate_function;
 pub mod array_expressions;
 mod built_in_function;
 mod columnar_value;
+pub mod comparison_expressions;
 pub mod conditional_expressions;
 pub mod expr;
 pub mod expr_fn;
diff --git a/datafusion/physical-expr/src/comparison_expressions.rs b/datafusion/physical-expr/src/comparison_expressions.rs
new file mode 100644
index 0000000000..8f3c675686
--- /dev/null
+++ b/datafusion/physical-expr/src/comparison_expressions.rs
@@ -0,0 +1,133 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Some of these functions reference the Postgres documentation
+// or implementation to ensure compatibility and are subject to
+// the Postgres license.
+
+//! Comparison expressions
+
+use arrow::datatypes::DataType;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+
+#[derive(Debug, Clone, PartialEq)]
+enum ComparisonOperator { // tells `compare` which public entry point invoked it
+    Greatest, // passed by `greatest`
+    Least, // passed by `least`
+}
+
+/// Reduces `a` and `b` to a single type both can be compared as, if possible.
+///
+/// Within one family (booleans, UTF-8 strings, binary, signed integers,
+/// unsigned integers, floats) the wider of the two types is returned. There is
+/// no promotion across families: pairs such as `Int32`/`UInt32` or
+/// `Int64`/`Float64` fall through to the error arm.
+///
+/// # Errors
+///
+/// Returns `DataFusionError::Internal` naming both types when no arm matches.
+fn reduce_compatible_types(a: DataType, b: DataType) -> Result<DataType> {
+    Ok(match (a, b) {
+        (DataType::Boolean, DataType::Boolean) => DataType::Boolean,
+
+        // strings: anything involving LargeUtf8 widens to LargeUtf8
+        (DataType::Utf8, DataType::Utf8) => DataType::Utf8,
+        (DataType::Utf8, DataType::LargeUtf8) => DataType::LargeUtf8,
+        (DataType::LargeUtf8, DataType::Utf8 | DataType::LargeUtf8) => {
+            DataType::LargeUtf8
+        }
+
+        // binary: anything involving LargeBinary widens to LargeBinary
+        (DataType::Binary, DataType::Binary) => DataType::Binary,
+        (DataType::Binary, DataType::LargeBinary) => DataType::LargeBinary,
+        (DataType::LargeBinary, DataType::Binary | DataType::LargeBinary) => {
+            DataType::LargeBinary
+        }
+
+        // signed integers: the wider width wins
+        (DataType::Int8, DataType::Int8) => DataType::Int8,
+        (DataType::Int8, DataType::Int16) => DataType::Int16,
+        (DataType::Int8, DataType::Int32) => DataType::Int32,
+        (DataType::Int8, DataType::Int64) => DataType::Int64,
+
+        (DataType::Int16, DataType::Int8 | DataType::Int16) => DataType::Int16,
+        (DataType::Int16, DataType::Int32) => DataType::Int32,
+        (DataType::Int16, DataType::Int64) => DataType::Int64,
+
+        (DataType::Int32, DataType::Int8 | DataType::Int16 | DataType::Int32) => {
+            DataType::Int32
+        }
+        (DataType::Int32, DataType::Int64) => DataType::Int64,
+
+        (
+            DataType::Int64,
+            DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64,
+        ) => DataType::Int64,
+
+        // unsigned integers: the wider width wins
+        (DataType::UInt8, DataType::UInt8) => DataType::UInt8,
+        (DataType::UInt8, DataType::UInt16) => DataType::UInt16,
+        (DataType::UInt8, DataType::UInt32) => DataType::UInt32,
+        (DataType::UInt8, DataType::UInt64) => DataType::UInt64,
+
+        (DataType::UInt16, DataType::UInt8 | DataType::UInt16) => DataType::UInt16,
+        (DataType::UInt16, DataType::UInt32) => DataType::UInt32,
+        (DataType::UInt16, DataType::UInt64) => DataType::UInt64,
+
+        (DataType::UInt32, DataType::UInt8 | DataType::UInt16 | DataType::UInt32) => {
+            DataType::UInt32
+        }
+        (DataType::UInt32, DataType::UInt64) => DataType::UInt64,
+
+        (
+            DataType::UInt64,
+            DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64,
+        ) => DataType::UInt64,
+
+        // floats: Float64 wins over Float32
+        (DataType::Float32, DataType::Float32) => DataType::Float32,
+        (DataType::Float32, DataType::Float64) => DataType::Float64,
+
+        (DataType::Float64, DataType::Float32 | DataType::Float64) => DataType::Float64,
+
+        // Name the offending pair so the failure can be diagnosed from the message.
+        (lhs, rhs) => {
+            return Err(DataFusionError::Internal(format!(
+                "Cannot compare types {:?} and {:?}",
+                lhs, rhs
+            )))
+        }
+    })
+}
+
+/// Shared entry point behind `greatest` and `least`.
+///
+/// * no arguments: returns an `Internal` error;
+/// * one argument: returns a clone of that argument unchanged;
+/// * two or more: folds the argument data types through
+///   `reduce_compatible_types` to find one common comparison type.
+///
+/// # Errors
+///
+/// Besides the cases above, the element-wise comparison itself is not written
+/// yet, so the multi-argument path reports `NotImplemented` (rather than
+/// panicking) once the argument types have been validated.
+fn compare(op: ComparisonOperator, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    let (first, rest) = match args {
+        [] => {
+            return Err(DataFusionError::Internal(format!(
+                "{:?} expressions require at least one argument",
+                op
+            )))
+        }
+        [only] => return Ok(only.clone()),
+        [first, rest @ ..] => (first, rest),
+    };
+
+    // Arrays and scalars expose their data type through different accessors.
+    let data_type_of = |arg: &ColumnarValue| match arg {
+        ColumnarValue::Array(array) => array.data_type().clone(),
+        ColumnarValue::Scalar(scalar) => scalar.get_datatype(),
+    };
+
+    let first_type: DataType = data_type_of(first);
+    let compute_type = rest
+        .iter()
+        .map(data_type_of)
+        .try_fold(first_type, reduce_compatible_types)?;
+
+    // Surface the gap as a regular error so a query fails cleanly instead of
+    // aborting the thread that evaluates it.
+    Err(DataFusionError::NotImplemented(format!(
+        "{:?} is not yet implemented for arguments of type {:?}",
+        op, compute_type
+    )))
+}
+
+/// Physical implementation hook for the `greatest` built-in; forwards `args`
+/// to `compare` tagged with `ComparisonOperator::Greatest`.
+pub fn greatest(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    compare(ComparisonOperator::Greatest, args)
+}
+
+/// Physical implementation hook for the `least` built-in; forwards `args`
+/// to `compare` tagged with `ComparisonOperator::Least`.
+pub fn least(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    compare(ComparisonOperator::Least, args)
+}
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index 7020dda8b1..438ebc2687 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -32,7 +32,8 @@
 
 use crate::execution_props::ExecutionProps;
 use crate::{
-    array_expressions, conditional_expressions, datetime_expressions,
+    array_expressions, comparison_expressions, conditional_expressions,
+    datetime_expressions,
     expressions::{cast_column, nullif_func},
     math_expressions, string_expressions, struct_expressions, PhysicalExpr,
     ScalarFunctionExpr,
@@ -452,6 +453,8 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::ConcatWithSeparator => {
             Arc::new(|args| make_scalar_function(string_expressions::concat_ws)(args))
         }
+        BuiltinScalarFunction::Greatest => Arc::new(comparison_expressions::greatest),
+        BuiltinScalarFunction::Least => Arc::new(comparison_expressions::least),
         BuiltinScalarFunction::DatePart => Arc::new(datetime_expressions::date_part),
         BuiltinScalarFunction::DateTrunc => Arc::new(datetime_expressions::date_trunc),
         BuiltinScalarFunction::DateBin => Arc::new(datetime_expressions::date_bin),
diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs
index b54bcda601..06fa0d64d1 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -17,6 +17,7 @@
 
 pub mod aggregate;
 pub mod array_expressions;
+pub mod comparison_expressions;
 pub mod conditional_expressions;
 #[cfg(feature = "crypto_expressions")]
 pub mod crypto_expressions;
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index c23d585e61..f98d2dc284 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -549,6 +549,8 @@ enum ScalarFunction {
   Factorial = 83;
   Lcm = 84;
   Gcd = 85;
+  Greatest = 86;
+  Least = 87;
 }
 
 message ScalarFunctionNode {
diff --git a/datafusion/proto/proto/proto_descriptor.bin b/datafusion/proto/proto/proto_descriptor.bin
new file mode 100644
index 0000000000..60067564b4
Binary files /dev/null and b/datafusion/proto/proto/proto_descriptor.bin differ
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/datafusion.rs
similarity index 99%
copy from datafusion/proto/src/generated/prost.rs
copy to datafusion/proto/src/datafusion.rs
index 4cf50d70bf..acf85d5ab0 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/datafusion.rs
@@ -2208,6 +2208,8 @@ pub enum ScalarFunction {
     Factorial = 83,
     Lcm = 84,
     Gcd = 85,
+    Greatest = 86,
+    Least = 87,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2302,6 +2304,8 @@ impl ScalarFunction {
             ScalarFunction::Factorial => "Factorial",
             ScalarFunction::Lcm => "Lcm",
             ScalarFunction::Gcd => "Gcd",
+            ScalarFunction::Greatest => "Greatest",
+            ScalarFunction::Least => "Least",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -2393,6 +2397,8 @@ impl ScalarFunction {
             "Factorial" => Some(Self::Factorial),
             "Lcm" => Some(Self::Lcm),
             "Gcd" => Some(Self::Gcd),
+            "Greatest" => Some(Self::Greatest),
+            "Least" => Some(Self::Least),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/datafusion.serde.rs
similarity index 99%
copy from datafusion/proto/src/generated/pbjson.rs
copy to datafusion/proto/src/datafusion.serde.rs
index 369cc0b24e..fd5657f4cb 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/datafusion.serde.rs
@@ -17851,6 +17851,8 @@ impl serde::Serialize for ScalarFunction {
             Self::Factorial => "Factorial",
             Self::Lcm => "Lcm",
             Self::Gcd => "Gcd",
+            Self::Greatest => "Greatest",
+            Self::Least => "Least",
         };
         serializer.serialize_str(variant)
     }
@@ -17948,6 +17950,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Factorial",
             "Lcm",
             "Gcd",
+            "Greatest",
+            "Least",
         ];
 
         struct GeneratedVisitor;
@@ -18076,6 +18080,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Factorial" => Ok(ScalarFunction::Factorial),
                     "Lcm" => Ok(ScalarFunction::Lcm),
                     "Gcd" => Ok(ScalarFunction::Gcd),
+                    "Greatest" => Ok(ScalarFunction::Greatest),
+                    "Least" => Ok(ScalarFunction::Least),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index 369cc0b24e..fd5657f4cb 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -17851,6 +17851,8 @@ impl serde::Serialize for ScalarFunction {
             Self::Factorial => "Factorial",
             Self::Lcm => "Lcm",
             Self::Gcd => "Gcd",
+            Self::Greatest => "Greatest",
+            Self::Least => "Least",
         };
         serializer.serialize_str(variant)
     }
@@ -17948,6 +17950,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Factorial",
             "Lcm",
             "Gcd",
+            "Greatest",
+            "Least",
         ];
 
         struct GeneratedVisitor;
@@ -18076,6 +18080,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Factorial" => Ok(ScalarFunction::Factorial),
                     "Lcm" => Ok(ScalarFunction::Lcm),
                     "Gcd" => Ok(ScalarFunction::Gcd),
+                    "Greatest" => Ok(ScalarFunction::Greatest),
+                    "Least" => Ok(ScalarFunction::Least),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index 4cf50d70bf..acf85d5ab0 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2208,6 +2208,8 @@ pub enum ScalarFunction {
     Factorial = 83,
     Lcm = 84,
     Gcd = 85,
+    Greatest = 86,
+    Least = 87,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2302,6 +2304,8 @@ impl ScalarFunction {
             ScalarFunction::Factorial => "Factorial",
             ScalarFunction::Lcm => "Lcm",
             ScalarFunction::Gcd => "Gcd",
+            ScalarFunction::Greatest => "Greatest",
+            ScalarFunction::Least => "Least",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -2393,6 +2397,8 @@ impl ScalarFunction {
             "Factorial" => Some(Self::Factorial),
             "Lcm" => Some(Self::Lcm),
             "Gcd" => Some(Self::Gcd),
+            "Greatest" => Some(Self::Greatest),
+            "Least" => Some(Self::Least),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index 1150220bef..4438ad945b 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -40,7 +40,8 @@ use datafusion_expr::{
     cbrt, ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh,
     date_bin, date_part, date_trunc, degrees, digest, exp,
     expr::{self, InList, Sort, WindowFunction},
-    factorial, floor, from_unixtime, gcd, lcm, left, ln, log, log10, log2,
+    factorial, floor, from_unixtime, gcd, greatest, lcm, least, left, ln, log, log10,
+    log2,
     logical_plan::{PlanType, StringifiedPlan},
     lower, lpad, ltrim, md5, now, nullif, octet_length, pi, power, radians, random,
     regexp_match, regexp_replace, repeat, replace, reverse, right, round, rpad, rtrim,
@@ -494,6 +495,8 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
             ScalarFunction::FromUnixtime => Self::FromUnixtime,
             ScalarFunction::Atan2 => Self::Atan2,
             ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
+            ScalarFunction::Greatest => Self::Greatest,
+            ScalarFunction::Least => Self::Least,
         }
     }
 }
@@ -1273,6 +1276,18 @@ pub fn parse_expr(
                         .map(|expr| parse_expr(expr, registry))
                         .collect::<Result<Vec<_>, _>>()?,
                 )),
+                ScalarFunction::Greatest => Ok(greatest(
+                    &args.to_owned()
+                        .iter()
+                        .map(|expr| parse_expr(expr, registry))
+                        .collect::<Result<Vec<_>, _>>()?,
+                )),
+                ScalarFunction::Least => Ok(least(
+                    &args.to_owned()
+                        .iter()
+                        .map(|expr| parse_expr(expr, registry))
+                        .collect::<Result<Vec<_>, _>>()?,
+                )),
                 ScalarFunction::Lpad => Ok(lpad(
                     args.to_owned()
                         .iter()
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index 191c491944..6e27cbc4ad 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1386,6 +1386,8 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
             BuiltinScalarFunction::FromUnixtime => Self::FromUnixtime,
             BuiltinScalarFunction::Atan2 => Self::Atan2,
             BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
+            BuiltinScalarFunction::Greatest => Self::Greatest,
+            BuiltinScalarFunction::Least => Self::Least,
         };
 
         Ok(scalar_function)
diff --git a/docs/source/user-guide/sql/sql_status.md b/docs/source/user-guide/sql/sql_status.md
index 6075a23330..b7f4a0bc11 100644
--- a/docs/source/user-guide/sql/sql_status.md
+++ b/docs/source/user-guide/sql/sql_status.md
@@ -76,6 +76,9 @@
   - [x] nullif
   - [x] case
   - [x] coalesce
+- Comparison functions
+  - [x] greatest
+  - [x] least
 - Approximation functions
   - [x] approx_distinct
   - [x] approx_median