You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ji...@apache.org on 2023/06/02 07:53:06 UTC

[arrow-datafusion] 01/01: [built-in function] add greatest and least

This is an automated email from the ASF dual-hosted git repository.

jiayuliu pushed a commit to branch add-greatest-least
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git

commit 5214393d1fbf80fc7438fada6ff6563b949aeaf4
Author: Jiayu Liu <ji...@hey.com>
AuthorDate: Fri Jun 2 15:52:38 2023 +0800

    [built-in function] add greatest and least
---
 datafusion/core/tests/sql/expr.rs                  |   6 +
 datafusion/expr/src/built_in_function.rs           |  10 ++
 datafusion/expr/src/comparison_expressions.rs      |  35 ++++++
 datafusion/expr/src/expr_fn.rs                     |  24 +++-
 datafusion/expr/src/function.rs                    |  15 ++-
 datafusion/expr/src/lib.rs                         |   1 +
 .../physical-expr/src/comparison_expressions.rs    | 133 +++++++++++++++++++++
 datafusion/physical-expr/src/functions.rs          |   5 +-
 datafusion/physical-expr/src/lib.rs                |   1 +
 datafusion/proto/proto/datafusion.proto            |   2 +
 datafusion/proto/proto/proto_descriptor.bin        | Bin 0 -> 85877 bytes
 .../src/{generated/prost.rs => datafusion.rs}      |   6 +
 .../{generated/pbjson.rs => datafusion.serde.rs}   |   6 +
 datafusion/proto/src/generated/pbjson.rs           |   6 +
 datafusion/proto/src/generated/prost.rs            |   6 +
 datafusion/proto/src/logical_plan/from_proto.rs    |  17 ++-
 datafusion/proto/src/logical_plan/to_proto.rs      |   2 +
 docs/source/user-guide/sql/sql_status.md           |   3 +
 18 files changed, 273 insertions(+), 5 deletions(-)

diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs
index 6783670545..c432f62572 100644
--- a/datafusion/core/tests/sql/expr.rs
+++ b/datafusion/core/tests/sql/expr.rs
@@ -200,6 +200,14 @@ async fn binary_bitwise_shift() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_comparison_func_expressions() -> Result<()> {
+    // Smoke test for the variadic comparison functions on integer literals.
+    test_expression!("greatest(1,2,3)", "3");
+    test_expression!("least(1,2,3)", "1");
+    Ok(())
+}
+
 #[tokio::test]
 async fn test_interval_expressions() -> Result<()> {
     // day nano intervals
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index 3911939b4c..d4ca93ba24 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -205,6 +205,10 @@ pub enum BuiltinScalarFunction {
     Struct,
     /// arrow_typeof
     ArrowTypeof,
+    /// greatest
+    Greatest,
+    /// least
+    Least,
 }
 
 lazy_static! {
@@ -328,6 +332,8 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Struct => Volatility::Immutable,
             BuiltinScalarFunction::FromUnixtime => Volatility::Immutable,
             BuiltinScalarFunction::ArrowTypeof => Volatility::Immutable,
+            BuiltinScalarFunction::Greatest => Volatility::Immutable,
+            BuiltinScalarFunction::Least => Volatility::Immutable,
 
             // Stable builtin functions
             BuiltinScalarFunction::Now => Volatility::Stable,
@@ -414,6 +420,10 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
         BuiltinScalarFunction::Upper => &["upper"],
         BuiltinScalarFunction::Uuid => &["uuid"],
 
+        // comparison functions
+        BuiltinScalarFunction::Greatest => &["greatest"],
+        BuiltinScalarFunction::Least => &["least"],
+
         // regex functions
         BuiltinScalarFunction::RegexpMatch => &["regexp_match"],
         BuiltinScalarFunction::RegexpReplace => &["regexp_replace"],
diff --git a/datafusion/expr/src/comparison_expressions.rs b/datafusion/expr/src/comparison_expressions.rs
new file mode 100644
index 0000000000..c7f13f04f0
--- /dev/null
+++ b/datafusion/expr/src/comparison_expressions.rs
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::datatypes::DataType;
+
+/// Data types currently accepted as arguments by the `greatest` and `least` functions.
+pub static SUPPORTED_COMPARISON_TYPES: &[DataType] = &[
+    DataType::Boolean,
+    DataType::UInt8,
+    DataType::UInt16,
+    DataType::UInt32,
+    DataType::UInt64,
+    DataType::Int8,
+    DataType::Int16,
+    DataType::Int32,
+    DataType::Int64,
+    DataType::Float32,
+    DataType::Float64,
+    DataType::Utf8,
+    DataType::LargeUtf8,
+];
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 6b0a09baf9..4f236f1720 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -237,6 +237,22 @@ pub fn concat_ws(sep: Expr, values: Vec<Expr>) -> Expr {
     ))
 }
 
+/// Creates a `greatest` call that evaluates to the largest of `args`.
+pub fn greatest(args: &[Expr]) -> Expr {
+    let fun = BuiltinScalarFunction::Greatest;
+    // `ScalarFunction::new` is handed an owned `Vec`, so the slice is copied.
+    let call = ScalarFunction::new(fun, args.to_vec());
+    Expr::ScalarFunction(call)
+}
+
+/// Creates a `least` call that evaluates to the smallest of `args`.
+pub fn least(args: &[Expr]) -> Expr {
+    let fun = BuiltinScalarFunction::Least;
+    // `ScalarFunction::new` is handed an owned `Vec`, so the slice is copied.
+    let call = ScalarFunction::new(fun, args.to_vec());
+    Expr::ScalarFunction(call)
+}
+
 /// Returns an approximate value of π
 pub fn pi() -> Expr {
     Expr::ScalarFunction(ScalarFunction::new(BuiltinScalarFunction::Pi, vec![]))
@@ -620,9 +636,15 @@ nary_scalar_expr!(Coalesce, coalesce, "returns `coalesce(args...)`, which evalua
 nary_scalar_expr!(
     ConcatWithSeparator,
     concat_ws_expr,
-    "concatenates several strings, placing a seperator between each one"
+    "concatenates several strings, placing a separator between each one"
 );
 nary_scalar_expr!(Concat, concat_expr, "concatenates several strings");
+nary_scalar_expr!(
+    Greatest,
+    greatest_expr,
+    "gets the largest value of the list"
+);
+nary_scalar_expr!(Least, least_expr, "gets the smallest value of the list");
 
 // date functions
 scalar_expr!(DatePart, date_part, part date, "extracts a subfield from the date");
diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs
index 5ba6852248..1729a6f88d 100644
--- a/datafusion/expr/src/function.rs
+++ b/datafusion/expr/src/function.rs
@@ -22,8 +22,8 @@ use crate::nullif::SUPPORTED_NULLIF_TYPES;
 use crate::type_coercion::functions::data_types;
 use crate::ColumnarValue;
 use crate::{
-    array_expressions, conditional_expressions, struct_expressions, Accumulator,
-    BuiltinScalarFunction, Signature, TypeSignature,
+    array_expressions, comparison_expressions, conditional_expressions,
+    struct_expressions, Accumulator, BuiltinScalarFunction, Signature, TypeSignature,
 };
 use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
 use datafusion_common::{DataFusionError, Result};
@@ -168,6 +168,11 @@ pub fn return_type(
             let coerced_types = data_types(input_expr_types, &signature(fun));
             coerced_types.map(|typs| typs[0].clone())
         }
+        BuiltinScalarFunction::Greatest | BuiltinScalarFunction::Least => {
+            // GREATEST and LEAST have multiple args and they might get coerced, get a preview of this
+            let coerced_types = data_types(input_expr_types, &signature(fun));
+            coerced_types.map(|typs| typs[0].clone())
+        }
         BuiltinScalarFunction::OctetLength => {
             utf8_to_int_type(&input_expr_types[0], "octet_length")
         }
@@ -376,6 +381,12 @@ pub fn signature(fun: &BuiltinScalarFunction) -> Signature {
         BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => {
             Signature::uniform(1, vec![DataType::Int64], fun.volatility())
         }
+        BuiltinScalarFunction::Greatest | BuiltinScalarFunction::Least => {
+            Signature::variadic(
+                comparison_expressions::SUPPORTED_COMPARISON_TYPES.to_vec(),
+                fun.volatility(),
+            )
+        }
         BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => Signature::one_of(
             vec![
                 TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64]),
diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs
index 45777e09d4..c717cd3f19 100644
--- a/datafusion/expr/src/lib.rs
+++ b/datafusion/expr/src/lib.rs
@@ -30,6 +30,7 @@ pub mod aggregate_function;
 pub mod array_expressions;
 mod built_in_function;
 mod columnar_value;
+pub mod comparison_expressions;
 pub mod conditional_expressions;
 pub mod expr;
 pub mod expr_fn;
diff --git a/datafusion/physical-expr/src/comparison_expressions.rs b/datafusion/physical-expr/src/comparison_expressions.rs
new file mode 100644
index 0000000000..8f3c675686
--- /dev/null
+++ b/datafusion/physical-expr/src/comparison_expressions.rs
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Some of these functions reference the Postgres documentation
+// or implementation to ensure compatibility and are subject to
+// the Postgres license.
+
+//! Comparison expressions
+
+use arrow::datatypes::DataType;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+
+/// Which extreme of the argument list a comparison function selects.
+#[derive(Debug, Clone, PartialEq)]
+enum ComparisonOperator {
+    /// Select the largest argument (`greatest`).
+    Greatest,
+    /// Select the smallest argument (`least`).
+    Least,
+}
+
+/// Reduces `a` and `b` to a single type both can be compared as, if possible.
+///
+/// Booleans only pair with booleans. Strings, binaries, signed integers,
+/// unsigned integers and floats each resolve to the wider of the two types
+/// within the same family. Anything else, including mixing families such as
+/// signed with unsigned integers, is rejected.
+///
+/// # Errors
+///
+/// Returns [`DataFusionError::Internal`] naming both types when no arm below
+/// covers the pair.
+fn reduce_compatible_types(a: DataType, b: DataType) -> Result<DataType> {
+    Ok(match (a, b) {
+        (DataType::Boolean, DataType::Boolean) => DataType::Boolean,
+        (DataType::Utf8, DataType::Utf8) => DataType::Utf8,
+        (DataType::Utf8, DataType::LargeUtf8) => DataType::LargeUtf8,
+        (DataType::LargeUtf8, DataType::Utf8 | DataType::LargeUtf8) => {
+            DataType::LargeUtf8
+        }
+
+        (DataType::Binary, DataType::Binary) => DataType::Binary,
+        (DataType::Binary, DataType::LargeBinary) => DataType::LargeBinary,
+        (DataType::LargeBinary, DataType::Binary | DataType::LargeBinary) => {
+            DataType::LargeBinary
+        }
+
+        (DataType::Int8, DataType::Int8) => DataType::Int8,
+        (DataType::Int8, DataType::Int16) => DataType::Int16,
+        (DataType::Int8, DataType::Int32) => DataType::Int32,
+        (DataType::Int8, DataType::Int64) => DataType::Int64,
+
+        (DataType::Int16, DataType::Int8 | DataType::Int16) => DataType::Int16,
+        (DataType::Int16, DataType::Int32) => DataType::Int32,
+        (DataType::Int16, DataType::Int64) => DataType::Int64,
+
+        (DataType::Int32, DataType::Int8 | DataType::Int16 | DataType::Int32) => {
+            DataType::Int32
+        }
+        (DataType::Int32, DataType::Int64) => DataType::Int64,
+
+        (
+            DataType::Int64,
+            DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64,
+        ) => DataType::Int64,
+
+        (DataType::UInt8, DataType::UInt8) => DataType::UInt8,
+        (DataType::UInt8, DataType::UInt16) => DataType::UInt16,
+        (DataType::UInt8, DataType::UInt32) => DataType::UInt32,
+        (DataType::UInt8, DataType::UInt64) => DataType::UInt64,
+
+        (DataType::UInt16, DataType::UInt8 | DataType::UInt16) => DataType::UInt16,
+        (DataType::UInt16, DataType::UInt32) => DataType::UInt32,
+        (DataType::UInt16, DataType::UInt64) => DataType::UInt64,
+
+        (DataType::UInt32, DataType::UInt8 | DataType::UInt16 | DataType::UInt32) => {
+            DataType::UInt32
+        }
+        (DataType::UInt32, DataType::UInt64) => DataType::UInt64,
+
+        (
+            DataType::UInt64,
+            DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64,
+        ) => DataType::UInt64,
+
+        (DataType::Float32, DataType::Float32) => DataType::Float32,
+        (DataType::Float32, DataType::Float64) => DataType::Float64,
+
+        (DataType::Float64, DataType::Float32 | DataType::Float64) => DataType::Float64,
+
+        // Report the offending pair so the failing call can be diagnosed.
+        (a, b) => {
+            return Err(DataFusionError::Internal(format!(
+                "Cannot compare types {:?} and {:?}",
+                a, b
+            )))
+        }
+    })
+}
+
+/// Returns the Arrow data type carried by `value`.
+fn data_type_of(value: &ColumnarValue) -> DataType {
+    match value {
+        ColumnarValue::Array(array) => array.data_type().clone(),
+        ColumnarValue::Scalar(scalar) => scalar.get_datatype(),
+    }
+}
+
+/// Shared implementation of `greatest` and `least`.
+///
+/// A single argument is returned unchanged. With more arguments, their types
+/// are first reduced to one common comparison type.
+///
+/// # Errors
+///
+/// * [`DataFusionError::Internal`] if `args` is empty or the argument types
+///   cannot be reduced to a common type.
+/// * [`DataFusionError::NotImplemented`] for two or more compatible
+///   arguments: element-wise evaluation is not written yet, so an error is
+///   returned rather than panicking during query execution.
+fn compare(op: ComparisonOperator, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    let (first, rest) = match args {
+        [] => {
+            return Err(DataFusionError::Internal(format!(
+                "{:?} expressions require at least one argument",
+                op
+            )))
+        }
+        [only] => return Ok(only.clone()),
+        [first, rest @ ..] => (first, rest),
+    };
+
+    let compute_type = rest
+        .iter()
+        .map(data_type_of)
+        .try_fold(data_type_of(first), reduce_compatible_types)?;
+
+    // TODO: cast every argument to `compute_type` and fold them element-wise.
+    Err(DataFusionError::NotImplemented(format!(
+        "{:?} over {} arguments of type {:?} is not implemented yet",
+        op,
+        args.len(),
+        compute_type
+    )))
+}
+
+/// SQL `greatest`. A lone argument is returned unchanged; evaluating several
+/// arguments is not implemented yet and produces an error instead of a panic.
+pub fn greatest(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    compare(ComparisonOperator::Greatest, args)
+}
+
+/// SQL `least`. A lone argument is returned unchanged; evaluating several
+/// arguments is not implemented yet and produces an error instead of a panic.
+pub fn least(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    compare(ComparisonOperator::Least, args)
+}
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index 7020dda8b1..438ebc2687 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -32,7 +32,8 @@
 
 use crate::execution_props::ExecutionProps;
 use crate::{
-    array_expressions, conditional_expressions, datetime_expressions,
+    array_expressions, comparison_expressions, conditional_expressions,
+    datetime_expressions,
     expressions::{cast_column, nullif_func},
     math_expressions, string_expressions, struct_expressions, PhysicalExpr,
     ScalarFunctionExpr,
@@ -452,6 +453,8 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::ConcatWithSeparator => {
             Arc::new(|args| make_scalar_function(string_expressions::concat_ws)(args))
         }
+        BuiltinScalarFunction::Greatest => Arc::new(comparison_expressions::greatest),
+        BuiltinScalarFunction::Least => Arc::new(comparison_expressions::least),
         BuiltinScalarFunction::DatePart => Arc::new(datetime_expressions::date_part),
         BuiltinScalarFunction::DateTrunc => Arc::new(datetime_expressions::date_trunc),
         BuiltinScalarFunction::DateBin => Arc::new(datetime_expressions::date_bin),
diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs
index b54bcda601..06fa0d64d1 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -17,6 +17,7 @@
 
 pub mod aggregate;
 pub mod array_expressions;
+pub mod comparison_expressions;
 pub mod conditional_expressions;
 #[cfg(feature = "crypto_expressions")]
 pub mod crypto_expressions;
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index c23d585e61..f98d2dc284 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -549,6 +549,8 @@ enum ScalarFunction {
   Factorial = 83;
   Lcm = 84;
   Gcd = 85;
+  Greatest = 86;
+  Least = 87;
 }
 
 message ScalarFunctionNode {
diff --git a/datafusion/proto/proto/proto_descriptor.bin b/datafusion/proto/proto/proto_descriptor.bin
new file mode 100644
index 0000000000..60067564b4
Binary files /dev/null and b/datafusion/proto/proto/proto_descriptor.bin differ
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/datafusion.rs
similarity index 99%
copy from datafusion/proto/src/generated/prost.rs
copy to datafusion/proto/src/datafusion.rs
index 4cf50d70bf..acf85d5ab0 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/datafusion.rs
@@ -2208,6 +2208,8 @@ pub enum ScalarFunction {
     Factorial = 83,
     Lcm = 84,
     Gcd = 85,
+    Greatest = 86,
+    Least = 87,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2302,6 +2304,8 @@ impl ScalarFunction {
             ScalarFunction::Factorial => "Factorial",
             ScalarFunction::Lcm => "Lcm",
             ScalarFunction::Gcd => "Gcd",
+            ScalarFunction::Greatest => "Greatest",
+            ScalarFunction::Least => "Least",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -2393,6 +2397,8 @@ impl ScalarFunction {
             "Factorial" => Some(Self::Factorial),
             "Lcm" => Some(Self::Lcm),
             "Gcd" => Some(Self::Gcd),
+            "Greatest" => Some(Self::Greatest),
+            "Least" => Some(Self::Least),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/datafusion.serde.rs
similarity index 99%
copy from datafusion/proto/src/generated/pbjson.rs
copy to datafusion/proto/src/datafusion.serde.rs
index 369cc0b24e..fd5657f4cb 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/datafusion.serde.rs
@@ -17851,6 +17851,8 @@ impl serde::Serialize for ScalarFunction {
             Self::Factorial => "Factorial",
             Self::Lcm => "Lcm",
             Self::Gcd => "Gcd",
+            Self::Greatest => "Greatest",
+            Self::Least => "Least",
         };
         serializer.serialize_str(variant)
     }
@@ -17948,6 +17950,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Factorial",
             "Lcm",
             "Gcd",
+            "Greatest",
+            "Least",
         ];
 
         struct GeneratedVisitor;
@@ -18076,6 +18080,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Factorial" => Ok(ScalarFunction::Factorial),
                     "Lcm" => Ok(ScalarFunction::Lcm),
                     "Gcd" => Ok(ScalarFunction::Gcd),
+                    "Greatest" => Ok(ScalarFunction::Greatest),
+                    "Least" => Ok(ScalarFunction::Least),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index 369cc0b24e..fd5657f4cb 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -17851,6 +17851,8 @@ impl serde::Serialize for ScalarFunction {
             Self::Factorial => "Factorial",
             Self::Lcm => "Lcm",
             Self::Gcd => "Gcd",
+            Self::Greatest => "Greatest",
+            Self::Least => "Least",
         };
         serializer.serialize_str(variant)
     }
@@ -17948,6 +17950,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Factorial",
             "Lcm",
             "Gcd",
+            "Greatest",
+            "Least",
         ];
 
         struct GeneratedVisitor;
@@ -18076,6 +18080,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Factorial" => Ok(ScalarFunction::Factorial),
                     "Lcm" => Ok(ScalarFunction::Lcm),
                     "Gcd" => Ok(ScalarFunction::Gcd),
+                    "Greatest" => Ok(ScalarFunction::Greatest),
+                    "Least" => Ok(ScalarFunction::Least),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index 4cf50d70bf..acf85d5ab0 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2208,6 +2208,8 @@ pub enum ScalarFunction {
     Factorial = 83,
     Lcm = 84,
     Gcd = 85,
+    Greatest = 86,
+    Least = 87,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2302,6 +2304,8 @@ impl ScalarFunction {
             ScalarFunction::Factorial => "Factorial",
             ScalarFunction::Lcm => "Lcm",
             ScalarFunction::Gcd => "Gcd",
+            ScalarFunction::Greatest => "Greatest",
+            ScalarFunction::Least => "Least",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -2393,6 +2397,8 @@ impl ScalarFunction {
             "Factorial" => Some(Self::Factorial),
             "Lcm" => Some(Self::Lcm),
             "Gcd" => Some(Self::Gcd),
+            "Greatest" => Some(Self::Greatest),
+            "Least" => Some(Self::Least),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index 1150220bef..4438ad945b 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -40,7 +40,8 @@ use datafusion_expr::{
     cbrt, ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh,
     date_bin, date_part, date_trunc, degrees, digest, exp,
     expr::{self, InList, Sort, WindowFunction},
-    factorial, floor, from_unixtime, gcd, lcm, left, ln, log, log10, log2,
+    factorial, floor, from_unixtime, gcd, greatest, lcm, least, left, ln, log, log10,
+    log2,
     logical_plan::{PlanType, StringifiedPlan},
     lower, lpad, ltrim, md5, now, nullif, octet_length, pi, power, radians, random,
     regexp_match, regexp_replace, repeat, replace, reverse, right, round, rpad, rtrim,
@@ -494,6 +495,8 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
             ScalarFunction::FromUnixtime => Self::FromUnixtime,
             ScalarFunction::Atan2 => Self::Atan2,
             ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
+            ScalarFunction::Greatest => Self::Greatest,
+            ScalarFunction::Least => Self::Least,
         }
     }
 }
@@ -1273,6 +1276,18 @@ pub fn parse_expr(
                         .map(|expr| parse_expr(expr, registry))
                         .collect::<Result<Vec<_>, _>>()?,
                 )),
+                ScalarFunction::Greatest => Ok(greatest(
+                    &args.to_owned()
+                        .iter()
+                        .map(|expr| parse_expr(expr, registry))
+                        .collect::<Result<Vec<_>, _>>()?,
+                )),
+                ScalarFunction::Least => Ok(least(
+                    &args.to_owned()
+                        .iter()
+                        .map(|expr| parse_expr(expr, registry))
+                        .collect::<Result<Vec<_>, _>>()?,
+                )),
                 ScalarFunction::Lpad => Ok(lpad(
                     args.to_owned()
                         .iter()
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index 191c491944..6e27cbc4ad 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1386,6 +1386,8 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
             BuiltinScalarFunction::FromUnixtime => Self::FromUnixtime,
             BuiltinScalarFunction::Atan2 => Self::Atan2,
             BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
+            BuiltinScalarFunction::Greatest => Self::Greatest,
+            BuiltinScalarFunction::Least => Self::Least,
         };
 
         Ok(scalar_function)
diff --git a/docs/source/user-guide/sql/sql_status.md b/docs/source/user-guide/sql/sql_status.md
index 6075a23330..b7f4a0bc11 100644
--- a/docs/source/user-guide/sql/sql_status.md
+++ b/docs/source/user-guide/sql/sql_status.md
@@ -76,6 +76,9 @@
   - [x] nullif
   - [x] case
   - [x] coalesce
+- Comparison functions
+  - [ ] greatest
+  - [ ] least
 - Approximation functions
   - [x] approx_distinct
   - [x] approx_median