You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/06/19 05:29:00 UTC

[spark] branch master updated: [SPARK-43929][SQL][PYTHON][CONNECT] Add date time functions to Scala, Python and Connect API - part 1

This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 25a14c313bf [SPARK-43929][SQL][PYTHON][CONNECT] Add date time functions to Scala, Python and Connect API - part 1
25a14c313bf is described below

commit 25a14c313bfd9e18a0ea06a8a521ee28878c2045
Author: Jiaan Geng <be...@163.com>
AuthorDate: Mon Jun 19 13:28:40 2023 +0800

    [SPARK-43929][SQL][PYTHON][CONNECT] Add date time functions to Scala, Python and Connect API - part 1
    
    ### What changes were proposed in this pull request?
    This PR want add date time functions to Scala, Python and Connect API. These functions show below.
    
    - dateadd
    - date_diff
    - date_from_unix_date
    - day
    
    The origin plan also contains the two function `date_part` and `datepart`. You can see this PR exclude them, since we can't get the data type for unresolved expressions. Please refer https://github.com/apache/spark/blob/b97ce8b9a99c570fc57dec967e7e9db3d115c1db/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala#L2835 and https://github.com/apache/spark/blob/b97ce8b9a99c570fc57dec967e7e9db3d115c1db/sql/catalyst/src/main/scala/org/apache/spark [...]
    
    ### Why are the changes needed?
    Add date time functions to Scala, Python and Connect API.
    
    ### Does this PR introduce _any_ user-facing change?
    'No'.
    New feature.
    
    ### How was this patch tested?
    New test cases.
    
    Closes #41636 from beliefer/SPARK-43929.
    
    Authored-by: Jiaan Geng <be...@163.com>
    Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
 .../scala/org/apache/spark/sql/functions.scala     |  55 ++++++++++
 .../apache/spark/sql/PlanGenerationTestSuite.scala |  16 +++
 .../explain-results/function_date_diff.explain     |   2 +
 .../function_date_from_unix_date.explain           |   2 +
 .../explain-results/function_dateadd.explain       |   2 +
 .../explain-results/function_day.explain           |   2 +
 .../query-tests/queries/function_date_diff.json    |  42 ++++++++
 .../queries/function_date_diff.proto.bin           | Bin 0 -> 158 bytes
 .../queries/function_date_from_unix_date.json      |  25 +++++
 .../queries/function_date_from_unix_date.proto.bin | Bin 0 -> 132 bytes
 .../query-tests/queries/function_dateadd.json      |  29 +++++
 .../query-tests/queries/function_dateadd.proto.bin | Bin 0 -> 127 bytes
 .../query-tests/queries/function_day.json          |  25 +++++
 .../query-tests/queries/function_day.proto.bin     | Bin 0 -> 117 bytes
 .../source/reference/pyspark.sql/functions.rst     |   4 +
 python/pyspark/sql/connect/functions.py            |  29 +++++
 python/pyspark/sql/functions.py                    | 119 +++++++++++++++++++++
 .../scala/org/apache/spark/sql/functions.scala     |  48 +++++++++
 .../apache/spark/sql/DataFrameFunctionsSuite.scala |   2 +-
 .../org/apache/spark/sql/DateFunctionsSuite.scala  |  37 ++++++-
 20 files changed, 435 insertions(+), 4 deletions(-)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 9c2a5b96182..206b7df2091 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -4107,6 +4107,21 @@ object functions {
    */
   def date_add(start: Column, days: Column): Column = Column.fn("date_add", start, days)
 
+  /**
+   * Returns the date that is `days` days after `start`
+   *
+   * @param start
+   *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
+   *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
+   * @param days
+   *   A column of the number of days to add to `start`, can be negative to subtract days
+   * @return
+   *   A date, or null if `start` was a string that could not be cast to a date
+   * @group datetime_funcs
+   * @since 3.5.0
+   */
+  def dateadd(start: Column, days: Column): Column = Column.fn("dateadd", start, days)
+
   /**
    * Returns the date that is `days` days before `start`
    *
@@ -4161,6 +4176,37 @@ object functions {
    */
   def datediff(end: Column, start: Column): Column = Column.fn("datediff", end, start)
 
+  /**
+   * Returns the number of days from `start` to `end`.
+   *
+   * Only considers the date part of the input. For example:
+   * {{{
+   * dateddiff("2018-01-10 00:00:00", "2018-01-09 23:59:59")
+   * // returns 1
+   * }}}
+   *
+   * @param end
+   *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
+   *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
+   * @param start
+   *   A date, timestamp or string. If a string, the data must be in a format that can be cast to
+   *   a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
+   * @return
+   *   An integer, or null if either `end` or `start` were strings that could not be cast to a
+   *   date. Negative if `end` is before `start`
+   * @group datetime_funcs
+   * @since 3.5.0
+   */
+  def date_diff(end: Column, start: Column): Column = Column.fn("date_diff", end, start)
+
+  /**
+   * Create date from the number of `days` since 1970-01-01.
+   *
+   * @group datetime_funcs
+   * @since 3.5.0
+   */
+  def date_from_unix_date(days: Column): Column = Column.fn("date_from_unix_date", days)
+
   /**
    * Extracts the year as an integer from a given date/timestamp/string.
    * @return
@@ -4207,6 +4253,15 @@ object functions {
    */
   def dayofmonth(e: Column): Column = Column.fn("dayofmonth", e)
 
+  /**
+   * Extracts the day of the month as an integer from a given date/timestamp/string.
+   * @return
+   *   An integer, or null if the input was a string that could not be cast to a date
+   * @group datetime_funcs
+   * @since 3.5.0
+   */
+  def day(e: Column): Column = Column.fn("day", e)
+
   /**
    * Extracts the day of the year as an integer from a given date/timestamp/string.
    * @return
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 7633cd7d0c0..ca7797f6df1 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -1921,6 +1921,10 @@ class PlanGenerationTestSuite
     fn.date_add(fn.col("d"), 2)
   }
 
+  temporalFunctionTest("dateadd") {
+    fn.dateadd(fn.col("d"), lit(2))
+  }
+
   temporalFunctionTest("date_sub") {
     fn.date_sub(fn.col("d"), 2)
   }
@@ -1929,6 +1933,14 @@ class PlanGenerationTestSuite
     fn.datediff(fn.col("d"), fn.make_date(lit(2020), lit(10), lit(10)))
   }
 
+  temporalFunctionTest("date_diff") {
+    fn.date_diff(fn.col("d"), fn.make_date(lit(2020), lit(10), lit(10)))
+  }
+
+  temporalFunctionTest("date_from_unix_date") {
+    fn.date_from_unix_date(lit(10))
+  }
+
   temporalFunctionTest("year") {
     fn.year(fn.col("d"))
   }
@@ -1949,6 +1961,10 @@ class PlanGenerationTestSuite
     fn.dayofmonth(fn.col("d"))
   }
 
+  temporalFunctionTest("day") {
+    fn.day(fn.col("d"))
+  }
+
   temporalFunctionTest("dayofyear") {
     fn.dayofyear(fn.col("d"))
   }
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_date_diff.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_date_diff.explain
new file mode 100644
index 00000000000..4b22dfe0902
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_date_diff.explain
@@ -0,0 +1,2 @@
+Project [date_diff(d#0, make_date(2020, 10, 10, false)) AS date_diff(d, make_date(2020, 10, 10))#0]
++- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_date_from_unix_date.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_date_from_unix_date.explain
new file mode 100644
index 00000000000..547b33063aa
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_date_from_unix_date.explain
@@ -0,0 +1,2 @@
+Project [date_from_unix_date(10) AS date_from_unix_date(10)#0]
++- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain
new file mode 100644
index 00000000000..66325085b9c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain
@@ -0,0 +1,2 @@
+Project [date_add(d#0, 2) AS date_add(d, 2)#0]
++- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_day.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_day.explain
new file mode 100644
index 00000000000..1ef75653dd7
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_day.explain
@@ -0,0 +1,2 @@
+Project [day(d#0) AS day(d)#0]
++- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_date_diff.json b/connector/connect/common/src/test/resources/query-tests/queries/function_date_diff.json
new file mode 100644
index 00000000000..b6094ff8734
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_date_diff.json
@@ -0,0 +1,42 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "date_diff",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "d"
+          }
+        }, {
+          "unresolvedFunction": {
+            "functionName": "make_date",
+            "arguments": [{
+              "literal": {
+                "integer": 2020
+              }
+            }, {
+              "literal": {
+                "integer": 10
+              }
+            }, {
+              "literal": {
+                "integer": 10
+              }
+            }]
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_date_diff.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_date_diff.proto.bin
new file mode 100644
index 00000000000..5621af09474
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_date_diff.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.json b/connector/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.json
new file mode 100644
index 00000000000..ada0747743b
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.json
@@ -0,0 +1,25 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "date_from_unix_date",
+        "arguments": [{
+          "literal": {
+            "integer": 10
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.proto.bin
new file mode 100644
index 00000000000..5cbd76eda90
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_dateadd.json b/connector/connect/common/src/test/resources/query-tests/queries/function_dateadd.json
new file mode 100644
index 00000000000..2658c724d28
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_dateadd.json
@@ -0,0 +1,29 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "dateadd",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "d"
+          }
+        }, {
+          "literal": {
+            "integer": 2
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_dateadd.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_dateadd.proto.bin
new file mode 100644
index 00000000000..e72a77f0e23
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_dateadd.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_day.json b/connector/connect/common/src/test/resources/query-tests/queries/function_day.json
new file mode 100644
index 00000000000..c1e4b4d13fb
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_day.json
@@ -0,0 +1,25 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "day",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "d"
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_day.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_day.proto.bin
new file mode 100644
index 00000000000..e72a4a354c3
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_day.proto.bin differ
diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index 66b2ad149dc..4ea15bd44bd 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -121,10 +121,14 @@ Datetime Functions
     current_timestamp
     current_timezone
     date_add
+    date_diff
     date_format
+    date_from_unix_date
     date_sub
     date_trunc
+    dateadd
     datediff
+    day
     dayofmonth
     dayofweek
     dayofyear
diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index 84a44baccdc..6ca26191fc4 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -2573,6 +2573,13 @@ def dayofmonth(col: "ColumnOrName") -> Column:
 dayofmonth.__doc__ = pysparkfuncs.dayofmonth.__doc__
 
 
+def day(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("day", col)
+
+
+day.__doc__ = pysparkfuncs.day.__doc__
+
+
 def dayofyear(col: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("dayofyear", col)
 
@@ -2623,6 +2630,14 @@ def date_add(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
 date_add.__doc__ = pysparkfuncs.date_add.__doc__
 
 
+def dateadd(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
+    days = lit(days) if isinstance(days, int) else days
+    return _invoke_function_over_columns("dateadd", start, days)
+
+
+dateadd.__doc__ = pysparkfuncs.dateadd.__doc__
+
+
 def date_sub(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
     days = lit(days) if isinstance(days, int) else days
     return _invoke_function_over_columns("date_sub", start, days)
@@ -2638,6 +2653,20 @@ def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column:
 datediff.__doc__ = pysparkfuncs.datediff.__doc__
 
 
+def date_diff(end: "ColumnOrName", start: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("date_diff", end, start)
+
+
+date_diff.__doc__ = pysparkfuncs.date_diff.__doc__
+
+
+def date_from_unix_date(days: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("date_from_unix_date", days)
+
+
+date_from_unix_date.__doc__ = pysparkfuncs.date_from_unix_date.__doc__
+
+
 def add_months(start: "ColumnOrName", months: Union["ColumnOrName", int]) -> Column:
     months = lit(months) if isinstance(months, int) else months
     return _invoke_function_over_columns("add_months", start, months)
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 0d8f69daabb..d970fc0bb1e 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -5798,6 +5798,32 @@ def dayofmonth(col: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("dayofmonth", col)
 
 
+@try_remote_functions
+def day(col: "ColumnOrName") -> Column:
+    """
+    Extract the day of the month of a given date/timestamp as integer.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target date/timestamp column to work on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        day of the month for given date/timestamp as integer.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(day('dt').alias('day')).collect()
+    [Row(day=8)]
+    """
+    return _invoke_function_over_columns("day", col)
+
+
 @try_remote_functions
 def dayofyear(col: "ColumnOrName") -> Column:
     """
@@ -6019,6 +6045,41 @@ def date_add(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
     return _invoke_function_over_columns("date_add", start, days)
 
 
+@try_remote_functions
+def dateadd(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
+    """
+    Returns the date that is `days` days after `start`. If `days` is a negative value
+    then these amount of days will be deducted from `start`.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    start : :class:`~pyspark.sql.Column` or str
+        date column to work on.
+    days : :class:`~pyspark.sql.Column` or str or int
+        how many days after the given date to calculate.
+        Accepts negative value as well to calculate backwards in time.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        a date after/before given number of days.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([('2015-04-08', 2,)], ['dt', 'add'])
+    >>> df.select(dateadd(df.dt, 1).alias('next_date')).collect()
+    [Row(next_date=datetime.date(2015, 4, 9))]
+    >>> df.select(dateadd(df.dt, df.add.cast('integer')).alias('next_date')).collect()
+    [Row(next_date=datetime.date(2015, 4, 10))]
+    >>> df.select(dateadd('dt', -1).alias('prev_date')).collect()
+    [Row(prev_date=datetime.date(2015, 4, 7))]
+    """
+    days = lit(days) if isinstance(days, int) else days
+    return _invoke_function_over_columns("dateadd", start, days)
+
+
 @try_remote_functions
 def date_sub(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
     """
@@ -6088,6 +6149,64 @@ def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("datediff", end, start)
 
 
+@try_remote_functions
+def date_diff(end: "ColumnOrName", start: "ColumnOrName") -> Column:
+    """
+    Returns the number of days from `start` to `end`.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    end : :class:`~pyspark.sql.Column` or str
+        to date column to work on.
+    start : :class:`~pyspark.sql.Column` or str
+        from date column to work on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        difference in days between two dates.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([('2015-04-08','2015-05-10')], ['d1', 'd2'])
+    >>> df.select(date_diff(df.d2, df.d1).alias('diff')).collect()
+    [Row(diff=32)]
+    """
+    return _invoke_function_over_columns("date_diff", end, start)
+
+
+@try_remote_functions
+def date_from_unix_date(days: "ColumnOrName") -> Column:
+    """
+    Create date from the number of `days` since 1970-01-01.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    days : :class:`~pyspark.sql.Column` or str
+        the target column to work on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the date from the number of days since 1970-01-01.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(date_from_unix_date(lit(1))).show()
+    +----------------------+
+    |date_from_unix_date(1)|
+    +----------------------+
+    |            1970-01-02|
+    +----------------------+
+    """
+    return _invoke_function_over_columns("date_from_unix_date", days)
+
+
 @try_remote_functions
 def add_months(start: "ColumnOrName", months: Union["ColumnOrName", int]) -> Column:
     """
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 81a57368a8d..984a6dce875 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -4197,6 +4197,18 @@ object functions {
    */
   def date_add(start: Column, days: Column): Column = withExpr { DateAdd(start.expr, days.expr) }
 
+  /**
+   * Returns the date that is `days` days after `start`
+   *
+   * @param start A date, timestamp or string. If a string, the data must be in a format that
+   *              can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
+   * @param days  A column of the number of days to add to `start`, can be negative to subtract days
+   * @return A date, or null if `start` was a string that could not be cast to a date
+   * @group datetime_funcs
+   * @since 3.5.0
+   */
+  def dateadd(start: Column, days: Column): Column = date_add(start, days)
+
   /**
    * Returns the date that is `days` days before `start`
    *
@@ -4242,6 +4254,34 @@ object functions {
    */
   def datediff(end: Column, start: Column): Column = withExpr { DateDiff(end.expr, start.expr) }
 
+  /**
+   * Returns the number of days from `start` to `end`.
+   *
+   * Only considers the date part of the input. For example:
+   * {{{
+   * dateddiff("2018-01-10 00:00:00", "2018-01-09 23:59:59")
+   * // returns 1
+   * }}}
+   *
+   * @param end A date, timestamp or string. If a string, the data must be in a format that
+   *            can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
+   * @param start A date, timestamp or string. If a string, the data must be in a format that
+   *              can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS`
+   * @return An integer, or null if either `end` or `start` were strings that could not be cast to
+   *         a date. Negative if `end` is before `start`
+   * @group datetime_funcs
+   * @since 3.5.0
+   */
+  def date_diff(end: Column, start: Column): Column = datediff(end, start)
+
+  /**
+   * Create date from the number of `days` since 1970-01-01.
+   *
+   * @group datetime_funcs
+   * @since 3.5.0
+   */
+  def date_from_unix_date(days: Column): Column = withExpr { DateFromUnixDate(days.expr) }
+
   /**
    * Extracts the year as an integer from a given date/timestamp/string.
    * @return An integer, or null if the input was a string that could not be cast to a date
@@ -4283,6 +4323,14 @@ object functions {
    */
   def dayofmonth(e: Column): Column = withExpr { DayOfMonth(e.expr) }
 
+  /**
+   * Extracts the day of the month as an integer from a given date/timestamp/string.
+   * @return An integer, or null if the input was a string that could not be cast to a date
+   * @group datetime_funcs
+   * @since 3.5.0
+   */
+  def day(e: Column): Column = dayofmonth(e)
+
   /**
    * Extracts the day of the year as an integer from a given date/timestamp/string.
    * @return An integer, or null if the input was a string that could not be cast to a date
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 9b41720bf9a..6a143c4230d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -78,7 +78,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
     val excludedSqlFunctions = Set(
       "random",
       "array_agg", "char_length", "character_length",
-      "lcase", "ucase", "day", "cardinality", "sha",
+      "lcase", "ucase", "cardinality", "sha",
       // aliases for existing functions
       "reflect", "java_method" // Only needed in SQL
     )
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index 94e1ee2366a..8a50e935847 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -177,13 +177,17 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
       Row(4, 4, 4))
   }
 
-  test("dayofmonth") {
+  test("dayofmonth & day") {
     val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
 
     checkAnswer(
       df.select(dayofmonth($"a"), dayofmonth($"b"), dayofmonth($"c")),
       Row(8, 8, 8))
 
+    checkAnswer(
+      df.select(day($"a"), day($"b"), day($"c")),
+      Row(8, 8, 8))
+
     checkAnswer(
       df.selectExpr("day(a)", "day(b)", "dayofmonth(c)"),
       Row(8, 8, 8))
@@ -249,7 +253,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
       Row(15, 15, 15))
   }
 
-  test("function date_add") {
+  test("function date_add & dateadd") {
     val st1 = "2015-06-01 12:34:56"
     val st2 = "2015-06-02 12:34:56"
     val t1 = Timestamp.valueOf(st1)
@@ -271,10 +275,25 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
     checkAnswer(
       df.select(date_add(col("ss"), 7)),
       Seq(Row(Date.valueOf("2015-06-08")), Row(Date.valueOf("2015-06-09"))))
+    checkAnswer(
+      df.select(dateadd(col("d"), lit(1))),
+      Seq(Row(Date.valueOf("2015-06-02")), Row(Date.valueOf("2015-06-03"))))
+    checkAnswer(
+      df.select(dateadd(col("t"), lit(3))),
+      Seq(Row(Date.valueOf("2015-06-04")), Row(Date.valueOf("2015-06-05"))))
+    checkAnswer(
+      df.select(dateadd(col("s"), lit(5))),
+      Seq(Row(Date.valueOf("2015-06-06")), Row(Date.valueOf("2015-06-07"))))
+    checkAnswer(
+      df.select(dateadd(col("ss"), lit(7))),
+      Seq(Row(Date.valueOf("2015-06-08")), Row(Date.valueOf("2015-06-09"))))
 
     checkAnswer(
       df.withColumn("x", lit(1)).select(date_add(col("d"), col("x"))),
       Seq(Row(Date.valueOf("2015-06-02")), Row(Date.valueOf("2015-06-03"))))
+    checkAnswer(
+      df.withColumn("x", lit(1)).select(dateadd(col("d"), col("x"))),
+      Seq(Row(Date.valueOf("2015-06-02")), Row(Date.valueOf("2015-06-03"))))
 
     checkAnswer(df.selectExpr("DATE_ADD(null, 1)"), Seq(Row(null), Row(null)))
     checkAnswer(
@@ -893,7 +912,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
     }
   }
 
-  test("datediff") {
+  test("datediff & date_diff") {
     val df = Seq(
       (Date.valueOf("2015-07-24"), Timestamp.valueOf("2015-07-24 01:00:00"),
         "2015-07-23", "2015-07-23 03:00:00"),
@@ -903,10 +922,22 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
     checkAnswer(df.select(datediff(col("a"), col("b"))), Seq(Row(0), Row(0)))
     checkAnswer(df.select(datediff(col("a"), col("c"))), Seq(Row(1), Row(1)))
     checkAnswer(df.select(datediff(col("d"), col("b"))), Seq(Row(-1), Row(-1)))
+    checkAnswer(df.select(date_diff(col("a"), col("b"))), Seq(Row(0), Row(0)))
+    checkAnswer(df.select(date_diff(col("a"), col("c"))), Seq(Row(1), Row(1)))
+    checkAnswer(df.select(date_diff(col("d"), col("b"))), Seq(Row(-1), Row(-1)))
     checkAnswer(df.selectExpr("datediff(a, d)"), Seq(Row(1), Row(1)))
     checkAnswer(df.selectExpr("date_diff(a, d)"), Seq(Row(1), Row(1)))
   }
 
+  test("date_from_unix_date") {
+    val df = spark.range(1).select(
+      date_from_unix_date(lit(1)).cast("string"),
+      date_from_unix_date(lit(20)).cast("string"),
+      date_from_unix_date(lit(300)).cast("string"))
+
+    checkAnswer(df, Seq(Row("1970-01-02", "1970-01-21", "1970-10-28")))
+  }
+
   test("to_timestamp with microseconds precision") {
     withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") {
       val timestamp = "1970-01-01T00:00:00.123456Z"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org