Posted to commits@spark.apache.org by gu...@apache.org on 2023/06/21 23:47:39 UTC

[spark] branch master updated: [SPARK-43710][PS][CONNECT] Support `functions.date_part` for Spark Connect

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new bc20e85b0e1 [SPARK-43710][PS][CONNECT] Support `functions.date_part` for Spark Connect
bc20e85b0e1 is described below

commit bc20e85b0e1da510cc091dbd03f210ef9fc56b25
Author: Ruifeng Zheng <ru...@apache.org>
AuthorDate: Thu Jun 22 08:47:27 2023 +0900

    [SPARK-43710][PS][CONNECT] Support `functions.date_part` for Spark Connect
    
    ### What changes were proposed in this pull request?
    Switch pandas-on-Spark to the [newly added `date_part` function](https://github.com/apache/spark/commit/8dc02863b926b9e0780b994f9ee6c5c1058d49a0) in `pyspark.sql.functions`, replacing the internal `SF.date_part` helper.
    
    ### Why are the changes needed?
    To support Spark Connect: the internal helper built the expression through the active `SparkContext` and the Py4J gateway (`sc._jvm`), which are unavailable from a Connect client.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes; `pyspark.pandas.spark.functions.date_part` is removed in favor of the built-in `pyspark.sql.functions.date_part`.
    
    ### How was this patch tested?
    Existing unit tests.
    
    Closes #41691 from zhengruifeng/ps_date_part.
    
    Authored-by: Ruifeng Zheng <ru...@apache.org>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/pandas/indexes/timedelta.py |  6 +++---
 python/pyspark/pandas/spark/functions.py   | 32 ++----------------------------
 2 files changed, 5 insertions(+), 33 deletions(-)
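
For context, here is a minimal sketch (not part of this commit) of the call pattern the change moves to. It assumes a PySpark build that ships the built-in `pyspark.sql.functions.date_part`, and the Connect URL in the comment is a placeholder:

    # Minimal sketch (not from this commit) of the built-in F.date_part
    # that pandas-on-Spark now delegates to.
    from pyspark.sql import SparkSession
    import pyspark.sql.functions as F

    # Classic JVM-backed session; a Spark Connect client would instead use
    # SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
    spark = SparkSession.builder.getOrCreate()

    df = spark.sql("SELECT TIMESTAMP '2023-06-22 08:47:27' AS ts")

    # The field to extract is passed as a literal column.
    df.select(
        F.date_part(F.lit("YEAR"), "ts").alias("year"),
        F.date_part(F.lit("HOUR"), "ts").alias("hour"),
    ).show()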

diff --git a/python/pyspark/pandas/indexes/timedelta.py b/python/pyspark/pandas/indexes/timedelta.py
index 564c484d968..e46d602e985 100644
--- a/python/pyspark/pandas/indexes/timedelta.py
+++ b/python/pyspark/pandas/indexes/timedelta.py
@@ -150,9 +150,9 @@ class TimedeltaIndex(Index):
 
         @no_type_check
         def get_seconds(scol):
-            hour_scol = SF.date_part("HOUR", scol)
-            minute_scol = SF.date_part("MINUTE", scol)
-            second_scol = SF.date_part("SECOND", scol)
+            hour_scol = F.date_part("HOUR", scol)
+            minute_scol = F.date_part("MINUTE", scol)
+            second_scol = F.date_part("SECOND", scol)
             return (
                 F.when(
                     hour_scol < 0,
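
The hunk above is cut off by the three-line diff context, but its surrounding logic follows pandas, where `Timedelta.seconds` is always >= 0 and below 86400, so a negative interval wraps around one day. A hypothetical plain-Python analogue (not part of the commit):

    # Hypothetical analogue (not in the commit) of get_seconds: pandas
    # defines Timedelta.seconds as >= 0 and < 86400, so e.g. an interval
    # of -1 hour maps to 82800 seconds.
    def get_seconds_py(hour: int, minute: int, second: int) -> int:
        total = hour * 3600 + minute * 60 + second
        return total % 86400 if total < 0 else total
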
diff --git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py
index a904071aee7..b33705263c7 100644
--- a/python/pyspark/pandas/spark/functions.py
+++ b/python/pyspark/pandas/spark/functions.py
@@ -17,15 +17,11 @@
 """
 Additional Spark functions used in pandas-on-Spark.
 """
-from typing import Union, no_type_check
+from typing import Union
 
 from pyspark import SparkContext
 import pyspark.sql.functions as F
-from pyspark.sql.column import (
-    Column,
-    _to_java_column,
-    _create_column_from_literal,
-)
+from pyspark.sql.column import Column
 
 # For supporting Spark Connect
 from pyspark.sql.utils import is_remote
@@ -145,27 +141,3 @@ def repeat(col: Column, n: Union[int, Column]) -> Column:
     """
     _n = F.lit(n) if isinstance(n, int) else n
     return F.call_udf("repeat", col, _n)
-
-
-def date_part(field: Union[str, Column], source: Column) -> Column:
-    """
-    Extracts a part of the date/timestamp or interval source.
-    """
-    sc = SparkContext._active_spark_context
-    field = (
-        _to_java_column(field) if isinstance(field, Column) else _create_column_from_literal(field)
-    )
-    return _call_udf(sc, "date_part", field, _to_java_column(source))
-
-
-@no_type_check
-def _call_udf(sc, name, *cols):
-    return Column(sc._jvm.functions.callUDF(name, _make_arguments(sc, *cols)))
-
-
-@no_type_check
-def _make_arguments(sc, *cols):
-    java_arr = sc._gateway.new_array(sc._jvm.Column, len(cols))
-    for i, col in enumerate(cols):
-        java_arr[i] = col
-    return java_arr
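
The deleted helper above is also why this change is needed for Connect: it reached for the active `SparkContext` and the Py4J gateway (`sc._jvm`, `sc._gateway`), neither of which exists in a Spark Connect client. A sketch (not in the commit) of the precondition it silently relied on:

    # Sketch (not in the commit): under Spark Connect, is_remote() is True
    # and there is no active SparkContext, so sc._jvm / sc._gateway
    # lookups like those in the deleted helper would fail.
    from pyspark import SparkContext
    from pyspark.sql.utils import is_remote

    def jvm_helpers_available() -> bool:
        return not is_remote() and SparkContext._active_spark_context is not None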

