You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/10/26 01:00:54 UTC

[GitHub] [spark] HyukjinKwon commented on a change in pull request #30143: [SPARK-32084][PYTHON][SQL] Expand dictionary functions

HyukjinKwon commented on a change in pull request #30143:
URL: https://github.com/apache/spark/pull/30143#discussion_r511675622



##########
File path: python/pyspark/sql/functions.py
##########
@@ -42,154 +42,455 @@
 # since it requires to make every single overridden definition.
 
 
-def _create_function(name, doc=""):
-    """Create a PySpark function by its name"""
-    def _(col):
-        sc = SparkContext._active_spark_context
-        jc = getattr(sc._jvm.functions, name)(col._jc if isinstance(col, Column) else col)
-        return Column(jc)
-    _.__name__ = name
-    _.__doc__ = doc
-    return _
-
-
-def _create_function_over_column(name, doc=""):
-    """Similar with `_create_function` but creates a PySpark function that takes a column
-    (as string as well). This is mainly for PySpark functions to take strings as
-    column names.
-    """
-    def _(col):
-        sc = SparkContext._active_spark_context
-        jc = getattr(sc._jvm.functions, name)(_to_java_column(col))
-        return Column(jc)
-    _.__name__ = name
-    _.__doc__ = doc
-    return _
-
-
-def _wrap_deprecated_function(func, message):
-    """ Wrap the deprecated function to print out deprecation warnings"""
-    def _(col):
-        warnings.warn(message, DeprecationWarning)
-        return func(col)
-    return functools.wraps(func)(_)
-
-
-def _create_binary_mathfunction(name, doc=""):
-    """ Create a binary mathfunction by name"""
-    def _(col1, col2):
-        sc = SparkContext._active_spark_context
-        # For legacy reasons, the arguments here can be implicitly converted into floats,
-        # if they are not columns or strings.
-        if isinstance(col1, Column):
-            arg1 = col1._jc
-        elif isinstance(col1, str):
-            arg1 = _create_column_from_name(col1)
-        else:
-            arg1 = float(col1)
-
-        if isinstance(col2, Column):
-            arg2 = col2._jc
-        elif isinstance(col2, str):
-            arg2 = _create_column_from_name(col2)
-        else:
-            arg2 = float(col2)
-
-        jc = getattr(sc._jvm.functions, name)(arg1, arg2)
-        return Column(jc)
-    _.__name__ = name
-    _.__doc__ = doc
-    return _
-
-
-def _create_window_function(name, doc=''):
-    """ Create a window function by name """
-    def _():
-        sc = SparkContext._active_spark_context
-        jc = getattr(sc._jvm.functions, name)()
-        return Column(jc)
-    _.__name__ = name
-    _.__doc__ = 'Window function: ' + doc
-    return _
+def _get_get_jvm_function(name, sc):
+    """
+    Retrieves JVM function identified by name from
+    Java gateway associated with sc.
+    """
+    return getattr(sc._jvm.functions, name)
+
+
+def _invoke_function(name, *args):
+    """
+    Invokes JVM function identified by name with args
+    and wraps the result with :class:`Column`.
+    """
+    jf = _get_get_jvm_function(name, SparkContext._active_spark_context)
+    return Column(jf(*args))
+
+
+def _invoke_function_over_column(name, col):
+    """
+    Invokes unary JVM function identified by name
+    and wraps the result with :class:`Column`.
+    """
+    return _invoke_function(name, _to_java_column(col))
+
+
+def _invoke_binary_math_function(name, col1, col2):
+    """
+    Invokes binary JVM math function identified by name
+    and wraps the result with :class:`Column`.
+    """
+    return _invoke_function(
+        name,
+        float(col1) if isinstance(col1, numbers.Real) else _to_java_column(col1),

Review comment:
       I think you can keep the comment about this - the arguments are converted to floats for legacy reasons.
   Also, I think we should keep the previous check as is. For example, the new check no longer works with decimals:
   
   ```python
   >>> isinstance(decimal.Decimal(1), numbers.Real)
   False
   >>> float(decimal.Decimal(1))
   1.0
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org