Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/12/02 06:36:38 UTC

[GitHub] [spark] zhengruifeng commented on a diff in pull request #38872: [SPARK-41357][CONNECT][PYTHON] Implement math functions

zhengruifeng commented on code in PR #38872:
URL: https://github.com/apache/spark/pull/38872#discussion_r1037827891


##########
python/pyspark/sql/connect/functions.py:
##########
@@ -260,3 +312,1299 @@ def desc_nulls_last(col: "ColumnOrName") -> Column:
 
     """
     return _to_col(col).desc_nulls_last()
+
+
+# Math Functions
+
+
+def abs(col: "ColumnOrName") -> Column:
+    """
+    Computes the absolute value.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        column for computed results.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(abs(lit(-1))).show()
+    +-------+
+    |abs(-1)|
+    +-------+
+    |      1|
+    +-------+
+    """
+    return _invoke_function_over_columns("abs", col)
+
+
+def acos(col: "ColumnOrName") -> Column:
+    """
+    Computes inverse cosine of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        inverse cosine of `col`, as if computed by `java.lang.Math.acos()`
+
+    Examples
+    --------
+    >>> df = spark.range(1, 3)
+    >>> df.select(acos(df.id)).show()
+    +--------+
+    |ACOS(id)|
+    +--------+
+    |     0.0|
+    |     NaN|
+    +--------+
+    """
+    return _invoke_function_over_columns("acos", col)
+
+
+def acosh(col: "ColumnOrName") -> Column:
+    """
+    Computes inverse hyperbolic cosine of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the column for computed results.
+
+    Examples
+    --------
+    >>> df = spark.range(2)
+    >>> df.select(acosh(col("id"))).show()
+    +---------+
+    |ACOSH(id)|
+    +---------+
+    |      NaN|
+    |      0.0|
+    +---------+
+    """
+    return _invoke_function_over_columns("acosh", col)
+
+
+def asin(col: "ColumnOrName") -> Column:
+    """
+    Computes inverse sine of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        inverse sine of `col`, as if computed by `java.lang.Math.asin()`
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([(0,), (2,)])
+    >>> df.select(asin(df.schema.fieldNames()[0])).show()
+    +--------+
+    |ASIN(_1)|
+    +--------+
+    |     0.0|
+    |     NaN|
+    +--------+
+    """
+    return _invoke_function_over_columns("asin", col)
+
+
+def asinh(col: "ColumnOrName") -> Column:
+    """
+    Computes inverse hyperbolic sine of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the column for computed results.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(asinh(col("id"))).show()
+    +---------+
+    |ASINH(id)|
+    +---------+
+    |      0.0|
+    +---------+
+    """
+    return _invoke_function_over_columns("asinh", col)
+
+
+def atan(col: "ColumnOrName") -> Column:
+    """
+    Computes inverse tangent of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        inverse tangent of `col`, as if computed by `java.lang.Math.atan()`
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(atan(df.id)).show()
+    +--------+
+    |ATAN(id)|
+    +--------+
+    |     0.0|
+    +--------+
+    """
+    return _invoke_function_over_columns("atan", col)
+
+
+def atan2(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
+    """
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col1 : str, :class:`~pyspark.sql.Column` or float
+        coordinate on y-axis
+    col2 : str, :class:`~pyspark.sql.Column` or float
+        coordinate on x-axis
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the `theta` component of the point
+        (`r`, `theta`)
+        in polar coordinates that corresponds to the point
+        (`x`, `y`) in Cartesian coordinates,
+        as if computed by `java.lang.Math.atan2()`
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(atan2(lit(1), lit(2))).first()
+    Row(ATAN2(1, 2)=0.46364...)
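+
+    Unlike a plain arc tangent, the signs of both inputs select the quadrant;
+    a small check of that behavior (the point (x=-2, y=-1) lies in the third
+    quadrant, so the angle falls below -pi/2):
+
+    >>> df.select(atan2(lit(-1), lit(-2)).alias('t')).first()
+    Row(t=-2.67794...)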
+    """
+    return _invoke_binary_math_function("atan2", col1, col2)
+
+
+def atanh(col: "ColumnOrName") -> Column:
+    """
+    Computes inverse hyperbolic tangent of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the column for computed results.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([(0,), (2,)], schema=["numbers"])
+    >>> df.select(atanh(df["numbers"])).show()
+    +--------------+
+    |ATANH(numbers)|
+    +--------------+
+    |           0.0|
+    |           NaN|
+    +--------------+
+    """
+    return _invoke_function_over_columns("atanh", col)
+
+
+def bin(col: "ColumnOrName") -> Column:
+    """Returns the string representation of the binary value of the given column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to work on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        binary representation of given value as string.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([2,5], "INT")
+    >>> df.select(bin(df.value).alias('c')).collect()
+    [Row(c='10'), Row(c='101')]
+    """
+    return _invoke_function_over_columns("bin", col)
+
+
+def bround(col: "ColumnOrName", scale: int = 0) -> Column:
+    """
+    Round the given value to `scale` decimal places using HALF_EVEN rounding mode if `scale` >= 0
+    or at the integral part when `scale` < 0.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        input column to round.
+    scale : int, optional, default 0
+        scale value.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        rounded values.
+
+    Examples
+    --------
+    >>> spark.createDataFrame([(2.5,)], ['a']).select(bround('a', 0).alias('r')).collect()
+    [Row(r=2.0)]
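+
+    Under HALF_EVEN, ties go to the nearest even neighbor, so 3.5 rounds up
+    where 2.5 (above) rounds down:
+
+    >>> spark.createDataFrame([(3.5,)], ['a']).select(bround('a', 0).alias('r')).collect()
+    [Row(r=4.0)]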
+    """
+    return _invoke_function("bround", _to_col(col), lit(scale))
+
+
+def cbrt(col: "ColumnOrName") -> Column:
+    """
+    Computes the cube-root of the given value.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the column for computed results.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(cbrt(lit(27))).show()
+    +--------+
+    |CBRT(27)|
+    +--------+
+    |     3.0|
+    +--------+
+    """
+    return _invoke_function_over_columns("cbrt", col)
+
+
+def ceil(col: "ColumnOrName") -> Column:
+    """
+    Computes the ceiling of the given value.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the column for computed results.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(ceil(lit(-0.1))).show()
+    +----------+
+    |CEIL(-0.1)|
+    +----------+
+    |         0|
+    +----------+
+    """
+    return _invoke_function_over_columns("ceil", col)
+
+
+def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column:
+    """
+    Convert a number in a string column from one base to another.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        a column to convert base for.
+    fromBase: int
+        from base number.
+    toBase: int
+        to base number.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        value converted to the target base, as a string.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([("010101",)], ['n'])
+    >>> df.select(conv(df.n, 2, 16).alias('hex')).collect()
+    [Row(hex='15')]
+    """
+    return _invoke_function("conv", _to_col(col), lit(fromBase), lit(toBase))
+
+
+def cos(col: "ColumnOrName") -> Column:
+    """
+    Computes cosine of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        angle in radians
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        cosine of the angle, as if computed by `java.lang.Math.cos()`.
+
+    Examples
+    --------
+    >>> import math
+    >>> df = spark.range(1)
+    >>> df.select(cos(lit(math.pi))).first()
+    Row(COS(3.14159...)=-1.0)
+    """
+    return _invoke_function_over_columns("cos", col)
+
+
+def cosh(col: "ColumnOrName") -> Column:
+    """
+    Computes hyperbolic cosine of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        hyperbolic angle
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()`
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(cosh(lit(1))).first()
+    Row(COSH(1)=1.54308...)
+    """
+    return _invoke_function_over_columns("cosh", col)
+
+
+def cot(col: "ColumnOrName") -> Column:
+    """
+    Computes cotangent of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        angle in radians.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        cotangent of the angle.
+
+    Examples
+    --------
+    >>> import math
+    >>> df = spark.range(1)
+    >>> df.select(cot(lit(math.radians(45)))).first()
+    Row(COT(0.78539...)=1.00000...)
+    """
+    return _invoke_function_over_columns("cot", col)
+
+
+def csc(col: "ColumnOrName") -> Column:
+    """
+    Computes cosecant of the input column.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        angle in radians.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        cosecant of the angle.
+
+    Examples
+    --------
+    >>> import math
+    >>> df = spark.range(1)
+    >>> df.select(csc(lit(math.radians(90)))).first()
+    Row(CSC(1.57079...)=1.0)
+    """
+    return _invoke_function_over_columns("csc", col)
+
+
+def degrees(col: "ColumnOrName") -> Column:
+    """
+    Converts an angle measured in radians to an approximately equivalent angle
+    measured in degrees.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        angle in radians
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        angle in degrees, as if computed by `java.lang.Math.toDegrees()`
+
+    Examples
+    --------
+    >>> import math
+    >>> df = spark.range(1)
+    >>> df.select(degrees(lit(math.pi))).first()
+    Row(DEGREES(3.14159...)=180.0)
+    """
+    return _invoke_function_over_columns("degrees", col)
+
+
+def exp(col: "ColumnOrName") -> Column:
+    """
+    Computes the exponential of the given value.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        column to calculate exponential for.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        exponential of the given value.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(exp(lit(0))).show()
+    +------+
+    |EXP(0)|
+    +------+
+    |   1.0|
+    +------+
+    """
+    return _invoke_function_over_columns("exp", col)
+
+
+def expm1(col: "ColumnOrName") -> Column:
+    """
+    Computes the exponential of the given value minus one.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        column to calculate exponential for.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        exponential less one.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(expm1(lit(1))).first()
+    Row(EXPM1(1)=1.71828...)
+    """
+    return _invoke_function_over_columns("expm1", col)
+
+
+def factorial(col: "ColumnOrName") -> Column:
+    """
+    Computes the factorial of the given value.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        a column to calculate factorial for.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        factorial of given value.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([(5,)], ['n'])
+    >>> df.select(factorial(df.n).alias('f')).collect()
+    [Row(f=120)]
+    """
+    return _invoke_function_over_columns("factorial", col)
+
+
+def floor(col: "ColumnOrName") -> Column:
+    """
+    Computes the floor of the given value.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        column to find floor for.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        nearest integer that is less than or equal to the given value.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(floor(lit(2.5))).show()
+    +----------+
+    |FLOOR(2.5)|
+    +----------+
+    |         2|
+    +----------+
+    """
+    return _invoke_function_over_columns("floor", col)
+
+
+def hex(col: "ColumnOrName") -> Column:
+    """Computes hex value of the given column, which could be :class:`pyspark.sql.types.StringType`,
+    :class:`pyspark.sql.types.BinaryType`, :class:`pyspark.sql.types.IntegerType` or
+    :class:`pyspark.sql.types.LongType`.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to work on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        hexadecimal representation of given value as string.
+
+    Examples
+    --------
+    >>> spark.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect()
+    [Row(hex(a)='414243', hex(b)='3')]
+    """
+    return _invoke_function_over_columns("hex", col)
+
+
+def hypot(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
+    """
+    Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col1 : str, :class:`~pyspark.sql.Column` or float
+        a leg.
+    col2 : str, :class:`~pyspark.sql.Column` or float
+        b leg.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        length of the hypotenuse.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(hypot(lit(1), lit(2))).first()
+    Row(HYPOT(1, 2)=2.23606...)
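+
+    The naive ``sqrt(a^2 + b^2)`` would overflow for legs this large, while
+    ``hypot`` stays finite (a 3-4-5 triangle scaled by 1e300):
+
+    >>> df.select(hypot(lit(3e300), lit(4e300)).alias('h')).first()
+    Row(h=5...e+300)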
+    """
+    return _invoke_binary_math_function("hypot", col1, col2)
+
+
+def log(col: "ColumnOrName") -> Column:
+    """
+    Computes the natural logarithm of the given value.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        column to calculate natural logarithm for.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        natural logarithm of the given value.
+
+    Examples
+    --------
+    >>> import math
+    >>> df = spark.range(1)
+    >>> df.select(log(lit(math.e))).first()
+    Row(ln(2.71828...)=1.0)
+    """
+    return _invoke_function_over_columns("ln", col)
+
+
+def log10(col: "ColumnOrName") -> Column:
+    """
+    Computes the base-10 logarithm of the given value.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        column to calculate logarithm for.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        base-10 logarithm of the given value.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(log10(lit(100))).show()
+    +----------+
+    |LOG10(100)|
+    +----------+
+    |       2.0|
+    +----------+
+    """
+    return _invoke_function_over_columns("log10", col)
+
+
+def log1p(col: "ColumnOrName") -> Column:
+    """
+    Computes the natural logarithm of the given value plus one.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        column to calculate natural logarithm for.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        natural logarithm of the given value plus one.
+
+    Examples
+    --------
+    >>> import math
+    >>> df = spark.range(1)
+    >>> df.select(log1p(lit(math.e))).first()
+    Row(LOG1P(2.71828...)=1.31326...)
+
+    Same as:
+
+    >>> df.select(log(lit(math.e+1))).first()
+    Row(ln(3.71828...)=1.31326...)
+    """
+    return _invoke_function_over_columns("log1p", col)
+
+
+def log2(col: "ColumnOrName") -> Column:
+    """Returns the base-2 logarithm of the argument.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        a column to calculate the logarithm for.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        base-2 logarithm of the given value.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([(4,)], ['a'])
+    >>> df.select(log2('a').alias('log2')).show()
+    +----+
+    |log2|
+    +----+
+    | 2.0|
+    +----+
+    """
+    return _invoke_function_over_columns("log2", col)
+
+
+def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", float]) -> Column:
+    """
+    Returns the positive value of dividend mod divisor.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    dividend : str, :class:`~pyspark.sql.Column` or float
+        the column that contains the dividend, or the specified dividend value
+    divisor : str, :class:`~pyspark.sql.Column` or float
+        the column that contains the divisor, or the specified divisor value
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        positive value of dividend mod divisor.
+
+    Examples
+    --------
+    >>> from pyspark.sql.functions import pmod
+    >>> df = spark.createDataFrame([
+    ...     (1.0, float('nan')), (float('nan'), 2.0), (10.0, 3.0),
+    ...     (float('nan'), float('nan')), (-3.0, 4.0), (-10.0, 3.0),
+    ...     (-5.0, -6.0), (7.0, -8.0), (1.0, 2.0)],
+    ...     ("a", "b"))
+    >>> df.select(pmod("a", "b")).show()
+    +----------+
+    |pmod(a, b)|
+    +----------+
+    |       NaN|
+    |       NaN|
+    |       1.0|
+    |       NaN|
+    |       1.0|
+    |       2.0|
+    |      -5.0|
+    |       7.0|
+    |       1.0|
+    +----------+
+    """
+    return _invoke_binary_math_function("pmod", dividend, divisor)
+
+
+def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
+    """
+    Returns the value of the first argument raised to the power of the second argument.
+
+    .. versionadded:: 3.4.0
+
+    Parameters
+    ----------
+    col1 : str, :class:`~pyspark.sql.Column` or float
+        the base number.
+    col2 : str, :class:`~pyspark.sql.Column` or float
+        the exponent number.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the base raised to the power of the exponent.
+
+    Examples
+    --------
+    >>> df = spark.range(1)
+    >>> df.select(pow(lit(3), lit(2))).first()
+    Row(POWER(3, 2)=9.0)
+    """
+    return _invoke_binary_math_function("power", col1, col2)

Review Comment:
   Be careful: in some places we need to avoid using the [original function name "pow"](https://github.com/apache/spark/blob/f7cfce5e237adccff28c8d2c02423e5afb57e00c/python/pyspark/sql/functions.py#L2084), otherwise the name of the column produced by `pow("a")` will be different, since `pow` is an [alias](https://github.com/apache/spark/blob/7fd654c0142ab9e4002882da4e65d3b25bebd26c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L418) in FunctionRegistry.
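
   A minimal sketch of the naming difference (assuming a plain Spark session
   `spark` and the SQL path; the exact rendered casing is an assumption and may
   vary by version):

   ```python
   # "pow" is registered as an alias, so its column keeps the lowercase alias
   # name, while the canonical "power" renders the expression's pretty name.
   spark.sql("SELECT pow(3, 2), power(3, 2)").columns
   # expected (assumption): ['pow(3, 2)', 'POWER(3, 2)']
   ```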



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

