You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/06/08 10:39:34 UTC
[spark] branch master updated: [SPARK-43943][SQL][PYTHON][CONNECT] Add SQL math functions to Scala and Python
This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f1cca85f513 [SPARK-43943][SQL][PYTHON][CONNECT] Add SQL math functions to Scala and Python
f1cca85f513 is described below
commit f1cca85f51322bac3a75346fa2d01e4133f6e31c
Author: Ruifeng Zheng <ru...@apache.org>
AuthorDate: Thu Jun 8 18:38:59 2023 +0800
[SPARK-43943][SQL][PYTHON][CONNECT] Add SQL math functions to Scala and Python
### What changes were proposed in this pull request?
Add following functions:
* ceiling
* e
* pi
* ln
* negative
* positive
* power
* sign
* std
* width_bucket
to:
* Scala API
* Python API
* Spark Connect Scala Client
* Spark Connect Python Client
This PR also adds `negate` (which already exists in Scala API and SCSC) to Python API and SCPC.
### Why are the changes needed?
for parity
### Does this PR introduce _any_ user-facing change?
yes, new functions
### How was this patch tested?
added ut / doctest
Closes #41435 from zhengruifeng/sql_func_math.
Authored-by: Ruifeng Zheng <ru...@apache.org>
Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
.../scala/org/apache/spark/sql/functions.scala | 101 +++++++++++
.../apache/spark/sql/PlanGenerationTestSuite.scala | 44 +++++
.../explain-results/function_ceiling.explain | 2 +
.../explain-results/function_ceiling_scale.explain | 2 +
.../query-tests/explain-results/function_e.explain | 2 +
.../explain-results/function_ln.explain | 2 +
.../explain-results/function_negative.explain | 2 +
.../explain-results/function_pi.explain | 2 +
.../explain-results/function_positive.explain | 2 +
.../explain-results/function_power.explain | 2 +
.../explain-results/function_sign.explain | 2 +
.../explain-results/function_std.explain | 2 +
.../explain-results/width_bucket.explain | 2 +
.../query-tests/queries/function_ceiling.json | 25 +++
.../query-tests/queries/function_ceiling.proto.bin | Bin 0 -> 173 bytes
.../queries/function_ceiling_scale.json | 29 ++++
.../queries/function_ceiling_scale.proto.bin | Bin 0 -> 179 bytes
.../resources/query-tests/queries/function_e.json | 20 +++
.../query-tests/queries/function_e.proto.bin | Bin 0 -> 163 bytes
.../resources/query-tests/queries/function_ln.json | 25 +++
.../query-tests/queries/function_ln.proto.bin | Bin 0 -> 172 bytes
.../query-tests/queries/function_negative.json | 25 +++
.../queries/function_negative.proto.bin | Bin 0 -> 177 bytes
.../resources/query-tests/queries/function_pi.json | 20 +++
.../query-tests/queries/function_pi.proto.bin | Bin 0 -> 164 bytes
.../query-tests/queries/function_positive.json | 25 +++
.../queries/function_positive.proto.bin | Bin 0 -> 177 bytes
.../query-tests/queries/function_power.json | 29 ++++
.../query-tests/queries/function_power.proto.bin | Bin 0 -> 181 bytes
.../query-tests/queries/function_sign.json | 25 +++
.../query-tests/queries/function_sign.proto.bin | Bin 0 -> 175 bytes
.../query-tests/queries/function_std.json | 25 +++
.../query-tests/queries/function_std.proto.bin | Bin 0 -> 175 bytes
.../query-tests/queries/width_bucket.json | 37 ++++
.../query-tests/queries/width_bucket.proto.bin | Bin 0 -> 94 bytes
.../source/reference/pyspark.sql/functions.rst | 11 ++
python/pyspark/sql/connect/functions.py | 60 +++++++
python/pyspark/sql/functions.py | 190 +++++++++++++++++++++
python/pyspark/sql/tests/test_functions.py | 1 -
.../scala/org/apache/spark/sql/functions.scala | 97 +++++++++++
.../org/apache/spark/sql/MathFunctionsSuite.scala | 29 ++++
41 files changed, 839 insertions(+), 1 deletion(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 9db0ba3f28e..fc9eb074ca9 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -873,6 +873,14 @@ object functions {
*/
def skewness(columnName: String): Column = skewness(Column(columnName))
+ /**
+ * Aggregate function: alias for `stddev_samp`.
+ *
+ * @group agg_funcs
+ * @since 3.5.0
+ */
+ def std(e: Column): Column = stddev(e)
+
/**
* Aggregate function: alias for `stddev_samp`.
*
@@ -1978,6 +1986,22 @@ object functions {
*/
def ceil(columnName: String): Column = ceil(Column(columnName))
+ /**
+ * Computes the ceiling of the given value of `e` to `scale` decimal places.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def ceiling(e: Column, scale: Column): Column = ceil(e, scale)
+
+ /**
+ * Computes the ceiling of the given value of `e` to 0 decimal places.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def ceiling(e: Column): Column = ceil(e)
+
/**
* Convert a number in a string column from one base to another.
*
@@ -2053,6 +2077,14 @@ object functions {
*/
def csc(e: Column): Column = Column.fn("csc", e)
+ /**
+ * Returns Euler's number.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def e(): Column = Column.fn("e")
+
/**
* Computes the exponential of the given value.
*
@@ -2241,6 +2273,14 @@ object functions {
def least(columnName: String, columnNames: String*): Column =
least((columnName +: columnNames).map(Column.apply): _*)
+ /**
+ * Computes the natural logarithm of the given value.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def ln(e: Column): Column = log(e)
+
/**
* Computes the natural logarithm of the given value.
*
@@ -2321,6 +2361,30 @@ object functions {
*/
def log2(columnName: String): Column = log2(Column(columnName))
+ /**
+ * Returns the negated value.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def negative(e: Column): Column = Column.fn("negative", e)
+
+ /**
+ * Returns Pi.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def pi(): Column = Column.fn("pi")
+
+ /**
+ * Returns the value.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def positive(e: Column): Column = Column.fn("positive", e)
+
/**
* Returns the value of the first argument raised to the power of the second argument.
*
@@ -2385,6 +2449,14 @@ object functions {
*/
def pow(l: Double, rightName: String): Column = pow(l, Column(rightName))
+ /**
+ * Returns the value of the first argument raised to the power of the second argument.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def power(l: Column, r: Column): Column = pow(l, r)
+
/**
* Returns the positive value of dividend mod divisor.
*
@@ -2514,6 +2586,14 @@ object functions {
def shiftrightunsigned(e: Column, numBits: Int): Column =
Column.fn("shiftrightunsigned", e, lit(numBits))
+ /**
+ * Computes the signum of the given value.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def sign(e: Column): Column = signum(e)
+
/**
* Computes the signum of the given value.
*
@@ -2702,6 +2782,27 @@ object functions {
*/
def radians(columnName: String): Column = radians(Column(columnName))
+ /**
+ * Returns the bucket number into which the value of this expression would fall after being
+ * evaluated. Note that input arguments must follow conditions listed below; otherwise, the
+ * method will return null.
+ *
+ * @param v
+ * value to compute a bucket number in the histogram
+ * @param min
+ * minimum value of the histogram
+ * @param max
+ * maximum value of the histogram
+ * @param numBucket
+ * the number of buckets
+ * @return
+ * the bucket number into which the value would fall after being evaluated
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def width_bucket(v: Column, min: Column, max: Column, numBucket: Column): Column =
+ Column.fn("width_bucket", v, min, max, numBucket)
+
//////////////////////////////////////////////////////////////////////////////////////////////
// Misc functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index d23b4ef2e02..109219603b9 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -1006,6 +1006,10 @@ class PlanGenerationTestSuite
fn.stddev("a")
}
+ functionTest("std") {
+ fn.std(fn.col("a"))
+ }
+
functionTest("stddev_samp") {
fn.stddev_samp("a")
}
@@ -1182,6 +1186,14 @@ class PlanGenerationTestSuite
fn.ceil(fn.col("b"), lit(2))
}
+ functionTest("ceiling") {
+ fn.ceiling(fn.col("b"))
+ }
+
+ functionTest("ceiling scale") {
+ fn.ceiling(fn.col("b"), lit(2))
+ }
+
functionTest("conv") {
fn.conv(fn.col("b"), 10, 16)
}
@@ -1202,6 +1214,10 @@ class PlanGenerationTestSuite
fn.csc(fn.col("b"))
}
+ functionTest("e") {
+ fn.e()
+ }
+
functionTest("exp") {
fn.exp("b")
}
@@ -1246,6 +1262,10 @@ class PlanGenerationTestSuite
fn.log("b")
}
+ functionTest("ln") {
+ fn.ln(fn.col("b"))
+ }
+
functionTest("log with base") {
fn.log(2, "b")
}
@@ -1262,10 +1282,26 @@ class PlanGenerationTestSuite
fn.log2("a")
}
+ functionTest("negative") {
+ fn.negative(fn.col("a"))
+ }
+
+ functionTest("pi") {
+ fn.pi()
+ }
+
+ functionTest("positive") {
+ fn.positive(fn.col("a"))
+ }
+
functionTest("pow") {
fn.pow("a", "b")
}
+ functionTest("power") {
+ fn.power(fn.col("a"), fn.col("b"))
+ }
+
functionTest("pmod") {
fn.pmod(fn.col("a"), fn.lit(10))
}
@@ -1302,6 +1338,10 @@ class PlanGenerationTestSuite
fn.signum("b")
}
+ functionTest("sign") {
+ fn.sign(fn.col("b"))
+ }
+
functionTest("sin") {
fn.sin("b")
}
@@ -2132,6 +2172,10 @@ class PlanGenerationTestSuite
simple.groupBy(Column("id")).pivot("a").agg(functions.count(Column("b")))
}
+ test("width_bucket") {
+ simple.select(fn.width_bucket(fn.col("b"), fn.col("b"), fn.col("b"), fn.col("a")))
+ }
+
test("test broadcast") {
left.join(fn.broadcast(right), "id")
}
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling.explain
new file mode 100644
index 00000000000..9cf776a8dba
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling.explain
@@ -0,0 +1,2 @@
+Project [CEIL(b#0) AS CEIL(b)#0L]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling_scale.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling_scale.explain
new file mode 100644
index 00000000000..cdf8d356e47
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ceiling_scale.explain
@@ -0,0 +1,2 @@
+Project [ceil(cast(b#0 as decimal(30,15)), 2) AS ceil(b, 2)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_e.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_e.explain
new file mode 100644
index 00000000000..30a94c78310
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_e.explain
@@ -0,0 +1,2 @@
+Project [E() AS E()#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain
new file mode 100644
index 00000000000..d3c3743b1ef
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain
@@ -0,0 +1,2 @@
+Project [LOG(E(), b#0) AS LOG(E(), b)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_negative.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_negative.explain
new file mode 100644
index 00000000000..4f047e75f06
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_negative.explain
@@ -0,0 +1,2 @@
+Project [-a#0 AS negative(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_pi.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_pi.explain
new file mode 100644
index 00000000000..a3f4e78ed64
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_pi.explain
@@ -0,0 +1,2 @@
+Project [PI() AS PI()#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain
new file mode 100644
index 00000000000..8e1df4a0435
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain
@@ -0,0 +1,2 @@
+Project [positive(a#0) AS (+ a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_power.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_power.explain
new file mode 100644
index 00000000000..c6c6c0603e3
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_power.explain
@@ -0,0 +1,2 @@
+Project [POWER(cast(a#0 as double), b#0) AS POWER(a, b)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_sign.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_sign.explain
new file mode 100644
index 00000000000..807fa330083
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_sign.explain
@@ -0,0 +1,2 @@
+Project [SIGNUM(b#0) AS SIGNUM(b)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_std.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_std.explain
new file mode 100644
index 00000000000..106191e5a32
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_std.explain
@@ -0,0 +1,2 @@
+Aggregate [stddev(cast(a#0 as double)) AS stddev(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/width_bucket.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/width_bucket.explain
new file mode 100644
index 00000000000..22b799481c9
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/width_bucket.explain
@@ -0,0 +1,2 @@
+Project [width_bucket(b#0, b#0, b#0, cast(a#0 as bigint)) AS width_bucket(b, b, b, a)#0L]
++- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.json
new file mode 100644
index 00000000000..5a9961ab47f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "ceil",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "b"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin
new file mode 100644
index 00000000000..3761deb1663
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json
new file mode 100644
index 00000000000..bda5e85924c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "ceil",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "b"
+ }
+ }, {
+ "literal": {
+ "integer": 2
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin
new file mode 100644
index 00000000000..8db402ac167
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_e.json b/connector/connect/common/src/test/resources/query-tests/queries/function_e.json
new file mode 100644
index 00000000000..c99c04a6bef
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_e.json
@@ -0,0 +1,20 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "e"
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin
new file mode 100644
index 00000000000..49f6c12fbcc
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json
new file mode 100644
index 00000000000..1b2d0ed0b14
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "log",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "b"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin
new file mode 100644
index 00000000000..548fb480dd2
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_negative.json b/connector/connect/common/src/test/resources/query-tests/queries/function_negative.json
new file mode 100644
index 00000000000..e269fabe44b
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_negative.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "negative",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin
new file mode 100644
index 00000000000..9c56c111cee
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_pi.json b/connector/connect/common/src/test/resources/query-tests/queries/function_pi.json
new file mode 100644
index 00000000000..46474dfd8e3
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_pi.json
@@ -0,0 +1,20 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "pi"
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin
new file mode 100644
index 00000000000..14f018904bf
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_positive.json b/connector/connect/common/src/test/resources/query-tests/queries/function_positive.json
new file mode 100644
index 00000000000..a8b3a2d6244
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_positive.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "positive",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin
new file mode 100644
index 00000000000..5507abce8ca
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_power.json b/connector/connect/common/src/test/resources/query-tests/queries/function_power.json
new file mode 100644
index 00000000000..187636fb360
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_power.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "power",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "b"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin
new file mode 100644
index 00000000000..6e1d3b06fe8
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_sign.json b/connector/connect/common/src/test/resources/query-tests/queries/function_sign.json
new file mode 100644
index 00000000000..bcf6ad7eb17
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_sign.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "signum",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "b"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin
new file mode 100644
index 00000000000..af52abfb7f2
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_std.json b/connector/connect/common/src/test/resources/query-tests/queries/function_std.json
new file mode 100644
index 00000000000..1403817886c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_std.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "stddev",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin
new file mode 100644
index 00000000000..8d214eea8e7
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.json b/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.json
new file mode 100644
index 00000000000..93d3b5297d9
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.json
@@ -0,0 +1,37 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "width_bucket",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "b"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "b"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "b"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin
new file mode 100644
index 00000000000..f212e97bc1c
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin differ
diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index aac032dd4c3..3aa77971aa1 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -64,11 +64,13 @@ Math Functions
bin
cbrt
ceil
+ ceiling
conv
cos
cosh
cot
csc
+ e
exp
expm1
factorial
@@ -76,12 +78,18 @@ Math Functions
hex
unhex
hypot
+ ln
log
log10
log1p
log2
+ negate
+ negative
+ pi
pmod
+ positive
pow
+ power
rint
round
bround
@@ -89,6 +97,7 @@ Math Functions
shiftleft
shiftright
shiftrightunsigned
+ sign
signum
sin
sinh
@@ -98,6 +107,7 @@ Math Functions
degrees
toRadians
radians
+ width_bucket
Datetime Functions
@@ -269,6 +279,7 @@ Aggregate Functions
regr_sxy
regr_syy
skewness
+ std
stddev
stddev_pop
stddev_samp
diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index 71860d3eb57..85863f2e115 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -532,6 +532,9 @@ def ceil(col: "ColumnOrName") -> Column:
ceil.__doc__ = pysparkfuncs.ceil.__doc__
+ceiling = ceil
+
+
def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column:
return _invoke_function("conv", _to_col(col), lit(fromBase), lit(toBase))
@@ -574,6 +577,13 @@ def degrees(col: "ColumnOrName") -> Column:
degrees.__doc__ = pysparkfuncs.degrees.__doc__
+def e() -> Column:
+ return _invoke_function("e")
+
+
+e.__doc__ = pysparkfuncs.e.__doc__
+
+
def exp(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("exp", col)
@@ -642,6 +652,13 @@ def log1p(col: "ColumnOrName") -> Column:
log1p.__doc__ = pysparkfuncs.log1p.__doc__
+def ln(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("ln", col)
+
+
+ln.__doc__ = pysparkfuncs.ln.__doc__
+
+
def log2(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("log2", col)
@@ -649,6 +666,30 @@ def log2(col: "ColumnOrName") -> Column:
log2.__doc__ = pysparkfuncs.log2.__doc__
+def negative(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("negative", col)
+
+
+negative.__doc__ = pysparkfuncs.negative.__doc__
+
+
+negate = negative
+
+
+def pi() -> Column:
+ return _invoke_function("pi")
+
+
+pi.__doc__ = pysparkfuncs.pi.__doc__
+
+
+def positive(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("positive", col)
+
+
+positive.__doc__ = pysparkfuncs.positive.__doc__
+
+
def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", float]) -> Column:
return _invoke_binary_math_function("pmod", dividend, divisor)
@@ -656,6 +697,19 @@ def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName",
pmod.__doc__ = pysparkfuncs.pmod.__doc__
+def width_bucket(
+ v: "ColumnOrName",
+ min: "ColumnOrName",
+ max: "ColumnOrName",
+ numBucket: Union["ColumnOrName", int],
+) -> Column:
+ numBucket = lit(numBucket) if isinstance(numBucket, int) else numBucket
+ return _invoke_function_over_columns("width_bucket", v, min, max, numBucket)
+
+
+width_bucket.__doc__ = pysparkfuncs.width_bucket.__doc__
+
+
def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
return _invoke_binary_math_function("power", col1, col2)
@@ -743,6 +797,9 @@ def signum(col: "ColumnOrName") -> Column:
signum.__doc__ = pysparkfuncs.signum.__doc__
+ sign = signum
+
+
def sin(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("sin", col)
@@ -1041,6 +1098,9 @@ def stddev(col: "ColumnOrName") -> Column:
stddev.__doc__ = pysparkfuncs.stddev.__doc__
+std = stddev
+
+
def stddev_samp(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("stddev_samp", col)
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 25939670cc6..b443fb281b9 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1089,6 +1089,9 @@ def ceil(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("ceil", col)
+ceiling = ceil
+
+
@try_remote_functions
def cos(col: "ColumnOrName") -> Column:
"""
@@ -1208,6 +1211,22 @@ def csc(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("csc", col)
+@try_remote_functions
+def e() -> Column:
+ """Returns Euler's number.
+
+ Examples
+ --------
+ >>> spark.range(1).select(e()).show()
+ +-----------------+
+ | E()|
+ +-----------------+
+ |2.718281828459045|
+ +-----------------+
+ """
+ return _invoke_function("e")
+
+
@try_remote_functions
def exp(col: "ColumnOrName") -> Column:
"""
@@ -1401,6 +1420,88 @@ def log1p(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("log1p", col)
+@try_remote_functions
+def negative(col: "ColumnOrName") -> Column:
+ """
+ Returns the negative value.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ column to calculate negative value for.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ negative value.
+
+ Examples
+ --------
+ >>> spark.range(3).select(negative("id").alias("n")).show()
+ +---+
+ | n|
+ +---+
+ | 0|
+ | -1|
+ | -2|
+ +---+
+ """
+ return _invoke_function_over_columns("negative", col)
+
+
+negate = negative
+
+
+@try_remote_functions
+def pi() -> Column:
+ """Returns Pi.
+
+ Examples
+ --------
+ >>> spark.range(1).select(pi()).show()
+ +-----------------+
+ | PI()|
+ +-----------------+
+ |3.141592653589793|
+ +-----------------+
+ """
+ return _invoke_function("pi")
+
+
+@try_remote_functions
+def positive(col: "ColumnOrName") -> Column:
+ """
+ Returns the value.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ input value column.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ value.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(-1,), (0,), (1,)], ['v'])
+ >>> df.select(positive("v").alias("p")).show()
+ +---+
+ | p|
+ +---+
+ | -1|
+ | 0|
+ | 1|
+ +---+
+ """
+ return _invoke_function_over_columns("positive", col)
+
+
@try_remote_functions
def rint(col: "ColumnOrName") -> Column:
"""
@@ -1511,6 +1612,9 @@ def signum(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("signum", col)
+sign = signum
+
+
@try_remote_functions
def sin(col: "ColumnOrName") -> Column:
"""
@@ -1917,6 +2021,9 @@ def stddev(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("stddev", col)
+std = stddev
+
+
@try_remote_functions
def stddev_samp(col: "ColumnOrName") -> Column:
"""
@@ -2639,6 +2746,9 @@ def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float])
return _invoke_binary_math_function("pow", col1, col2)
+power = pow
+
+
@try_remote_functions
def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", float]) -> Column:
"""
@@ -2687,6 +2797,57 @@ def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName",
return _invoke_binary_math_function("pmod", dividend, divisor)
+def width_bucket(
+ v: "ColumnOrName",
+ min: "ColumnOrName",
+ max: "ColumnOrName",
+ numBucket: Union["ColumnOrName", int],
+) -> Column:
+ """
+ Returns the bucket number into which the value of this expression would fall
+ after being evaluated. Note that input arguments must follow conditions listed below;
+ otherwise, the method will return null.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ v : str or :class:`~pyspark.sql.Column`
+ value to compute a bucket number in the histogram
+ min : str or :class:`~pyspark.sql.Column`
+ minimum value of the histogram
+ max : str or :class:`~pyspark.sql.Column`
+ maximum value of the histogram
+ numBucket : str, :class:`~pyspark.sql.Column` or int
+ the number of buckets
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the bucket number into which the value would fall after being evaluated
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([
+ ... (5.3, 0.2, 10.6, 5),
+ ... (-2.1, 1.3, 3.4, 3),
+ ... (8.1, 0.0, 5.7, 4),
+ ... (-0.9, 5.2, 0.5, 2)],
+ ... ['v', 'min', 'max', 'n'])
+ >>> df.select(width_bucket('v', 'min', 'max', 'n')).show()
+ +----------------------------+
+ |width_bucket(v, min, max, n)|
+ +----------------------------+
+ | 3|
+ | 0|
+ | 5|
+ | 3|
+ +----------------------------+
+ """
+ numBucket = lit(numBucket) if isinstance(numBucket, int) else numBucket
+ return _invoke_function_over_columns("width_bucket", v, min, max, numBucket)
+
+
@try_remote_functions
def row_number() -> Column:
"""
@@ -4255,6 +4416,35 @@ def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = Non
return _invoke_function("log", arg1, _to_java_column(arg2))
+@try_remote_functions
+def ln(col: "ColumnOrName") -> Column:
+ """Returns the natural logarithm of the argument.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ a column to calculate logarithm for.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ natural logarithm of given value.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(4,)], ['a'])
+ >>> df.select(ln('a')).show()
+ +------------------+
+ | ln(a)|
+ +------------------+
+ |1.3862943611198906|
+ +------------------+
+ """
+ return _invoke_function_over_columns("ln", col)
+
+
@try_remote_functions
def log2(col: "ColumnOrName") -> Column:
"""Returns the base-2 logarithm of the argument.
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index c92f636830f..150fab9bebf 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -59,7 +59,6 @@ class FunctionsTestsMixin:
"typedlit", # Scala only
"typedLit", # Scala only
"monotonicallyIncreasingId", # depreciated, use monotonically_increasing_id
- "negate", # equivalent to python -expression
"not", # equivalent to python ~expression
"udaf", # used for creating UDAF's which are not supported in PySpark
]
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 85435e7891a..ab14d4eb955 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -931,6 +931,14 @@ object functions {
*/
def skewness(columnName: String): Column = skewness(Column(columnName))
+ /**
+ * Aggregate function: alias for `stddev_samp`.
+ *
+ * @group agg_funcs
+ * @since 3.5.0
+ */
+ def std(e: Column): Column = stddev(e)
+
/**
* Aggregate function: alias for `stddev_samp`.
*
@@ -2024,6 +2032,22 @@ object functions {
*/
def ceil(columnName: String): Column = ceil(Column(columnName))
+ /**
+ * Computes the ceiling of the given value of `e` to `scale` decimal places.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def ceiling(e: Column, scale: Column): Column = ceil(e, scale)
+
+ /**
+ * Computes the ceiling of the given value of `e` to 0 decimal places.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def ceiling(e: Column): Column = ceil(e)
+
/**
* Convert a number in a string column from one base to another.
*
@@ -2088,6 +2112,14 @@ object functions {
*/
def csc(e: Column): Column = withExpr { Csc(e.expr) }
+ /**
+ * Returns Euler's number.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def e(): Column = withExpr { EulerNumber() }
+
/**
* Computes the exponential of the given value.
*
@@ -2282,6 +2314,14 @@ object functions {
least((columnName +: columnNames).map(Column.apply): _*)
}
+ /**
+ * Computes the natural logarithm of the given value.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def ln(e: Column): Column = log(e)
+
/**
* Computes the natural logarithm of the given value.
*
@@ -2362,6 +2402,30 @@ object functions {
*/
def log2(columnName: String): Column = log2(Column(columnName))
+ /**
+ * Returns the negated value.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def negative(e: Column): Column = withExpr { UnaryMinus(e.expr) }
+
+ /**
+ * Returns Pi.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def pi(): Column = withExpr { Pi() }
+
+ /**
+ * Returns the value.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def positive(e: Column): Column = withExpr { UnaryPositive(e.expr) }
+
/**
* Returns the value of the first argument raised to the power of the second argument.
*
@@ -2426,6 +2490,14 @@ object functions {
*/
def pow(l: Double, rightName: String): Column = pow(l, Column(rightName))
+ /**
+ * Returns the value of the first argument raised to the power of the second argument.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def power(l: Column, r: Column): Column = pow(l, r)
+
/**
* Returns the positive value of dividend mod divisor.
*
@@ -2558,6 +2630,14 @@ object functions {
ShiftRightUnsigned(e.expr, lit(numBits).expr)
}
+ /**
+ * Computes the signum of the given value.
+ *
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def sign(e: Column): Column = signum(e)
+
/**
* Computes the signum of the given value.
*
@@ -2718,6 +2798,23 @@ object functions {
*/
def radians(columnName: String): Column = radians(Column(columnName))
+ /**
+ * Returns the bucket number into which the value of this expression would fall
+ * after being evaluated. Note that input arguments must follow conditions listed below;
+ * otherwise, the method will return null.
+ *
+ * @param v value to compute a bucket number in the histogram
+ * @param min minimum value of the histogram
+ * @param max maximum value of the histogram
+ * @param numBucket the number of buckets
+ * @return the bucket number into which the value would fall after being evaluated
+ * @group math_funcs
+ * @since 3.5.0
+ */
+ def width_bucket(v: Column, min: Column, max: Column, numBucket: Column): Column = withExpr {
+ WidthBucket(v.expr, min.expr, max.expr, numBucket.expr)
+ }
+
//////////////////////////////////////////////////////////////////////////////////////////////
// Misc functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
index 45b3c379a45..fde55e27bf3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
@@ -428,6 +428,10 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
checkAnswer(
sql("SELECT sign(10), signum(-11)"),
Row(1, -1))
+
+ checkAnswer(
+ Seq((1, 2)).toDF().select(signum(lit(10)), signum(lit(-11))),
+ Row(1, -1))
}
test("pow / power") {
@@ -437,6 +441,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
sql("SELECT pow(1, 2), power(2, 1)"),
Seq((1, 2)).toDF().select(pow(lit(1), lit(2)), pow(lit(2), lit(1)))
)
+
+ checkAnswer(
+ sql("SELECT pow(1, 2), power(2, 1)"),
+ Seq((1, 2)).toDF().select(power(lit(1), lit(2)), power(lit(2), lit(1)))
+ )
}
test("hex") {
@@ -595,12 +604,19 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
checkAnswer(
sql("SELECT negative(1), negative(0), negative(-1)"),
Row(-1, 0, 1))
+
+ checkAnswer(
+ Seq((1, 2)).toDF().select(negative(lit(1)), negative(lit(0)), negative(lit(-1))),
+ Row(-1, 0, 1))
}
test("positive") {
val df = Seq((1, -1, "abc")).toDF("a", "b", "c")
checkAnswer(df.selectExpr("positive(a)"), Row(1))
checkAnswer(df.selectExpr("positive(b)"), Row(-1))
+
+ checkAnswer(df.select(positive(col("a"))), Row(1))
+ checkAnswer(df.select(positive(col("b"))), Row(-1))
}
test("SPARK-35926: Support YearMonthIntervalType in width-bucket function") {
@@ -616,6 +632,19 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
).foreach { case ((value, start, end, num), expected) =>
val df = Seq((value, start, end, num)).toDF("v", "s", "e", "n")
checkAnswer(df.selectExpr("width_bucket(v, s, e, n)"), Row(expected))
+ checkAnswer(df.select(width_bucket(col("v"), col("s"), col("e"), col("n"))), Row(expected))
}
}
+
+ test("width_bucket with numbers") {
+ val df1 = Seq(
+ (5.3, 0.2, 10.6, 5), (-2.1, 1.3, 3.4, 3),
+ (8.1, 0.0, 5.7, 4), (-0.9, 5.2, 0.5, 2)
+ ).toDF("v", "min", "max", "n")
+
+ checkAnswer(
+ df1.selectExpr("width_bucket(v, min, max, n)"),
+ df1.select(width_bucket(col("v"), col("min"), col("max"), col("n")))
+ )
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org