You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/06/17 04:36:16 UTC
[spark] branch master updated: [SPARK-43928][SQL][PYTHON][CONNECT] Add bit operations to Scala, Python and Connect API
This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 02beb50ed7c [SPARK-43928][SQL][PYTHON][CONNECT] Add bit operations to Scala, Python and Connect API
02beb50ed7c is described below
commit 02beb50ed7cd0f3d1fa77e94a4d3cd30f325cf70
Author: Jiaan Geng <be...@163.com>
AuthorDate: Sat Jun 17 12:35:56 2023 +0800
[SPARK-43928][SQL][PYTHON][CONNECT] Add bit operations to Scala, Python and Connect API
### What changes were proposed in this pull request?
This PR adds bit operations to the Scala, Python and Connect APIs.
The new APIs are listed below.
- bit_and
- bit_count
- bit_get
- bit_or
- bit_xor
- getbit
### Why are the changes needed?
Add bit operations to Scala, Python and Connect API
### Does this PR introduce _any_ user-facing change?
'No'.
New feature: it only adds new functions and does not change existing behavior.
### How was this patch tested?
New test cases.
Closes #41608 from beliefer/SPARK-43928.
Authored-by: Jiaan Geng <be...@163.com>
Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
.../scala/org/apache/spark/sql/functions.scala | 51 ++++++
.../apache/spark/sql/PlanGenerationTestSuite.scala | 24 +++
.../explain-results/function_bit_and.explain | 2 +
.../explain-results/function_bit_count.explain | 2 +
.../explain-results/function_bit_get.explain | 2 +
.../explain-results/function_bit_or.explain | 2 +
.../explain-results/function_bit_xor.explain | 2 +
.../explain-results/function_getbit.explain | 2 +
.../query-tests/queries/function_bit_and.json | 25 +++
.../query-tests/queries/function_bit_and.proto.bin | Bin 0 -> 176 bytes
.../query-tests/queries/function_bit_count.json | 25 +++
.../queries/function_bit_count.proto.bin | Bin 0 -> 178 bytes
.../query-tests/queries/function_bit_get.json | 29 ++++
.../query-tests/queries/function_bit_get.proto.bin | Bin 0 -> 182 bytes
.../query-tests/queries/function_bit_or.json | 25 +++
.../query-tests/queries/function_bit_or.proto.bin | Bin 0 -> 175 bytes
.../query-tests/queries/function_bit_xor.json | 25 +++
.../query-tests/queries/function_bit_xor.proto.bin | Bin 0 -> 176 bytes
.../query-tests/queries/function_getbit.json | 29 ++++
.../query-tests/queries/function_getbit.proto.bin | Bin 0 -> 181 bytes
.../source/reference/pyspark.sql/functions.rst | 13 ++
python/pyspark/sql/connect/functions.py | 42 +++++
python/pyspark/sql/functions.py | 184 +++++++++++++++++++++
.../scala/org/apache/spark/sql/functions.scala | 53 ++++++
.../apache/spark/sql/DataFrameAggregateSuite.scala | 6 +
.../apache/spark/sql/DataFrameFunctionsSuite.scala | 16 ++
26 files changed, 559 insertions(+)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 65a394c8e7d..61783746c56 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1251,6 +1251,30 @@ object functions {
*/
def bool_or(e: Column): Column = Column.fn("bool_or", e)
+ /**
+ * Aggregate function: returns the bitwise AND of all non-null input values, or null if none.
+ *
+ * @group agg_funcs
+ * @since 3.5.0
+ */
+ def bit_and(e: Column): Column = Column.fn("bit_and", e)
+
+ /**
+ * Aggregate function: returns the bitwise OR of all non-null input values, or null if none.
+ *
+ * @group agg_funcs
+ * @since 3.5.0
+ */
+ def bit_or(e: Column): Column = Column.fn("bit_or", e)
+
+ /**
+ * Aggregate function: returns the bitwise XOR of all non-null input values, or null if none.
+ *
+ * @group agg_funcs
+ * @since 3.5.0
+ */
+ def bit_xor(e: Column): Column = Column.fn("bit_xor", e)
+
//////////////////////////////////////////////////////////////////////////////////////////////
// Window functions
//////////////////////////////////////////////////////////////////////////////////////////////
@@ -1851,6 +1875,33 @@ object functions {
*/
def bitwise_not(e: Column): Column = Column.fn("~", e)
+ /**
+ * Returns the number of bits that are set in the argument expr as an unsigned 64-bit integer,
+ * or NULL if the argument is NULL.
+ *
+ * @group bitwise_funcs
+ * @since 3.5.0
+ */
+ def bit_count(e: Column): Column = Column.fn("bit_count", e)
+
+ /**
+ * Returns the value of the bit (0 or 1) at the specified position. The positions are numbered
+ * from right to left, starting at zero. The position argument cannot be negative.
+ *
+ * @group bitwise_funcs
+ * @since 3.5.0
+ */
+ def bit_get(e: Column, pos: Column): Column = Column.fn("bit_get", e, pos)
+
+ /**
+ * Returns the value of the bit (0 or 1) at the specified position. The positions are numbered
+ * from right to left, starting at zero. The position argument cannot be negative.
+ *
+ * @group bitwise_funcs
+ * @since 3.5.0
+ */
+ def getbit(e: Column, pos: Column): Column = Column.fn("getbit", e, pos)
+
/**
* Parses the expression string into the column that it represents, similar to
* [[Dataset#selectExpr]].
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 0702b595b4a..8e40a29c3d5 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -1009,6 +1009,18 @@ class PlanGenerationTestSuite
fn.histogram_numeric(fn.col("a"), lit(10))
}
+ functionTest("bit_and") {
+ fn.bit_and(fn.col("a"))
+ }
+
+ functionTest("bit_or") {
+ fn.bit_or(fn.col("a"))
+ }
+
+ functionTest("bit_xor") {
+ fn.bit_xor(fn.col("a"))
+ }
+
functionTest("mode") {
fn.mode(fn.col("a"))
}
@@ -1209,6 +1221,18 @@ class PlanGenerationTestSuite
fn.bitwise_not(fn.col("a"))
}
+ functionTest("bit_count") {
+ fn.bit_count(fn.col("a"))
+ }
+
+ functionTest("bit_get") {
+ fn.bit_get(fn.col("a"), lit(0))
+ }
+
+ functionTest("getbit") {
+ fn.getbit(fn.col("a"), lit(0))
+ }
+
functionTest("expr") {
fn.expr("a + 1")
}
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_and.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_and.explain
new file mode 100644
index 00000000000..17358c51ae3
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_and.explain
@@ -0,0 +1,2 @@
+Aggregate [bit_and(a#0) AS bit_and(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_count.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_count.explain
new file mode 100644
index 00000000000..11265d775f6
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_count.explain
@@ -0,0 +1,2 @@
+Project [bit_count(a#0) AS bit_count(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_get.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_get.explain
new file mode 100644
index 00000000000..7302233837c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_get.explain
@@ -0,0 +1,2 @@
+Project [bit_get(a#0, 0) AS bit_get(a, 0)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_or.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_or.explain
new file mode 100644
index 00000000000..48a2c722b6e
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_or.explain
@@ -0,0 +1,2 @@
+Aggregate [bit_or(a#0) AS bit_or(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_xor.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_xor.explain
new file mode 100644
index 00000000000..61980b03b23
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_xor.explain
@@ -0,0 +1,2 @@
+Aggregate [bit_xor(a#0) AS bit_xor(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_getbit.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_getbit.explain
new file mode 100644
index 00000000000..d6469a13d5c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_getbit.explain
@@ -0,0 +1,2 @@
+Project [getbit(a#0, 0) AS getbit(a, 0)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.json
new file mode 100644
index 00000000000..83b2bcf599f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "bit_and",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin
new file mode 100644
index 00000000000..ad81bec6f08
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.json
new file mode 100644
index 00000000000..d5c6b698f7f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "bit_count",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin
new file mode 100644
index 00000000000..875e17d974e
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.json
new file mode 100644
index 00000000000..39425c5e3ff
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "bit_get",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }, {
+ "literal": {
+ "integer": 0
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin
new file mode 100644
index 00000000000..cd0f4098374
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.json
new file mode 100644
index 00000000000..c8e1b2acfe4
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "bit_or",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin
new file mode 100644
index 00000000000..a52907474fb
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json
new file mode 100644
index 00000000000..463e6fc5322
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "bit_xor",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin
new file mode 100644
index 00000000000..c4a9a5e654f
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.json b/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.json
new file mode 100644
index 00000000000..ef33382022a
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "getbit",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }, {
+ "literal": {
+ "integer": 0
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin
new file mode 100644
index 00000000000..15575e4f7cb
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin differ
diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index 49239c47af8..ede67262df5 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -258,6 +258,9 @@ Aggregate Functions
approx_count_distinct
approx_percentile
avg
+ bit_and
+ bit_or
+ bit_xor
bool_and
bool_or
collect_list
@@ -392,6 +395,16 @@ String Functions
upper
+Bitwise Functions
+-----------------
+.. autosummary::
+ :toctree: api/
+
+ bit_count
+ bit_get
+ getbit
+
+
UDF
---
.. autosummary::
diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index 4fd8570faab..4edbc54a556 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -271,6 +271,27 @@ def bitwise_not(col: "ColumnOrName") -> Column:
bitwise_not.__doc__ = pysparkfuncs.bitwise_not.__doc__
+def bit_count(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("bit_count", col)
+
+
+bit_count.__doc__ = pysparkfuncs.bit_count.__doc__
+
+
+def bit_get(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("bit_get", col, pos)
+
+
+bit_get.__doc__ = pysparkfuncs.bit_get.__doc__
+
+
+def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("getbit", col, pos)
+
+
+getbit.__doc__ = pysparkfuncs.getbit.__doc__
+
+
def broadcast(df: "DataFrame") -> "DataFrame":
from pyspark.sql.connect.dataframe import DataFrame
@@ -1270,6 +1291,27 @@ def bool_or(col: "ColumnOrName") -> Column:
bool_or.__doc__ = pysparkfuncs.bool_or.__doc__
+def bit_and(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("bit_and", col)
+
+
+bit_and.__doc__ = pysparkfuncs.bit_and.__doc__
+
+
+def bit_or(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("bit_or", col)
+
+
+bit_or.__doc__ = pysparkfuncs.bit_or.__doc__
+
+
+def bit_xor(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("bit_xor", col)
+
+
+bit_xor.__doc__ = pysparkfuncs.bit_xor.__doc__
+
+
# Window Functions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index c92034633a8..e9b6ee8fa40 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1829,6 +1829,112 @@ def bitwise_not(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("bitwise_not", col)
+@try_remote_functions
+def bit_count(col: "ColumnOrName") -> Column:
+ """
+ Returns the number of bits that are set in the argument expr as an unsigned 64-bit integer,
+ or NULL if the argument is NULL.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ target column to compute on.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the number of bits that are set in the argument expr as an unsigned 64-bit integer,
+ or NULL if the argument is NULL.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+ >>> df.select(bit_count("c")).show()
+ +------------+
+ |bit_count(c)|
+ +------------+
+ | 1|
+ | 1|
+ | 1|
+ +------------+
+ """
+ return _invoke_function_over_columns("bit_count", col)
+
+
+@try_remote_functions
+def bit_get(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+ """
+ Returns the value of the bit (0 or 1) at the specified position.
+ The positions are numbered from right to left, starting at zero.
+ The position argument cannot be negative.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ target column to compute on.
+ pos : :class:`~pyspark.sql.Column` or str
+ The positions are numbered from right to left, starting at zero.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the value of the bit (0 or 1) at the specified position.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+ >>> df.select(bit_get("c", lit(1))).show()
+ +-------------+
+ |bit_get(c, 1)|
+ +-------------+
+ | 0|
+ | 0|
+ | 1|
+ +-------------+
+ """
+ return _invoke_function_over_columns("bit_get", col, pos)
+
+
+@try_remote_functions
+def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+ """
+ Returns the value of the bit (0 or 1) at the specified position.
+ The positions are numbered from right to left, starting at zero.
+ The position argument cannot be negative.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ target column to compute on.
+ pos : :class:`~pyspark.sql.Column` or str
+ The positions are numbered from right to left, starting at zero.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the value of the bit (0 or 1) at the specified position.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+ >>> df.select(getbit("c", lit(1)).alias("d")).show()
+ +---+
+ | d|
+ +---+
+ | 0|
+ | 0|
+ | 1|
+ +---+
+ """
+ return _invoke_function_over_columns("getbit", col, pos)
+
+
@try_remote_functions
def asc_nulls_first(col: "ColumnOrName") -> Column:
"""
@@ -2643,6 +2749,84 @@ def bool_or(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("bool_or", col)
+@try_remote_functions
+def bit_and(col: "ColumnOrName") -> Column:
+ """
+ Aggregate function: returns the bitwise AND of all non-null input values, or null if none.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ target column to compute on.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the bitwise AND of all non-null input values, or null if none.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+ >>> df.select(bit_and("c")).first()
+ Row(bit_and(c)=0)
+ """
+ return _invoke_function_over_columns("bit_and", col)
+
+
+@try_remote_functions
+def bit_or(col: "ColumnOrName") -> Column:
+ """
+ Aggregate function: returns the bitwise OR of all non-null input values, or null if none.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ target column to compute on.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the bitwise OR of all non-null input values, or null if none.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+ >>> df.select(bit_or("c")).first()
+ Row(bit_or(c)=3)
+ """
+ return _invoke_function_over_columns("bit_or", col)
+
+
+@try_remote_functions
+def bit_xor(col: "ColumnOrName") -> Column:
+ """
+ Aggregate function: returns the bitwise XOR of all non-null input values, or null if none.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ target column to compute on.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the bitwise XOR of all non-null input values, or null if none.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+ >>> df.select(bit_xor("c")).first()
+ Row(bit_xor(c)=2)
+ """
+ return _invoke_function_over_columns("bit_xor", col)
+
+
@try_remote_functions
def skewness(col: "ColumnOrName") -> Column:
"""
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 62d09c283c4..a76044ac98e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1315,6 +1315,30 @@ object functions {
*/
def bool_or(e: Column): Column = withAggregateFunction { BoolOr(e.expr) }
+ /**
+ * Aggregate function: returns the bitwise AND of all non-null input values, or null if none.
+ *
+ * @group agg_funcs
+ * @since 3.5.0
+ */
+ def bit_and(e: Column): Column = withAggregateFunction { BitAndAgg(e.expr) }
+
+ /**
+ * Aggregate function: returns the bitwise OR of all non-null input values, or null if none.
+ *
+ * @group agg_funcs
+ * @since 3.5.0
+ */
+ def bit_or(e: Column): Column = withAggregateFunction { BitOrAgg(e.expr) }
+
+ /**
+ * Aggregate function: returns the bitwise XOR of all non-null input values, or null if none.
+ *
+ * @group agg_funcs
+ * @since 3.5.0
+ */
+ def bit_xor(e: Column): Column = withAggregateFunction { BitXorAgg(e.expr) }
+
//////////////////////////////////////////////////////////////////////////////////////////////
// Window functions
//////////////////////////////////////////////////////////////////////////////////////////////
@@ -1914,6 +1938,35 @@ object functions {
*/
def bitwise_not(e: Column): Column = withExpr { BitwiseNot(e.expr) }
+ /**
+ * Returns the number of bits that are set in the argument expr as an unsigned 64-bit integer,
+ * or NULL if the argument is NULL.
+ *
+ * @group bitwise_funcs
+ * @since 3.5.0
+ */
+ def bit_count(e: Column): Column = withExpr { BitwiseCount(e.expr) }
+
+ /**
+ * Returns the value of the bit (0 or 1) at the specified position.
+ * The positions are numbered from right to left, starting at zero.
+ * The position argument cannot be negative.
+ *
+ * @group bitwise_funcs
+ * @since 3.5.0
+ */
+ def bit_get(e: Column, pos: Column): Column = withExpr { BitwiseGet(e.expr, pos.expr) }
+
+ /**
+ * Returns the value of the bit (0 or 1) at the specified position.
+ * The positions are numbered from right to left, starting at zero.
+ * The position argument cannot be negative.
+ *
+ * @group bitwise_funcs
+ * @since 3.5.0
+ */
+ def getbit(e: Column, pos: Column): Column = bit_get(e, pos)
+
/**
* Parses the expression string into the column that it represents, similar to
* [[Dataset#selectExpr]].
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index fdce04203cc..28c7ee94b9c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -483,6 +483,12 @@ class DataFrameAggregateSuite extends QueryTest
checkAnswer(complexData.agg(bool_or($"b")), complexData.selectExpr("bool_or(b)"))
}
+ test("bit aggregate") {
+ checkAnswer(testData2.agg(bit_and($"b")), testData2.selectExpr("bit_and(b)"))
+ checkAnswer(testData2.agg(bit_or($"b")), testData2.selectExpr("bit_or(b)"))
+ checkAnswer(testData2.agg(bit_xor($"b")), testData2.selectExpr("bit_xor(b)"))
+ }
+
test("zero moments") {
withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true") {
val input = Seq((1, 2)).toDF("a", "b")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 4d7e8cbb351..ecdad48a87e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -275,6 +275,22 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
testData2.collect().toSeq.map(r => Row(~r.getInt(0), ~r.getInt(0))))
}
+ test("bit_count") {
+ checkAnswer(testData2.select(bit_count($"a")), testData2.selectExpr("bit_count(a)"))
+ }
+
+ test("bit_get") {
+ checkAnswer(
+ testData2.select(bit_get($"a", lit(0)), bit_get($"a", lit(1)), bit_get($"a", lit(2))),
+ testData2.selectExpr("bit_get(a, 0)", "bit_get(a, 1)", "bit_get(a, 2)"))
+ }
+
+ test("getbit") {
+ checkAnswer(
+ testData2.select(getbit($"a", lit(0)), getbit($"a", lit(1)), getbit($"a", lit(2))),
+ testData2.selectExpr("getbit(a, 0)", "getbit(a, 1)", "getbit(a, 2)"))
+ }
+
test("bin") {
val df = Seq[(Integer, Integer)]((12, null)).toDF("a", "b")
checkAnswer(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org