You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/06/17 04:36:16 UTC

[spark] branch master updated: [SPARK-43928][SQL][PYTHON][CONNECT] Add bit operations to Scala, Python and Connect API

This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 02beb50ed7c [SPARK-43928][SQL][PYTHON][CONNECT] Add bit operations to Scala, Python and Connect API
02beb50ed7c is described below

commit 02beb50ed7cd0f3d1fa77e94a4d3cd30f325cf70
Author: Jiaan Geng <be...@163.com>
AuthorDate: Sat Jun 17 12:35:56 2023 +0800

    [SPARK-43928][SQL][PYTHON][CONNECT] Add bit operations to Scala, Python and Connect API
    
    ### What changes were proposed in this pull request?
    This PR adds bit operations to the Scala, Python and Connect APIs.
    The new functions are listed below.
    
    - bit_and
    - bit_count
    - bit_get
    - bit_or
    - bit_xor
    - getbit
    
    ### Why are the changes needed?
    Add bit operations to Scala, Python and Connect API
    
    ### Does this PR introduce _any_ user-facing change?
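    Yes. This is a new feature: the functions listed above are added as
    new public APIs; existing behavior is not changed.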
    
    ### How was this patch tested?
    New test cases.
    
    Closes #41608 from beliefer/SPARK-43928.
    
    Authored-by: Jiaan Geng <be...@163.com>
    Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
 .../scala/org/apache/spark/sql/functions.scala     |  51 ++++++
 .../apache/spark/sql/PlanGenerationTestSuite.scala |  24 +++
 .../explain-results/function_bit_and.explain       |   2 +
 .../explain-results/function_bit_count.explain     |   2 +
 .../explain-results/function_bit_get.explain       |   2 +
 .../explain-results/function_bit_or.explain        |   2 +
 .../explain-results/function_bit_xor.explain       |   2 +
 .../explain-results/function_getbit.explain        |   2 +
 .../query-tests/queries/function_bit_and.json      |  25 +++
 .../query-tests/queries/function_bit_and.proto.bin | Bin 0 -> 176 bytes
 .../query-tests/queries/function_bit_count.json    |  25 +++
 .../queries/function_bit_count.proto.bin           | Bin 0 -> 178 bytes
 .../query-tests/queries/function_bit_get.json      |  29 ++++
 .../query-tests/queries/function_bit_get.proto.bin | Bin 0 -> 182 bytes
 .../query-tests/queries/function_bit_or.json       |  25 +++
 .../query-tests/queries/function_bit_or.proto.bin  | Bin 0 -> 175 bytes
 .../query-tests/queries/function_bit_xor.json      |  25 +++
 .../query-tests/queries/function_bit_xor.proto.bin | Bin 0 -> 176 bytes
 .../query-tests/queries/function_getbit.json       |  29 ++++
 .../query-tests/queries/function_getbit.proto.bin  | Bin 0 -> 181 bytes
 .../source/reference/pyspark.sql/functions.rst     |  13 ++
 python/pyspark/sql/connect/functions.py            |  42 +++++
 python/pyspark/sql/functions.py                    | 184 +++++++++++++++++++++
 .../scala/org/apache/spark/sql/functions.scala     |  53 ++++++
 .../apache/spark/sql/DataFrameAggregateSuite.scala |   6 +
 .../apache/spark/sql/DataFrameFunctionsSuite.scala |  16 ++
 26 files changed, 559 insertions(+)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 65a394c8e7d..61783746c56 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1251,6 +1251,30 @@ object functions {
    */
   def bool_or(e: Column): Column = Column.fn("bool_or", e)
 
+  /**
+   * Aggregate function: returns the bitwise AND of all non-null input values, or null if none.
+   *
+   * @group agg_funcs
+   * @since 3.5.0
+   */
+  def bit_and(e: Column): Column = Column.fn("bit_and", e)
+
+  /**
+   * Aggregate function: returns the bitwise OR of all non-null input values, or null if none.
+   *
+   * @group agg_funcs
+   * @since 3.5.0
+   */
+  def bit_or(e: Column): Column = Column.fn("bit_or", e)
+
+  /**
+   * Aggregate function: returns the bitwise XOR of all non-null input values, or null if none.
+   *
+   * @group agg_funcs
+   * @since 3.5.0
+   */
+  def bit_xor(e: Column): Column = Column.fn("bit_xor", e)
+
   //////////////////////////////////////////////////////////////////////////////////////////////
   // Window functions
   //////////////////////////////////////////////////////////////////////////////////////////////
@@ -1851,6 +1875,33 @@ object functions {
    */
   def bitwise_not(e: Column): Column = Column.fn("~", e)
 
+  /**
+   * Returns the number of bits that are set in the argument expr as an unsigned 64-bit integer,
+   * or NULL if the argument is NULL.
+   *
+   * @group bitwise_funcs
+   * @since 3.5.0
+   */
+  def bit_count(e: Column): Column = Column.fn("bit_count", e)
+
+  /**
+   * Returns the value of the bit (0 or 1) at the specified position. The positions are numbered
+   * from right to left, starting at zero. The position argument cannot be negative.
+   *
+   * @group bitwise_funcs
+   * @since 3.5.0
+   */
+  def bit_get(e: Column, pos: Column): Column = Column.fn("bit_get", e, pos)
+
+  /**
+   * Returns the value of the bit (0 or 1) at the specified position. The positions are numbered
+   * from right to left, starting at zero. The position argument cannot be negative.
+   *
+   * @group bitwise_funcs
+   * @since 3.5.0
+   */
+  def getbit(e: Column, pos: Column): Column = Column.fn("getbit", e, pos)
+
   /**
    * Parses the expression string into the column that it represents, similar to
    * [[Dataset#selectExpr]].
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 0702b595b4a..8e40a29c3d5 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -1009,6 +1009,18 @@ class PlanGenerationTestSuite
     fn.histogram_numeric(fn.col("a"), lit(10))
   }
 
+  functionTest("bit_and") {
+    fn.bit_and(fn.col("a"))
+  }
+
+  functionTest("bit_or") {
+    fn.bit_or(fn.col("a"))
+  }
+
+  functionTest("bit_xor") {
+    fn.bit_xor(fn.col("a"))
+  }
+
   functionTest("mode") {
     fn.mode(fn.col("a"))
   }
@@ -1209,6 +1221,18 @@ class PlanGenerationTestSuite
     fn.bitwise_not(fn.col("a"))
   }
 
+  functionTest("bit_count") {
+    fn.bit_count(fn.col("a"))
+  }
+
+  functionTest("bit_get") {
+    fn.bit_get(fn.col("a"), lit(0))
+  }
+
+  functionTest("getbit") {
+    fn.getbit(fn.col("a"), lit(0))
+  }
+
   functionTest("expr") {
     fn.expr("a + 1")
   }
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_and.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_and.explain
new file mode 100644
index 00000000000..17358c51ae3
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_and.explain
@@ -0,0 +1,2 @@
+Aggregate [bit_and(a#0) AS bit_and(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_count.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_count.explain
new file mode 100644
index 00000000000..11265d775f6
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_count.explain
@@ -0,0 +1,2 @@
+Project [bit_count(a#0) AS bit_count(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_get.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_get.explain
new file mode 100644
index 00000000000..7302233837c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_get.explain
@@ -0,0 +1,2 @@
+Project [bit_get(a#0, 0) AS bit_get(a, 0)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_or.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_or.explain
new file mode 100644
index 00000000000..48a2c722b6e
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_or.explain
@@ -0,0 +1,2 @@
+Aggregate [bit_or(a#0) AS bit_or(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_xor.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_xor.explain
new file mode 100644
index 00000000000..61980b03b23
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bit_xor.explain
@@ -0,0 +1,2 @@
+Aggregate [bit_xor(a#0) AS bit_xor(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_getbit.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_getbit.explain
new file mode 100644
index 00000000000..d6469a13d5c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_getbit.explain
@@ -0,0 +1,2 @@
+Project [getbit(a#0, 0) AS getbit(a, 0)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.json
new file mode 100644
index 00000000000..83b2bcf599f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.json
@@ -0,0 +1,25 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "bit_and",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "a"
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin
new file mode 100644
index 00000000000..ad81bec6f08
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.json
new file mode 100644
index 00000000000..d5c6b698f7f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.json
@@ -0,0 +1,25 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "bit_count",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "a"
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin
new file mode 100644
index 00000000000..875e17d974e
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.json
new file mode 100644
index 00000000000..39425c5e3ff
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.json
@@ -0,0 +1,29 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "bit_get",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "a"
+          }
+        }, {
+          "literal": {
+            "integer": 0
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin
new file mode 100644
index 00000000000..cd0f4098374
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.json
new file mode 100644
index 00000000000..c8e1b2acfe4
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.json
@@ -0,0 +1,25 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "bit_or",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "a"
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin
new file mode 100644
index 00000000000..a52907474fb
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json
new file mode 100644
index 00000000000..463e6fc5322
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json
@@ -0,0 +1,25 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "bit_xor",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "a"
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin
new file mode 100644
index 00000000000..c4a9a5e654f
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.json b/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.json
new file mode 100644
index 00000000000..ef33382022a
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.json
@@ -0,0 +1,29 @@
+{
+  "common": {
+    "planId": "1"
+  },
+  "project": {
+    "input": {
+      "common": {
+        "planId": "0"
+      },
+      "localRelation": {
+        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+      }
+    },
+    "expressions": [{
+      "unresolvedFunction": {
+        "functionName": "getbit",
+        "arguments": [{
+          "unresolvedAttribute": {
+            "unparsedIdentifier": "a"
+          }
+        }, {
+          "literal": {
+            "integer": 0
+          }
+        }]
+      }
+    }]
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin
new file mode 100644
index 00000000000..15575e4f7cb
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin differ
diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index 49239c47af8..ede67262df5 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -258,6 +258,9 @@ Aggregate Functions
     approx_count_distinct
     approx_percentile
     avg
+    bit_and
+    bit_or
+    bit_xor
     bool_and
     bool_or
     collect_list
@@ -392,6 +395,16 @@ String Functions
     upper
 
 
+Bitwise Functions
+-----------------
+.. autosummary::
+    :toctree: api/
+
+    bit_count
+    bit_get
+    getbit
+
+
 UDF
 ---
 .. autosummary::
diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index 4fd8570faab..4edbc54a556 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -271,6 +271,27 @@ def bitwise_not(col: "ColumnOrName") -> Column:
 bitwise_not.__doc__ = pysparkfuncs.bitwise_not.__doc__
 
 
+def bit_count(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("bit_count", col)
+
+
+bit_count.__doc__ = pysparkfuncs.bit_count.__doc__
+
+
+def bit_get(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("bit_get", col, pos)
+
+
+bit_get.__doc__ = pysparkfuncs.bit_get.__doc__
+
+
+def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("getbit", col, pos)
+
+
+getbit.__doc__ = pysparkfuncs.getbit.__doc__
+
+
 def broadcast(df: "DataFrame") -> "DataFrame":
     from pyspark.sql.connect.dataframe import DataFrame
 
@@ -1270,6 +1291,27 @@ def bool_or(col: "ColumnOrName") -> Column:
 bool_or.__doc__ = pysparkfuncs.bool_or.__doc__
 
 
+def bit_and(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("bit_and", col)
+
+
+bit_and.__doc__ = pysparkfuncs.bit_and.__doc__
+
+
+def bit_or(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("bit_or", col)
+
+
+bit_or.__doc__ = pysparkfuncs.bit_or.__doc__
+
+
+def bit_xor(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("bit_xor", col)
+
+
+bit_xor.__doc__ = pysparkfuncs.bit_xor.__doc__
+
+
 # Window Functions
 
 
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index c92034633a8..e9b6ee8fa40 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1829,6 +1829,112 @@ def bitwise_not(col: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("bitwise_not", col)
 
 
+@try_remote_functions
+def bit_count(col: "ColumnOrName") -> Column:
+    """
+    Returns the number of bits that are set in the argument expr as an unsigned 64-bit integer,
+    or NULL if the argument is NULL.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the number of bits that are set in the argument expr as an unsigned 64-bit integer,
+        or NULL if the argument is NULL.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+    >>> df.select(bit_count("c")).show()
+    +------------+
+    |bit_count(c)|
+    +------------+
+    |           1|
+    |           1|
+    |           1|
+    +------------+
+    """
+    return _invoke_function_over_columns("bit_count", col)
+
+
+@try_remote_functions
+def bit_get(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+    """
+    Returns the value of the bit (0 or 1) at the specified position.
+    The positions are numbered from right to left, starting at zero.
+    The position argument cannot be negative.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+    pos : :class:`~pyspark.sql.Column` or str
+        The positions are numbered from right to left, starting at zero.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the value of the bit (0 or 1) at the specified position.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+    >>> df.select(bit_get("c", lit(1))).show()
+    +-------------+
+    |bit_get(c, 1)|
+    +-------------+
+    |            0|
+    |            0|
+    |            1|
+    +-------------+
+    """
+    return _invoke_function_over_columns("bit_get", col, pos)
+
+
+@try_remote_functions
+def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+    """
+    Returns the value of the bit (0 or 1) at the specified position.
+    The positions are numbered from right to left, starting at zero.
+    The position argument cannot be negative.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+    pos : :class:`~pyspark.sql.Column` or str
+        The positions are numbered from right to left, starting at zero.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the value of the bit (0 or 1) at the specified position.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+    >>> df.select(getbit("c", lit(1)).alias("d")).show()
+    +---+
+    |  d|
+    +---+
+    |  0|
+    |  0|
+    |  1|
+    +---+
+    """
+    return _invoke_function_over_columns("getbit", col, pos)
+
+
 @try_remote_functions
 def asc_nulls_first(col: "ColumnOrName") -> Column:
     """
@@ -2643,6 +2749,84 @@ def bool_or(col: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("bool_or", col)
 
 
+@try_remote_functions
+def bit_and(col: "ColumnOrName") -> Column:
+    """
+    Aggregate function: returns the bitwise AND of all non-null input values, or null if none.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the bitwise AND of all non-null input values, or null if none.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+    >>> df.select(bit_and("c")).first()
+    Row(bit_and(c)=0)
+    """
+    return _invoke_function_over_columns("bit_and", col)
+
+
+@try_remote_functions
+def bit_or(col: "ColumnOrName") -> Column:
+    """
+    Aggregate function: returns the bitwise OR of all non-null input values, or null if none.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the bitwise OR of all non-null input values, or null if none.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+    >>> df.select(bit_or("c")).first()
+    Row(bit_or(c)=3)
+    """
+    return _invoke_function_over_columns("bit_or", col)
+
+
+@try_remote_functions
+def bit_xor(col: "ColumnOrName") -> Column:
+    """
+    Aggregate function: returns the bitwise XOR of all non-null input values, or null if none.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    col : :class:`~pyspark.sql.Column` or str
+        target column to compute on.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        the bitwise XOR of all non-null input values, or null if none.
+
+    Examples
+    --------
+    >>> df = spark.createDataFrame([[1],[1],[2]], ["c"])
+    >>> df.select(bit_xor("c")).first()
+    Row(bit_xor(c)=2)
+    """
+    return _invoke_function_over_columns("bit_xor", col)
+
+
 @try_remote_functions
 def skewness(col: "ColumnOrName") -> Column:
     """
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 62d09c283c4..a76044ac98e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1315,6 +1315,30 @@ object functions {
    */
   def bool_or(e: Column): Column = withAggregateFunction { BoolOr(e.expr) }
 
+  /**
+   * Aggregate function: returns the bitwise AND of all non-null input values, or null if none.
+   *
+   * @group agg_funcs
+   * @since 3.5.0
+   */
+  def bit_and(e: Column): Column = withAggregateFunction { BitAndAgg(e.expr) }
+
+  /**
+   * Aggregate function: returns the bitwise OR of all non-null input values, or null if none.
+   *
+   * @group agg_funcs
+   * @since 3.5.0
+   */
+  def bit_or(e: Column): Column = withAggregateFunction { BitOrAgg(e.expr) }
+
+  /**
+   * Aggregate function: returns the bitwise XOR of all non-null input values, or null if none.
+   *
+   * @group agg_funcs
+   * @since 3.5.0
+   */
+  def bit_xor(e: Column): Column = withAggregateFunction { BitXorAgg(e.expr) }
+
   //////////////////////////////////////////////////////////////////////////////////////////////
   // Window functions
   //////////////////////////////////////////////////////////////////////////////////////////////
@@ -1914,6 +1938,35 @@ object functions {
    */
   def bitwise_not(e: Column): Column = withExpr { BitwiseNot(e.expr) }
 
+  /**
+   * Returns the number of bits that are set in the argument expr as an unsigned 64-bit integer,
+   * or NULL if the argument is NULL.
+   *
+   * @group bitwise_funcs
+   * @since 3.5.0
+   */
+  def bit_count(e: Column): Column = withExpr { BitwiseCount(e.expr) }
+
+  /**
+   * Returns the value of the bit (0 or 1) at the specified position.
+   * The positions are numbered from right to left, starting at zero.
+   * The position argument cannot be negative.
+   *
+   * @group bitwise_funcs
+   * @since 3.5.0
+   */
+  def bit_get(e: Column, pos: Column): Column = withExpr { BitwiseGet(e.expr, pos.expr) }
+
+  /**
+   * Returns the value of the bit (0 or 1) at the specified position.
+   * The positions are numbered from right to left, starting at zero.
+   * The position argument cannot be negative.
+   *
+   * @group bitwise_funcs
+   * @since 3.5.0
+   */
+  def getbit(e: Column, pos: Column): Column = bit_get(e, pos)
+
   /**
    * Parses the expression string into the column that it represents, similar to
    * [[Dataset#selectExpr]].
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index fdce04203cc..28c7ee94b9c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -483,6 +483,12 @@ class DataFrameAggregateSuite extends QueryTest
     checkAnswer(complexData.agg(bool_or($"b")), complexData.selectExpr("bool_or(b)"))
   }
 
+  test("bit aggregate") {
+    checkAnswer(testData2.agg(bit_and($"b")), testData2.selectExpr("bit_and(b)"))
+    checkAnswer(testData2.agg(bit_or($"b")), testData2.selectExpr("bit_or(b)"))
+    checkAnswer(testData2.agg(bit_xor($"b")), testData2.selectExpr("bit_xor(b)"))
+  }
+
   test("zero moments") {
     withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true") {
       val input = Seq((1, 2)).toDF("a", "b")
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 4d7e8cbb351..ecdad48a87e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -275,6 +275,22 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
       testData2.collect().toSeq.map(r => Row(~r.getInt(0), ~r.getInt(0))))
   }
 
+  test("bit_count") {
+    checkAnswer(testData2.select(bit_count($"a")), testData2.selectExpr("bit_count(a)"))
+  }
+
+  test("bit_get") {
+    checkAnswer(
+      testData2.select(bit_get($"a", lit(0)), bit_get($"a", lit(1)), bit_get($"a", lit(2))),
+      testData2.selectExpr("bit_get(a, 0)", "bit_get(a, 1)", "bit_get(a, 2)"))
+  }
+
+  test("getbit") {
+    checkAnswer(
+      testData2.select(getbit($"a", lit(0)), getbit($"a", lit(1)), getbit($"a", lit(2))),
+      testData2.selectExpr("getbit(a, 0)", "getbit(a, 1)", "getbit(a, 2)"))
+  }
+
   test("bin") {
     val df = Seq[(Integer, Integer)]((12, null)).toDF("a", "b")
     checkAnswer(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org