You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/06/20 03:45:19 UTC
[spark] branch master updated: [SPARK-43942][SQL][CONNECT][PYTHON][FOLLOW-UP] Make contains support binary type
This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new d6380ad4d02 [SPARK-43942][SQL][CONNECT][PYTHON][FOLLOW-UP] Make contains support binary type
d6380ad4d02 is described below
commit d6380ad4d02cb3b04ccd83c40f3d32e063627735
Author: panbingkun <pb...@gmail.com>
AuthorDate: Tue Jun 20 11:44:55 2023 +0800
[SPARK-43942][SQL][CONNECT][PYTHON][FOLLOW-UP] Make contains support binary type
What changes were proposed in this pull request?
Make contains support binary type:
- in the Connect API, contains actually already supports binary type;
- in the vanilla API, binary type is supported via call_udf
Why are the changes needed?
For feature parity between the vanilla API and the Connect API.
Does this PR introduce any user-facing change?
yes
How was this patch tested?
Added a unit test.
Closes #41665 from panbingkun/SPARK-43942_FOLLOWUP.
Authored-by: panbingkun <pb...@gmail.com>
Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
.../main/scala/org/apache/spark/sql/functions.scala | 8 ++------
python/pyspark/sql/functions.py | 20 ++++++++++++++------
.../main/scala/org/apache/spark/sql/functions.scala | 12 +++++-------
.../org/apache/spark/sql/StringFunctionsSuite.scala | 8 +++++++-
4 files changed, 28 insertions(+), 20 deletions(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index c12bb23f850..ccd46c2d267 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -4025,12 +4025,8 @@ object functions {
/**
* Returns a boolean. The value is True if right is found inside left. Returns NULL if either
- * input expression is NULL. Otherwise, returns False. Both left or right must be of STRING
- * type.
- *
- * @note
- * Only STRING type is supported in this function, while `contains` in SQL supports both
- * STRING and BINARY.
+ * input expression is NULL. Otherwise, returns False. Both left or right must be of STRING or
+ * BINARY type.
*
* @group string_funcs
* @since 3.5.0
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index adef14de454..1a5633e3c5e 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -9711,15 +9711,10 @@ def contains(left: "ColumnOrName", right: "ColumnOrName") -> Column:
"""
Returns a boolean. The value is True if right is found inside left.
Returns NULL if either input expression is NULL. Otherwise, returns False.
- Both left or right must be of STRING.
+ Both left or right must be of STRING or BINARY type.
.. versionadded:: 3.5.0
- Notes
- -----
- Only STRING type is supported in this function,
- while `contains` in SQL supports both STRING and BINARY.
-
Parameters
----------
left : :class:`~pyspark.sql.Column` or str
@@ -9732,6 +9727,19 @@ def contains(left: "ColumnOrName", right: "ColumnOrName") -> Column:
>>> df = spark.createDataFrame([("Spark SQL", "Spark")], ['a', 'b'])
>>> df.select(contains(df.a, df.b).alias('r')).collect()
[Row(r=True)]
+
+ >>> df = spark.createDataFrame([("414243", "4243",)], ["c", "d"])
+ >>> df = df.select(to_binary("c").alias("c"), to_binary("d").alias("d"))
+ >>> df.printSchema()
+ root
+ |-- c: binary (nullable = true)
+ |-- d: binary (nullable = true)
+ >>> df.select(contains("c", "d"), contains("d", "c")).show()
+ +--------------+--------------+
+ |contains(c, d)|contains(d, c)|
+ +--------------+--------------+
+ | true| false|
+ +--------------+--------------+
"""
return _invoke_function_over_columns("contains", left, right)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index e7e14e30477..582e3b9e363 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -4144,17 +4144,15 @@ object functions {
/**
* Returns a boolean. The value is True if right is found inside left.
* Returns NULL if either input expression is NULL. Otherwise, returns False.
- * Both left or right must be of STRING type.
- *
- * @note
- * Only STRING type is supported in this function, while `contains` in SQL supports both
- * STRING and BINARY.
+ * Both left or right must be of STRING or BINARY type.
*
* @group string_funcs
* @since 3.5.0
*/
- def contains(left: Column, right: Column): Column = withExpr {
- Contains(left.expr, right.expr)
+ def contains(left: Column, right: Column): Column = {
+ // 'Contains' expression only supports StringType
+ // use 'call_udf' to support both StringType and BinaryType.
+ call_udf("contains", left, right)
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index d68b6da2957..bdacd8a914f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -902,9 +902,15 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
}
test("contains function") {
- val df = Seq(("Spark SQL", "Spark")).toDF("a", "b")
+ val df = Seq(("Spark SQL", "Spark", Array[Byte](1, 2, 3, 4), Array[Byte](1, 2))).
+ toDF("a", "b", "c", "d")
+
checkAnswer(df.selectExpr("contains(a, b)"), Seq(Row(true)))
checkAnswer(df.select(contains(col("a"), col("b"))), Seq(Row(true)))
+
+ // test binary
+ checkAnswer(df.selectExpr("contains(c, d)"), Seq(Row(true)))
+ checkAnswer(df.select(contains(col("c"), col("d"))), Seq(Row(true)))
}
test("elt function") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org