You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/06/18 06:46:19 UTC
[spark] branch master updated: [SPARK-43944][CONNECT][PYTHON] Add string functions to Scala and Python - part 2
This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4503efcaadd [SPARK-43944][CONNECT][PYTHON] Add string functions to Scala and Python - part 2
4503efcaadd is described below
commit 4503efcaadd838cabad806558250abed9c2ace86
Author: panbingkun <pb...@gmail.com>
AuthorDate: Sun Jun 18 14:45:59 2023 +0800
[SPARK-43944][CONNECT][PYTHON] Add string functions to Scala and Python - part 2
### What changes were proposed in this pull request?
Add the following functions:
- replace
- split_part
- substr
- parse_url
- printf
- url_decode
- url_encode
- position
- endswith
- startswith
to:
- Scala API
- Python API
- Spark Connect Scala Client
- Spark Connect Python Client
### Why are the changes needed?
for parity
### Does this PR introduce _any_ user-facing change?
Yes, new functions.
### How was this patch tested?
- Add New UT.
- Pass GA.
Closes #41594 from panbingkun/SPARK-43944.
Authored-by: panbingkun <pb...@gmail.com>
Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
.../scala/org/apache/spark/sql/functions.scala | 152 +++++++++++
.../apache/spark/sql/PlanGenerationTestSuite.scala | 58 +++-
.../explain-results/function_endswith.explain | 2 +
.../explain-results/function_parse_url.explain | 2 +
.../function_parse_url_with_key.explain | 2 +
.../explain-results/function_position.explain | 2 +
.../function_position_with_start.explain | 2 +
.../explain-results/function_printf.explain | 2 +
.../explain-results/function_replace.explain | 2 +
.../function_replace_with_specified_string.explain | 2 +
.../explain-results/function_split_part.explain | 2 +
.../explain-results/function_startswith.explain | 2 +
.../explain-results/function_substr.explain | 2 +
.../function_substr_with_len.explain | 2 +
.../explain-results/function_to_number.explain | 2 +-
.../explain-results/function_url_decode.explain | 2 +
.../explain-results/function_url_encode.explain | 2 +
...ction_to_number.json => function_endswith.json} | 6 +-
...umber.proto.bin => function_endswith.proto.bin} | Bin 188 -> 184 bytes
...tion_to_number.json => function_parse_url.json} | 6 +-
...mber.proto.bin => function_parse_url.proto.bin} | Bin 188 -> 185 bytes
...umber.json => function_parse_url_with_key.json} | 10 +-
...o.bin => function_parse_url_with_key.proto.bin} | Bin 188 -> 192 bytes
...ction_to_number.json => function_position.json} | 6 +-
...umber.proto.bin => function_position.proto.bin} | Bin 188 -> 184 bytes
...mber.json => function_position_with_start.json} | 10 +-
....bin => function_position_with_start.proto.bin} | Bin 188 -> 191 bytes
...unction_to_number.json => function_printf.json} | 10 +-
..._number.proto.bin => function_printf.proto.bin} | Bin 188 -> 196 bytes
...nction_to_number.json => function_replace.json} | 6 +-
...number.proto.bin => function_replace.proto.bin} | Bin 188 -> 183 bytes
...=> function_replace_with_specified_string.json} | 10 +-
...nction_replace_with_specified_string.proto.bin} | Bin 188 -> 190 bytes
...ion_to_number.json => function_split_part.json} | 10 +-
...ber.proto.bin => function_split_part.proto.bin} | Bin 188 -> 193 bytes
...ion_to_number.json => function_startswith.json} | 6 +-
...ber.proto.bin => function_startswith.proto.bin} | Bin 188 -> 186 bytes
...unction_to_number.json => function_substr.json} | 6 +-
..._number.proto.bin => function_substr.proto.bin} | Bin 188 -> 182 bytes
...o_number.json => function_substr_with_len.json} | 10 +-
...roto.bin => function_substr_with_len.proto.bin} | Bin 188 -> 189 bytes
.../query-tests/queries/function_to_number.json | 2 +-
.../queries/function_to_number.proto.bin | Bin 188 -> 190 bytes
...ion_to_number.json => function_url_decode.json} | 6 +-
...ber.proto.bin => function_url_decode.proto.bin} | Bin 188 -> 179 bytes
...ion_to_number.json => function_url_encode.json} | 6 +-
...ber.proto.bin => function_url_encode.proto.bin} | Bin 188 -> 179 bytes
.../source/reference/pyspark.sql/functions.rst | 10 +
python/pyspark/sql/connect/functions.py | 90 +++++++
python/pyspark/sql/functions.py | 297 +++++++++++++++++++++
.../scala/org/apache/spark/sql/functions.scala | 175 ++++++++++++
.../apache/spark/sql/DataFrameFunctionsSuite.scala | 2 +-
.../apache/spark/sql/StringFunctionsSuite.scala | 133 ++++++++-
53 files changed, 1006 insertions(+), 51 deletions(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 61783746c56..9c2a5b96182 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3819,6 +3819,158 @@ object functions {
*/
def to_number(e: Column, format: Column): Column = Column.fn("to_number", e, format)
+ /**
+ * Replaces all occurrences of `search` with `replace`.
+ *
+ * @param src
+ * A column of string to be replaced
+ * @param search
+ * A column of string. If `search` is not found in `src`, `src` is returned unchanged.
+ * @param replace
+ * A column of string. If `replace` is not specified or is an empty string, nothing replaces
+ * the string that is removed from `src`.
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def replace(src: Column, search: Column, replace: Column): Column =
+ Column.fn("replace", src, search, replace)
+
+ /**
+ * Replaces all occurrences of `search` with `replace`.
+ *
+ * @param src
+ * A column of string to be replaced
+ * @param search
+ * A column of string, If `search` is not found in `src`, `src` is returned unchanged.
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def replace(src: Column, search: Column): Column = Column.fn("replace", src, search)
+
+ /**
+ * Splits `str` by delimiter and return requested part of the split (1-based). If any input is
+ * null, returns null. If `partNum` is out of range of split parts, returns empty string. If
+ * `partNum` is 0, throws an error. If `partNum` is negative, the parts are counted backward
+ * from the end of the string. If the `delimiter` is an empty string, the `str` is not split.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def split_part(str: Column, delimiter: Column, partNum: Column): Column =
+ Column.fn("split_part", str, delimiter, partNum)
+
+ /**
+ * Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of
+ * byte array that starts at `pos` and is of length `len`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def substr(str: Column, pos: Column, len: Column): Column =
+ Column.fn("substr", str, pos, len)
+
+ /**
+ * Returns the substring of `str` that starts at `pos`, or the slice of byte array that starts
+ * at `pos`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def substr(str: Column, pos: Column): Column = Column.fn("substr", str, pos)
+
+ /**
+ * Extracts a part from a URL.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def parse_url(url: Column, partToExtract: Column, key: Column): Column =
+ Column.fn("parse_url", url, partToExtract, key)
+
+ /**
+ * Extracts a part from a URL.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def parse_url(url: Column, partToExtract: Column): Column =
+ Column.fn("parse_url", url, partToExtract)
+
+ /**
+ * Formats the arguments in printf-style and returns the result as a string column.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def printf(format: Column, arguments: Column*): Column =
+ Column.fn("format_string", lit(format) +: arguments: _*)
+
+ /**
+ * Decodes a `str` in 'application/x-www-form-urlencoded' format using a specific encoding
+ * scheme.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def url_decode(str: Column): Column = Column.fn("url_decode", str)
+
+ /**
+ * Translates a string into 'application/x-www-form-urlencoded' format using a specific encoding
+ * scheme.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def url_encode(str: Column): Column = Column.fn("url_encode", str)
+
+ /**
+ * Returns the position of the first occurrence of `substr` in `str` after position `start`. The
+ * given `start` and return value are 1-based.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def position(substr: Column, str: Column, start: Column): Column =
+ Column.fn("position", substr, str, start)
+
+ /**
+ * Returns the position of the first occurrence of `substr` in `str` after position `1`. The
+ * return value is 1-based.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def position(substr: Column, str: Column): Column =
+ Column.fn("position", substr, str)
+
+ /**
+ * Returns a boolean. The value is True if str ends with suffix. Returns NULL if either input
+ * expression is NULL. Otherwise, returns False. Both str and suffix must be of STRING type.
+ *
+ * @note
+ * Only STRING type is supported in this function, while `endswith` in SQL supports both
+ * STRING and BINARY.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def endswith(str: Column, suffix: Column): Column =
+ Column.fn("endswith", str, suffix)
+
+ /**
+ * Returns a boolean. The value is True if str starts with prefix. Returns NULL if either input
+ * expression is NULL. Otherwise, returns False. Both str and prefix must be of STRING type.
+ *
+ * @note
+ * Only STRING type is supported in this function, while `startswith` in SQL supports both
+ * STRING and BINARY.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def startswith(str: Column, prefix: Column): Column =
+ Column.fn("startswith", str, prefix)
+
//////////////////////////////////////////////////////////////////////////////////////////////
// DateTime functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 8e40a29c3d5..7633cd7d0c0 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -2391,7 +2391,63 @@ class PlanGenerationTestSuite
}
functionTest("to_number") {
- fn.to_char(fn.col("g"), lit("$99.99"))
+ fn.to_number(fn.col("g"), lit("$99.99"))
+ }
+
+ functionTest("replace") {
+ fn.replace(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("replace with specified string") {
+ fn.replace(fn.col("g"), fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("split_part") {
+ fn.split_part(fn.col("g"), fn.col("g"), fn.col("a"))
+ }
+
+ functionTest("substr") {
+ fn.substr(fn.col("g"), fn.col("a"))
+ }
+
+ functionTest("substr with len") {
+ fn.substr(fn.col("g"), fn.col("a"), fn.col("a"))
+ }
+
+ functionTest("parse_url") {
+ fn.parse_url(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("parse_url with key") {
+ fn.parse_url(fn.col("g"), fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("printf") {
+ fn.printf(fn.col("g"), fn.col("a"), fn.col("g"))
+ }
+
+ functionTest("url_decode") {
+ fn.url_decode(fn.col("g"))
+ }
+
+ functionTest("url_encode") {
+ fn.url_encode(fn.col("g"))
+ }
+
+ functionTest("position") {
+ fn.position(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("position with start") {
+ fn.position(fn.col("g"), fn.col("g"), fn.col("a"))
+ }
+
+ functionTest("endswith") {
+ fn.endswith(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("startswith") {
+ fn.startswith(fn.col("g"), fn.col("g"))
}
functionTest("to_timestamp_ltz") {
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_endswith.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_endswith.explain
new file mode 100644
index 00000000000..f78ed7492a5
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_endswith.explain
@@ -0,0 +1,2 @@
+Project [EndsWith(g#0, g#0) AS endswith(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_parse_url.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_parse_url.explain
new file mode 100644
index 00000000000..3c874b5c8b6
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_parse_url.explain
@@ -0,0 +1,2 @@
+Project [parse_url(g#0, g#0, false) AS parse_url(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_parse_url_with_key.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_parse_url_with_key.explain
new file mode 100644
index 00000000000..eba1c5c814f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_parse_url_with_key.explain
@@ -0,0 +1,2 @@
+Project [parse_url(g#0, g#0, g#0, false) AS parse_url(g, g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_position.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_position.explain
new file mode 100644
index 00000000000..b0ec42dfc58
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_position.explain
@@ -0,0 +1,2 @@
+Project [position(g#0, g#0, 1) AS position(g, g, 1)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_position_with_start.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_position_with_start.explain
new file mode 100644
index 00000000000..c17e658faff
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_position_with_start.explain
@@ -0,0 +1,2 @@
+Project [position(g#0, g#0, a#0) AS position(g, g, a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_printf.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_printf.explain
new file mode 100644
index 00000000000..10409df0070
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_printf.explain
@@ -0,0 +1,2 @@
+Project [format_string(g#0, a#0, g#0) AS format_string(g, a, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_replace.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_replace.explain
new file mode 100644
index 00000000000..b521eedaff1
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_replace.explain
@@ -0,0 +1,2 @@
+Project [replace(g#0, g#0, ) AS replace(g, g, )#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_replace_with_specified_string.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_replace_with_specified_string.explain
new file mode 100644
index 00000000000..1f5609d75ec
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_replace_with_specified_string.explain
@@ -0,0 +1,2 @@
+Project [replace(g#0, g#0, g#0) AS replace(g, g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_split_part.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_split_part.explain
new file mode 100644
index 00000000000..486b1a4538c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_split_part.explain
@@ -0,0 +1,2 @@
+Project [element_at(stringsplitsql(g#0, g#0), a#0, Some(), false) AS split_part(g, g, a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_startswith.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_startswith.explain
new file mode 100644
index 00000000000..4b7c2d6f28f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_startswith.explain
@@ -0,0 +1,2 @@
+Project [StartsWith(g#0, g#0) AS startswith(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_substr.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_substr.explain
new file mode 100644
index 00000000000..434836cb8d8
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_substr.explain
@@ -0,0 +1,2 @@
+Project [substr(g#0, a#0, 2147483647) AS substr(g, a, 2147483647)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_substr_with_len.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_substr_with_len.explain
new file mode 100644
index 00000000000..98f51716e34
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_substr_with_len.explain
@@ -0,0 +1,2 @@
+Project [substr(g#0, a#0, a#0) AS substr(g, a, a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_number.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_number.explain
index 79ece963928..37a68489ba8 100644
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_number.explain
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_number.explain
@@ -1,2 +1,2 @@
-Project [to_char(cast(g#0 as decimal(38,18)), $99.99) AS to_char(g, $99.99)#0]
+Project [to_number(g#0, $99.99) AS to_number(g, $99.99)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_decode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_decode.explain
new file mode 100644
index 00000000000..36b21e27c10
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_decode.explain
@@ -0,0 +1,2 @@
+Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.UrlCodec$, StringType, decode, g#0, UTF-8, StringType, true, true, true) AS url_decode(g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_encode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_encode.explain
new file mode 100644
index 00000000000..70a0f628fc9
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_encode.explain
@@ -0,0 +1,2 @@
+Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.UrlCodec$, StringType, encode, g#0, UTF-8, StringType, true, true, true) AS url_encode(g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_endswith.json
similarity index 84%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_endswith.json
index a39682de10f..1f7943f5116 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_endswith.json
@@ -13,14 +13,14 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "endswith",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_endswith.proto.bin
similarity index 70%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_endswith.proto.bin
index 86ab9d23572..2dfef1c6d86 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_endswith.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_url.json
similarity index 83%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_parse_url.json
index a39682de10f..e03b86c21eb 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_url.json
@@ -13,14 +13,14 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "parse_url",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_url.proto.bin
similarity index 70%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_parse_url.proto.bin
index 86ab9d23572..56917289c1e 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_url.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.json
similarity index 73%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.json
index a39682de10f..bd627911ef2 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.json
@@ -13,14 +13,18 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "parse_url",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.proto.bin
similarity index 68%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.proto.bin
index 86ab9d23572..231622cbd8a 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_position.json
similarity index 84%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_position.json
index a39682de10f..7b005e2bb82 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_position.json
@@ -13,14 +13,14 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "position",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_position.proto.bin
similarity index 70%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_position.proto.bin
index 86ab9d23572..34b7e301fe9 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_position.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_position_with_start.json
similarity index 73%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_position_with_start.json
index a39682de10f..2cd04992d1d 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_position_with_start.json
@@ -13,14 +13,18 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "position",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_position_with_start.proto.bin
similarity index 69%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_position_with_start.proto.bin
index 86ab9d23572..b34eaf80f88 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_position_with_start.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_printf.json
similarity index 73%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_printf.json
index a39682de10f..dc7ca880c4b 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_printf.json
@@ -13,14 +13,18 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "format_string",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_printf.proto.bin
similarity index 67%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_printf.proto.bin
index 86ab9d23572..7ebdda6cac1 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_printf.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_replace.json
similarity index 84%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_replace.json
index a39682de10f..2f6df6833f3 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_replace.json
@@ -13,14 +13,14 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "replace",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_replace.proto.bin
similarity index 70%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_replace.proto.bin
index 86ab9d23572..0564f7ed575 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_replace.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.json
similarity index 73%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.json
index a39682de10f..2e91450552c 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.json
@@ -13,14 +13,18 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "replace",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.proto.bin
similarity index 69%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.proto.bin
index 86ab9d23572..136a6b31821 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_split_part.json
similarity index 73%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_split_part.json
index a39682de10f..81ced1555d3 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_split_part.json
@@ -13,14 +13,18 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "split_part",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_split_part.proto.bin
similarity index 68%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_split_part.proto.bin
index 86ab9d23572..2c1948f20dc 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_split_part.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_startswith.json
similarity index 83%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_startswith.json
index a39682de10f..ce2b0ac658c 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_startswith.json
@@ -13,14 +13,14 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "startswith",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_startswith.proto.bin
similarity index 70%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_startswith.proto.bin
index 86ab9d23572..2f09e8095f5 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_startswith.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_substr.json
similarity index 84%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_substr.json
index a39682de10f..ef6d225821c 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_substr.json
@@ -13,14 +13,14 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "substr",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_substr.proto.bin
similarity index 70%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_substr.proto.bin
index 86ab9d23572..934201c4333 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_substr.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.json
similarity index 73%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.json
index a39682de10f..d8492899d69 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.json
@@ -13,14 +13,18 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "substr",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
}, {
- "literal": {
- "string": "$99.99"
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
}
}]
}
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.proto.bin
similarity index 69%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.proto.bin
index 86ab9d23572..0fab03c0250 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
index a39682de10f..abb71e80a76 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
@@ -13,7 +13,7 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "to_number",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
index 86ab9d23572..189c73553c5 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_url_decode.json
similarity index 82%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_url_decode.json
index a39682de10f..d4cdeeb6c48 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_url_decode.json
@@ -13,15 +13,11 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "url_decode",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
- }, {
- "literal": {
- "string": "$99.99"
- }
}]
}
}]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_url_decode.proto.bin
similarity index 70%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_url_decode.proto.bin
index 86ab9d23572..e347e73c3ae 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_url_decode.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/connector/connect/common/src/test/resources/query-tests/queries/function_url_encode.json
similarity index 82%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
copy to connector/connect/common/src/test/resources/query-tests/queries/function_url_encode.json
index a39682de10f..5d221e0fea6 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_url_encode.json
@@ -13,15 +13,11 @@
},
"expressions": [{
"unresolvedFunction": {
- "functionName": "to_char",
+ "functionName": "url_encode",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "g"
}
- }, {
- "literal": {
- "string": "$99.99"
- }
}]
}
}]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_url_encode.proto.bin
similarity index 70%
copy from connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin
copy to connector/connect/common/src/test/resources/query-tests/queries/function_url_encode.proto.bin
index 86ab9d23572..9313fb82498 100644
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_url_encode.proto.bin differ
diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index ede67262df5..66b2ad149dc 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -357,6 +357,7 @@ String Functions
concat_ws
decode
encode
+ endswith
format_number
format_string
initcap
@@ -368,6 +369,9 @@ String Functions
lpad
ltrim
octet_length
+ parse_url
+ position
+ printf
rlike
regexp
regexp_like
@@ -377,12 +381,16 @@ String Functions
regexp_replace
regexp_substr
regexp_instr
+ replace
unbase64
rpad
repeat
rtrim
soundex
split
+ split_part
+ startswith
+ substr
substring
substring_index
overlay
@@ -393,6 +401,8 @@ String Functions
translate
trim
upper
+ url_decode
+ url_encode
Bitwise Functions
diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index 4edbc54a556..84a44baccdc 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -2400,6 +2400,96 @@ def to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column:
to_number.__doc__ = pysparkfuncs.to_number.__doc__
+def replace(
+ src: "ColumnOrName", search: "ColumnOrName", replace: Optional["ColumnOrName"] = None
+) -> Column:
+ if replace is not None:
+ return _invoke_function_over_columns("replace", src, search, replace)
+ else:
+ return _invoke_function_over_columns("replace", src, search)
+
+
+replace.__doc__ = pysparkfuncs.replace.__doc__
+
+
+def split_part(src: "ColumnOrName", delimiter: "ColumnOrName", partNum: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("split_part", src, delimiter, partNum)
+
+
+split_part.__doc__ = pysparkfuncs.split_part.__doc__
+
+
+def substr(
+ str: "ColumnOrName", pos: "ColumnOrName", len: Optional["ColumnOrName"] = None
+) -> Column:
+ if len is not None:
+ return _invoke_function_over_columns("substr", str, pos, len)
+ else:
+ return _invoke_function_over_columns("substr", str, pos)
+
+
+substr.__doc__ = pysparkfuncs.substr.__doc__
+
+
+def parse_url(
+ url: "ColumnOrName", partToExtract: "ColumnOrName", key: Optional["ColumnOrName"] = None
+) -> Column:
+ if key is not None:
+ return _invoke_function_over_columns("parse_url", url, partToExtract, key)
+ else:
+ return _invoke_function_over_columns("parse_url", url, partToExtract)
+
+
+parse_url.__doc__ = pysparkfuncs.parse_url.__doc__
+
+
+def printf(format: "ColumnOrName", *cols: "ColumnOrName") -> Column:
+ return _invoke_function("printf", lit(format), *[_to_col(c) for c in cols])
+
+
+printf.__doc__ = pysparkfuncs.printf.__doc__
+
+
+def url_decode(str: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("url_decode", str)
+
+
+url_decode.__doc__ = pysparkfuncs.url_decode.__doc__
+
+
+def url_encode(str: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("url_encode", str)
+
+
+url_encode.__doc__ = pysparkfuncs.url_encode.__doc__
+
+
+def position(
+ substr: "ColumnOrName", str: "ColumnOrName", start: Optional["ColumnOrName"] = None
+) -> Column:
+ if start is not None:
+ return _invoke_function_over_columns("position", substr, str, start)
+ else:
+ return _invoke_function_over_columns("position", substr, str)
+
+
+position.__doc__ = pysparkfuncs.position.__doc__
+
+
+def endswith(str: "ColumnOrName", suffix: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("endswith", str, suffix)
+
+
+endswith.__doc__ = pysparkfuncs.endswith.__doc__
+
+
+def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("startswith", str, prefix)
+
+
+startswith.__doc__ = pysparkfuncs.startswith.__doc__
+
+
# Date/Timestamp functions
# TODO(SPARK-41455): Resolve dtypes inconsistencies for:
# to_timestamp, from_utc_timestamp, to_utc_timestamp,
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index e9b6ee8fa40..0d8f69daabb 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -9091,6 +9091,7 @@ def to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> C
return _invoke_function_over_columns("to_binary", col)
+@try_remote_functions
def to_char(col: "ColumnOrName", format: "ColumnOrName") -> Column:
"""
Convert `col` to a string based on the `format`.
@@ -9130,6 +9131,7 @@ def to_char(col: "ColumnOrName", format: "ColumnOrName") -> Column:
return _invoke_function_over_columns("to_char", col, format)
+@try_remote_functions
def to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column:
"""
Convert string 'col' to a number based on the string format 'format'.
@@ -9170,6 +9172,301 @@ def to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column:
return _invoke_function_over_columns("to_number", col, format)
+@try_remote_functions
+def replace(
+ src: "ColumnOrName", search: "ColumnOrName", replace: Optional["ColumnOrName"] = None
+) -> Column:
+ """
+ Replaces all occurrences of `search` with `replace`.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ src : :class:`~pyspark.sql.Column` or str
+ A column of string to be replaced.
+ search : :class:`~pyspark.sql.Column` or str
+ A column of string. If `search` is not found in `src`, `src` is returned unchanged.
+ replace : :class:`~pyspark.sql.Column` or str, optional
+ A column of string. If `replace` is not specified or is an empty string,
+ nothing replaces the string that is removed from `src`.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("ABCabc", "abc", "DEF",)], ["a", "b", "c"])
+ >>> df.select(replace(df.a, df.b, df.c).alias('r')).collect()
+ [Row(r='ABCDEF')]
+
+ >>> df.select(replace(df.a, df.b).alias('r')).collect()
+ [Row(r='ABC')]
+ """
+ if replace is not None:
+ return _invoke_function_over_columns("replace", src, search, replace)
+ else:
+ return _invoke_function_over_columns("replace", src, search)
+
+
+@try_remote_functions
+def split_part(src: "ColumnOrName", delimiter: "ColumnOrName", partNum: "ColumnOrName") -> Column:
+ """
+ Splits `src` by delimiter and returns the requested part of the split (1-based).
+ If any input is null, returns null. If `partNum` is out of range of split parts,
+ returns empty string. If `partNum` is 0, throws an error. If `partNum` is negative,
+ the parts are counted backward from the end of the string.
+ If the `delimiter` is an empty string, the `src` is not split.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ src : :class:`~pyspark.sql.Column` or str
+ A column of string to be split.
+ delimiter : :class:`~pyspark.sql.Column` or str
+ A column of string, the delimiter used for split.
+ partNum : :class:`~pyspark.sql.Column` or str
+ A column of integer, the requested part of the split (1-based).
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("11.12.13", ".", 3,)], ["a", "b", "c"])
+ >>> df.select(split_part(df.a, df.b, df.c).alias('r')).collect()
+ [Row(r='13')]
+ """
+ return _invoke_function_over_columns("split_part", src, delimiter, partNum)
+
+
+@try_remote_functions
+def substr(
+ str: "ColumnOrName", pos: "ColumnOrName", len: Optional["ColumnOrName"] = None
+) -> Column:
+ """
+ Returns the substring of `str` that starts at `pos` and is of length `len`,
+ or the slice of byte array that starts at `pos` and is of length `len`.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ A column of string.
+ pos : :class:`~pyspark.sql.Column` or str
+ A column of integer, the position at which the substring of `str` starts.
+ len : :class:`~pyspark.sql.Column` or str, optional
+ A column of integer, the length of the substring of `str`.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark SQL", 5, 1,)], ["a", "b", "c"])
+ >>> df.select(substr(df.a, df.b, df.c).alias('r')).collect()
+ [Row(r='k')]
+
+ >>> df.select(substr(df.a, df.b).alias('r')).collect()
+ [Row(r='k SQL')]
+ """
+ if len is not None:
+ return _invoke_function_over_columns("substr", str, pos, len)
+ else:
+ return _invoke_function_over_columns("substr", str, pos)
+
+
+@try_remote_functions
+def parse_url(
+ url: "ColumnOrName", partToExtract: "ColumnOrName", key: Optional["ColumnOrName"] = None
+) -> Column:
+ """
+ Extracts a part from a URL.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ url : :class:`~pyspark.sql.Column` or str
+ A column of string.
+ partToExtract : :class:`~pyspark.sql.Column` or str
+ A column of string, the part of the URL to extract.
+ key : :class:`~pyspark.sql.Column` or str, optional
+ A column of string, the key.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame(
+ ... [("http://spark.apache.org/path?query=1", "QUERY", "query",)],
+ ... ["a", "b", "c"]
+ ... )
+ >>> df.select(parse_url(df.a, df.b, df.c).alias('r')).collect()
+ [Row(r='1')]
+
+ >>> df.select(parse_url(df.a, df.b).alias('r')).collect()
+ [Row(r='query=1')]
+ """
+ if key is not None:
+ return _invoke_function_over_columns("parse_url", url, partToExtract, key)
+ else:
+ return _invoke_function_over_columns("parse_url", url, partToExtract)
+
+
+@try_remote_functions
+def printf(format: "ColumnOrName", *cols: "ColumnOrName") -> Column:
+ """
+ Formats the arguments in printf-style and returns the result as a string column.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ format : :class:`~pyspark.sql.Column` or str
+ string that can contain embedded format tags and used as result column's value
+ cols : :class:`~pyspark.sql.Column` or str
+ column names or :class:`~pyspark.sql.Column`\\s to be used in formatting
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("aa%d%s", 123, "cc",)], ["a", "b", "c"])
+ >>> df.select(printf(df.a, df.b, df.c).alias('r')).collect()
+ [Row(r='aa123cc')]
+ """
+ sc = get_active_spark_context()
+ return _invoke_function("printf", _to_java_column(format), _to_seq(sc, cols, _to_java_column))
+
+
+@try_remote_functions
+def url_decode(str: "ColumnOrName") -> Column:
+ """
+ Decodes a `str` in 'application/x-www-form-urlencoded' format
+ using a specific encoding scheme.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ A column of string to decode.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("https%3A%2F%2Fspark.apache.org",)], ["a"])
+ >>> df.select(url_decode(df.a).alias('r')).collect()
+ [Row(r='https://spark.apache.org')]
+ """
+ return _invoke_function_over_columns("url_decode", str)
+
+
+@try_remote_functions
+def url_encode(str: "ColumnOrName") -> Column:
+ """
+ Translates a string into 'application/x-www-form-urlencoded' format
+ using a specific encoding scheme.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ A column of string to encode.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("https://spark.apache.org",)], ["a"])
+ >>> df.select(url_encode(df.a).alias('r')).collect()
+ [Row(r='https%3A%2F%2Fspark.apache.org')]
+ """
+ return _invoke_function_over_columns("url_encode", str)
+
+
+@try_remote_functions
+def position(
+ substr: "ColumnOrName", str: "ColumnOrName", start: Optional["ColumnOrName"] = None
+) -> Column:
+ """
+ Returns the position of the first occurrence of `substr` in `str` after position `start`.
+ The given `start` and return value are 1-based.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ substr : :class:`~pyspark.sql.Column` or str
+ A column of string, substring.
+ str : :class:`~pyspark.sql.Column` or str
+ A column of string.
+ start : :class:`~pyspark.sql.Column` or str, optional
+ A column of integer, the start position (1-based).
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("bar", "foobarbar", 5,)], ["a", "b", "c"])
+ >>> df.select(position(df.a, df.b, df.c).alias('r')).collect()
+ [Row(r=7)]
+
+ >>> df.select(position(df.a, df.b).alias('r')).collect()
+ [Row(r=4)]
+ """
+ if start is not None:
+ return _invoke_function_over_columns("position", substr, str, start)
+ else:
+ return _invoke_function_over_columns("position", substr, str)
+
+
+@try_remote_functions
+def endswith(str: "ColumnOrName", suffix: "ColumnOrName") -> Column:
+ """
+ Returns a boolean. The value is True if str ends with suffix.
+ Returns NULL if either input expression is NULL. Otherwise, returns False.
+ Both str and suffix must be of STRING type.
+
+ .. versionadded:: 3.5.0
+
+ Notes
+ -----
+ Only STRING type is supported in this function,
+ while `endswith` in SQL supports both STRING and BINARY.
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ A column of string.
+ suffix : :class:`~pyspark.sql.Column` or str
+ A column of string, the suffix.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark SQL", "Spark",)], ["a", "b"])
+ >>> df.select(endswith(df.a, df.b).alias('r')).collect()
+ [Row(r=False)]
+ """
+ return _invoke_function_over_columns("endswith", str, suffix)
+
+
+@try_remote_functions
+def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column:
+ """
+ Returns a boolean. The value is True if str starts with prefix.
+ Returns NULL if either input expression is NULL. Otherwise, returns False.
+ Both str and prefix must be of STRING type.
+
+ .. versionadded:: 3.5.0
+
+ Notes
+ -----
+ Only STRING type is supported in this function,
+ while `startswith` in SQL supports both STRING and BINARY.
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ A column of string.
+ prefix : :class:`~pyspark.sql.Column` or str
+ A column of string, the prefix.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark SQL", "Spark",)], ["a", "b"])
+ >>> df.select(startswith(df.a, df.b).alias('r')).collect()
+ [Row(r=True)]
+ """
+ return _invoke_function_over_columns("startswith", str, prefix)
+
+
# ---------------------- Collection functions ------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index a76044ac98e..81a57368a8d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3900,6 +3900,181 @@ object functions {
ToNumber(e.expr, format.expr)
}
+ /**
+ * Replaces all occurrences of `search` with `replace`.
+ *
+ * @param src
+ * A column of string to be replaced
+ * @param search
+ * A column of string. If `search` is not found in `src`, `src` is returned unchanged.
+ * @param replace
+ * A column of string. If `replace` is not specified or is an empty string, nothing replaces
+ * the string that is removed from `src`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def replace(src: Column, search: Column, replace: Column): Column = withExpr {
+ StringReplace(src.expr, search.expr, replace.expr)
+ }
+
+ /**
+ * Replaces all occurrences of `search` with `replace`.
+ *
+ * @param src
+ * A column of string to be replaced
+ * @param search
+ * A column of string, If `search` is not found in `src`, `src` is returned unchanged.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def replace(src: Column, search: Column): Column = withExpr {
+ new StringReplace(src.expr, search.expr)
+ }
+
+ /**
+ * Splits `str` by delimiter and returns the requested part of the split (1-based).
+ * If any input is null, returns null. If `partNum` is out of range of split parts,
+ * returns empty string. If `partNum` is 0, throws an error. If `partNum` is negative,
+ * the parts are counted backward from the end of the string.
+ * If the `delimiter` is an empty string, the `str` is not split.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def split_part(str: Column, delimiter: Column, partNum: Column): Column = withExpr {
+ SplitPart(str.expr, delimiter.expr, partNum.expr)
+ }
+
+ /**
+ * Returns the substring of `str` that starts at `pos` and is of length `len`,
+ * or the slice of byte array that starts at `pos` and is of length `len`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def substr(str: Column, pos: Column, len: Column): Column = withExpr {
+ Substring(str.expr, pos.expr, len.expr)
+ }
+
+ /**
+ * Returns the substring of `str` that starts at `pos`,
+ * or the slice of byte array that starts at `pos`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def substr(str: Column, pos: Column): Column = withExpr {
+ new Substring(str.expr, pos.expr)
+ }
+
+ /**
+ * Extracts a part from a URL.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def parse_url(url: Column, partToExtract: Column, key: Column): Column = withExpr {
+ ParseUrl(Seq(url.expr, partToExtract.expr, key.expr))
+ }
+
+ /**
+ * Extracts a part from a URL.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def parse_url(url: Column, partToExtract: Column): Column = withExpr {
+ ParseUrl(Seq(url.expr, partToExtract.expr))
+ }
+
+ /**
+ * Formats the arguments in printf-style and returns the result as a string column.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def printf(format: Column, arguments: Column*): Column = withExpr {
+ FormatString((lit(format) +: arguments).map(_.expr): _*)
+ }
+
+ /**
+ * Decodes a `str` in 'application/x-www-form-urlencoded' format
+ * using a specific encoding scheme.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def url_decode(str: Column): Column = withExpr {
+ UrlDecode(str.expr)
+ }
+
+ /**
+ * Translates a string into 'application/x-www-form-urlencoded' format
+ * using a specific encoding scheme.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def url_encode(str: Column): Column = withExpr {
+ UrlEncode(str.expr)
+ }
+
+ /**
+ * Returns the position of the first occurrence of `substr` in `str` after position `start`.
+ * The given `start` and return value are 1-based.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def position(substr: Column, str: Column, start: Column): Column = withExpr {
+ StringLocate(substr.expr, str.expr, start.expr)
+ }
+
+ /**
+ * Returns the position of the first occurrence of `substr` in `str` after position `1`.
+ * The return value is 1-based.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def position(substr: Column, str: Column): Column = withExpr {
+ new StringLocate(substr.expr, str.expr)
+ }
+
+ /**
+ * Returns a boolean. The value is True if str ends with suffix.
+ * Returns NULL if either input expression is NULL. Otherwise, returns False.
+ * Both str and suffix must be of STRING type.
+ *
+ * @note
+ * Only STRING type is supported in this function, while `endswith` in SQL supports both
+ * STRING and BINARY.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def endswith(str: Column, suffix: Column): Column = withExpr {
+ EndsWith(str.expr, suffix.expr)
+ }
+
+ /**
+ * Returns a boolean. The value is True if str starts with prefix.
+ * Returns NULL if either input expression is NULL. Otherwise, returns False.
+ * Both str and prefix must be of STRING type.
+ *
+ * @note
+ * Only STRING type is supported in this function, while `startswith` in SQL supports both
+ * STRING and BINARY.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def startswith(str: Column, prefix: Column): Column = withExpr {
+ StartsWith(str.expr, prefix.expr)
+ }
+
//////////////////////////////////////////////////////////////////////////////////////////////
// DateTime functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 30583cfd0d0..9b41720bf9a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -78,7 +78,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
val excludedSqlFunctions = Set(
"random",
"array_agg", "char_length", "character_length",
- "lcase", "position", "printf", "substr", "ucase", "day", "cardinality", "sha",
+ "lcase", "ucase", "day", "cardinality", "sha",
// aliases for existing functions
"reflect", "java_method" // Only needed in SQL
)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index 9b45f645af6..21ab4899a75 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -862,7 +862,7 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
}
test("to_number") {
- val df = Seq("$78.12").toDF("a")
+ val df = Seq("$78.12").toDF("a")
checkAnswer(
df.selectExpr("to_number(a, '$99.99')"),
Seq(Row(78.12))
@@ -872,4 +872,135 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
Seq(Row(78.12))
)
}
+
+ test("replace") {
+ val df = Seq(("ABCabc", "abc", "DEF")).toDF("a", "b", "c")
+ // Three-argument form: the "abc" match is replaced with "DEF"; the upper-case "ABC" stays.
+ checkAnswer(
+ df.selectExpr("replace(a, b, c)"),
+ Seq(Row("ABCDEF"))
+ )
+ checkAnswer(
+ df.select(replace(col("a"), col("b"), col("c"))),
+ Seq(Row("ABCDEF"))
+ )
+ // Two-argument form: no replacement is given and the expected result drops the match.
+ checkAnswer(
+ df.selectExpr("replace(a, b)"),
+ Seq(Row("ABC"))
+ )
+ checkAnswer(
+ df.select(replace(col("a"), col("b"))),
+ Seq(Row("ABC"))
+ )
+ }
+
+ test("split_part") { // the SQL expression and the Column API must return the same part
+ val df = Seq(("11.12.13", ".", 3)).toDF("a", "b", "c") // 3rd "."-separated part is "13"
+ checkAnswer(
+ df.selectExpr("split_part(a, b, c)"),
+ Seq(Row("13"))
+ )
+ checkAnswer(
+ df.select(split_part(col("a"), col("b"), col("c"))),
+ Seq(Row("13"))
+ )
+ }
+
+ test("substr") {
+ val df = Seq(("Spark SQL", 5, 1)).toDF("a", "b", "c") // 5th character of "Spark SQL" is 'k'
+ checkAnswer(
+ df.selectExpr("substr(a, b, c)"),
+ Seq(Row("k"))
+ )
+ checkAnswer(
+ df.select(substr(col("a"), col("b"), col("c"))),
+ Seq(Row("k"))
+ )
+ // Two-argument form: no length is given and the rest of the string from b is expected.
+ checkAnswer(
+ df.selectExpr("substr(a, b)"),
+ Seq(Row("k SQL"))
+ )
+ checkAnswer(
+ df.select(substr(col("a"), col("b"))),
+ Seq(Row("k SQL"))
+ )
+ }
+
+ test("parse_url") {
+ val df = Seq(("http://spark.apache.org/path?query=1", "QUERY", "query")).toDF("a", "b", "c")
+ // Three-argument form: the value of the "query" key inside the QUERY part is expected.
+ checkAnswer(
+ df.selectExpr("parse_url(a, b, c)"),
+ Seq(Row("1"))
+ )
+ checkAnswer(
+ df.select(parse_url(col("a"), col("b"), col("c"))),
+ Seq(Row("1"))
+ )
+ // Two-argument form: the whole QUERY part of the URL is expected.
+ checkAnswer(
+ df.selectExpr("parse_url(a, b)"),
+ Seq(Row("query=1"))
+ )
+ checkAnswer(
+ df.select(parse_url(col("a"), col("b"))),
+ Seq(Row("query=1"))
+ )
+ }
+
+ test("printf") { // the SQL expression and the Column API must format identically
+ val df = Seq(("aa%d%s", 123, "cc")).toDF("a", "b", "c") // a is the format; b, c the arguments
+ checkAnswer(
+ df.selectExpr("printf(a, b, c)"),
+ Row("aa123cc"))
+ checkAnswer(
+ df.select(printf(col("a"), col("b"), col("c"))),
+ Row("aa123cc"))
+ }
+
+ test("url_decode") { // "%3A" and "%2F" are expected to decode to ':' and '/'
+ val df = Seq("https%3A%2F%2Fspark.apache.org").toDF("a")
+ checkAnswer(
+ df.selectExpr("url_decode(a)"),
+ Row("https://spark.apache.org"))
+ checkAnswer(
+ df.select(url_decode(col("a"))),
+ Row("https://spark.apache.org"))
+ }
+
+ test("url_encode") { // ':' and '/' are expected to encode to "%3A" and "%2F"
+ val df = Seq("https://spark.apache.org").toDF("a")
+ checkAnswer(
+ df.selectExpr("url_encode(a)"),
+ Row("https%3A%2F%2Fspark.apache.org"))
+ checkAnswer(
+ df.select(url_encode(col("a"))),
+ Row("https%3A%2F%2Fspark.apache.org"))
+ }
+
+ test("position") {
+ val df = Seq(("bar", "foobarbar", 5)).toDF("a", "b", "c")
+ // Two-argument form: the first "bar" in "foobarbar" is expected at 1-based index 4.
+ checkAnswer(df.selectExpr("position(a, b)"), Row(4))
+ checkAnswer(df.select(position(col("a"), col("b"))), Row(4))
+ // Three-argument form: with c = 5 the later match at 7 is expected (c presumably = start).
+ checkAnswer(df.selectExpr("position(a, b, c)"), Row(7))
+ checkAnswer(df.select(position(col("a"), col("b"), col("c"))), Row(7))
+ }
+
+ test("endswith") {
+ val df = Seq(("Spark SQL", "Spark")).toDF("a", "b")
+ // "Spark SQL" does not end with "Spark". NOTE(review): only the false case is covered.
+ checkAnswer(df.selectExpr("endswith(a, b)"), Row(false))
+ checkAnswer(df.select(endswith(col("a"), col("b"))), Row(false))
+ }
+
+ test("startswith") {
+ val df = Seq(("Spark SQL", "Spark")).toDF("a", "b")
+ // "Spark SQL" begins with "Spark", so both the SQL and Column forms must return true.
+ checkAnswer(df.selectExpr("startswith(a, b)"), Row(true))
+ checkAnswer(df.select(startswith(col("a"), col("b"))), Row(true))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org