You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/06/19 12:36:45 UTC
[spark] branch master updated: [SPARK-43942][CONNECT][PYTHON] Add string functions to Scala and Python - part 1
This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 476c58ed26a [SPARK-43942][CONNECT][PYTHON] Add string functions to Scala and Python - part 1
476c58ed26a is described below
commit 476c58ed26a1155fabe5afe1eb502bb992f31954
Author: panbingkun <pb...@gmail.com>
AuthorDate: Mon Jun 19 20:36:18 2023 +0800
[SPARK-43942][CONNECT][PYTHON] Add string functions to Scala and Python - part 1
### What changes were proposed in this pull request?
Add following functions:
- char
- btrim
- char_length
- character_length
- chr
- contains
- elt
- find_in_set
- like
- ilike
- lcase
- ucase
- ~~len: Excluded because it conflicts with the Python built-in function `len`, and we already have `length`~~
- left
- right
to:
- Scala API
- Python API
- Spark Connect Scala Client
- Spark Connect Python Client
### Why are the changes needed?
For parity: these SQL built-in string functions were not yet exposed in the Scala API, Python API, or the Spark Connect Scala/Python clients.
### Does this PR introduce _any_ user-facing change?
Yes, new functions.
### How was this patch tested?
- Added new unit tests.
Closes #41561 from panbingkun/SPARK-43942.
Authored-by: panbingkun <pb...@gmail.com>
Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
.../scala/org/apache/spark/sql/functions.scala | 160 +++++++++
.../apache/spark/sql/PlanGenerationTestSuite.scala | 68 ++++
.../explain-results/function_btrim.explain | 2 +
...nction_btrim_with_specified_trim_string.explain | 2 +
.../explain-results/function_char.explain | 2 +
.../explain-results/function_char_length.explain | 2 +
.../function_character_length.explain | 2 +
.../explain-results/function_chr.explain | 2 +
.../explain-results/function_contains.explain | 2 +
.../explain-results/function_elt.explain | 2 +
.../explain-results/function_find_in_set.explain | 2 +
.../explain-results/function_ilike.explain | 2 +
.../function_ilike_with_escape.explain | 2 +
.../explain-results/function_lcase.explain | 2 +
.../explain-results/function_left.explain | 2 +
.../explain-results/function_like.explain | 2 +
.../function_like_with_escape.explain | 2 +
.../explain-results/function_right.explain | 2 +
.../explain-results/function_ucase.explain | 2 +
.../query-tests/queries/function_btrim.json | 25 ++
.../query-tests/queries/function_btrim.proto.bin | Bin 0 -> 174 bytes
.../function_btrim_with_specified_trim_string.json | 29 ++
...tion_btrim_with_specified_trim_string.proto.bin | Bin 0 -> 181 bytes
.../query-tests/queries/function_char.json | 25 ++
.../query-tests/queries/function_char.proto.bin | Bin 0 -> 173 bytes
.../query-tests/queries/function_char_length.json | 25 ++
.../queries/function_char_length.proto.bin | Bin 0 -> 180 bytes
.../queries/function_character_length.json | 25 ++
.../queries/function_character_length.proto.bin | Bin 0 -> 185 bytes
.../query-tests/queries/function_chr.json | 25 ++
.../query-tests/queries/function_chr.proto.bin | Bin 0 -> 172 bytes
.../query-tests/queries/function_contains.json | 29 ++
.../queries/function_contains.proto.bin | Bin 0 -> 184 bytes
.../query-tests/queries/function_elt.json | 33 ++
.../query-tests/queries/function_elt.proto.bin | Bin 0 -> 186 bytes
.../query-tests/queries/function_find_in_set.json | 29 ++
.../queries/function_find_in_set.proto.bin | Bin 0 -> 187 bytes
.../query-tests/queries/function_ilike.json | 29 ++
.../query-tests/queries/function_ilike.proto.bin | Bin 0 -> 181 bytes
.../queries/function_ilike_with_escape.json | 33 ++
.../queries/function_ilike_with_escape.proto.bin | Bin 0 -> 188 bytes
.../query-tests/queries/function_lcase.json | 25 ++
.../query-tests/queries/function_lcase.proto.bin | Bin 0 -> 174 bytes
.../query-tests/queries/function_left.json | 29 ++
.../query-tests/queries/function_left.proto.bin | Bin 0 -> 180 bytes
.../query-tests/queries/function_like.json | 29 ++
.../query-tests/queries/function_like.proto.bin | Bin 0 -> 180 bytes
.../queries/function_like_with_escape.json | 33 ++
.../queries/function_like_with_escape.proto.bin | Bin 0 -> 187 bytes
.../query-tests/queries/function_right.json | 29 ++
.../query-tests/queries/function_right.proto.bin | Bin 0 -> 181 bytes
.../query-tests/queries/function_ucase.json | 25 ++
.../query-tests/queries/function_ucase.proto.bin | Bin 0 -> 174 bytes
.../sql/connect/planner/SparkConnectPlanner.scala | 12 +
core/src/main/resources/error/error-classes.json | 5 +
.../source/reference/pyspark.sql/functions.rst | 14 +
python/pyspark/sql/connect/functions.py | 111 ++++++
python/pyspark/sql/functions.py | 390 +++++++++++++++++++++
.../spark/sql/errors/QueryCompilationErrors.scala | 6 +
.../scala/org/apache/spark/sql/functions.scala | 204 +++++++++++
.../apache/spark/sql/DataFrameFunctionsSuite.scala | 4 +-
.../apache/spark/sql/StringFunctionsSuite.scala | 119 +++++++
62 files changed, 1601 insertions(+), 3 deletions(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 206b7df2091..c12bb23f850 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3971,6 +3971,166 @@ object functions {
def startswith(str: Column, prefix: Column): Column =
Column.fn("startswith", str, prefix)
+ /**
+ * Returns the ASCII character having the binary equivalent to `n`. If n is larger than 256 the
+ * result is equivalent to char(n % 256)
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def char(n: Column): Column = Column.fn("char", n)
+
+ /**
+ * Removes the leading and trailing space characters from `str`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def btrim(str: Column): Column = Column.fn("btrim", str)
+
+ /**
+ * Remove the leading and trailing `trim` characters from `str`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def btrim(str: Column, trim: Column): Column = Column.fn("btrim", str, trim)
+
+ /**
+ * Returns the character length of string data or number of bytes of binary data. The length of
+ * string data includes the trailing spaces. The length of binary data includes binary zeros.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def char_length(str: Column): Column = Column.fn("char_length", str)
+
+ /**
+ * Returns the character length of string data or number of bytes of binary data. The length of
+ * string data includes the trailing spaces. The length of binary data includes binary zeros.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def character_length(str: Column): Column = Column.fn("character_length", str)
+
+ /**
+ * Returns the ASCII character having the binary equivalent to `n`. If n is larger than 256 the
+ * result is equivalent to chr(n % 256)
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def chr(n: Column): Column = Column.fn("chr", n)
+
+ /**
+ * Returns a boolean. The value is True if right is found inside left. Returns NULL if either
+ * input expression is NULL. Otherwise, returns False. Both left or right must be of STRING
+ * type.
+ *
+ * @note
+ * Only STRING type is supported in this function, while `contains` in SQL supports both
+ * STRING and BINARY.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def contains(left: Column, right: Column): Column = Column.fn("contains", left, right)
+
+ /**
+ * Returns the `n`-th input, e.g., returns `input2` when `n` is 2. The function returns NULL if
+ * the index exceeds the length of the array and `spark.sql.ansi.enabled` is set to false. If
+ * `spark.sql.ansi.enabled` is set to true, it throws ArrayIndexOutOfBoundsException for invalid
+ * indices.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ @scala.annotation.varargs
+ def elt(inputs: Column*): Column = Column.fn("elt", inputs: _*)
+
+ /**
+ * Returns the index (1-based) of the given string (`str`) in the comma-delimited list
+ * (`strArray`). Returns 0, if the string was not found or if the given string (`str`) contains
+ * a comma.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def find_in_set(str: Column, strArray: Column): Column = Column.fn("find_in_set", str, strArray)
+
+ /**
+ * Returns true if str matches `pattern` with `escapeChar`, null if any arguments are null,
+ * false otherwise.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def like(str: Column, pattern: Column, escapeChar: Column): Column =
+ Column.fn("like", str, pattern, escapeChar)
+
+ /**
+ * Returns true if str matches `pattern` with `escapeChar`('\'), null if any arguments are null,
+ * false otherwise.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def like(str: Column, pattern: Column): Column = Column.fn("like", str, pattern)
+
+ /**
+ * Returns true if str matches `pattern` with `escapeChar` case-insensitively, null if any
+ * arguments are null, false otherwise.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def ilike(str: Column, pattern: Column, escapeChar: Column): Column =
+ Column.fn("ilike", str, pattern, escapeChar)
+
+ /**
+ * Returns true if str matches `pattern` with `escapeChar`('\') case-insensitively, null if any
+ * arguments are null, false otherwise.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def ilike(str: Column, pattern: Column): Column = Column.fn("ilike", str, pattern)
+
+ /**
+ * Returns `str` with all characters changed to lowercase.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def lcase(str: Column): Column = Column.fn("lcase", str)
+
+ /**
+ * Returns `str` with all characters changed to uppercase.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def ucase(str: Column): Column = Column.fn("ucase", str)
+
+ /**
+ * Returns the leftmost `len`(`len` can be string type) characters from the string `str`, if
+ * `len` is less or equal than 0 the result is an empty string.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def left(str: Column, len: Column): Column = Column.fn("left", str, len)
+
+ /**
+ * Returns the rightmost `len`(`len` can be string type) characters from the string `str`, if
+ * `len` is less or equal than 0 the result is an empty string.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def right(str: Column, len: Column): Column = Column.fn("right", str, len)
+
//////////////////////////////////////////////////////////////////////////////////////////////
// DateTime functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index ca7797f6df1..0b3bcf8a79c 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -2514,6 +2514,74 @@ class PlanGenerationTestSuite
fn.nvl2(fn.col("g"), fn.col("g"), fn.col("g"))
}
+ functionTest("char") {
+ fn.char(fn.col("a"))
+ }
+
+ functionTest("btrim") {
+ fn.btrim(fn.col("g"))
+ }
+
+ functionTest("btrim with specified trim string") {
+ fn.btrim(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("char_length") {
+ fn.char_length(fn.col("g"))
+ }
+
+ functionTest("character_length") {
+ fn.character_length(fn.col("g"))
+ }
+
+ functionTest("chr") {
+ fn.chr(fn.col("a"))
+ }
+
+ functionTest("contains") {
+ fn.contains(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("elt") {
+ fn.elt(fn.col("a"), fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("find_in_set") {
+ fn.find_in_set(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("like") {
+ fn.like(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("like with escape") {
+ fn.like(fn.col("g"), fn.col("g"), lit('/'))
+ }
+
+ functionTest("ilike") {
+ fn.ilike(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("ilike with escape") {
+ fn.ilike(fn.col("g"), fn.col("g"), lit('/'))
+ }
+
+ functionTest("lcase") {
+ fn.lcase(fn.col("g"))
+ }
+
+ functionTest("ucase") {
+ fn.ucase(fn.col("g"))
+ }
+
+ functionTest("left") {
+ fn.left(fn.col("g"), fn.col("g"))
+ }
+
+ functionTest("right") {
+ fn.right(fn.col("g"), fn.col("g"))
+ }
+
test("groupby agg") {
simple
.groupBy(Column("id"))
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_btrim.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_btrim.explain
new file mode 100644
index 00000000000..a3d707e01ac
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_btrim.explain
@@ -0,0 +1,2 @@
+Project [trim(g#0, None) AS btrim(g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_btrim_with_specified_trim_string.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_btrim_with_specified_trim_string.explain
new file mode 100644
index 00000000000..d651de40388
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_btrim_with_specified_trim_string.explain
@@ -0,0 +1,2 @@
+Project [trim(g#0, Some(g#0)) AS btrim(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_char.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_char.explain
new file mode 100644
index 00000000000..f30da48aa43
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_char.explain
@@ -0,0 +1,2 @@
+Project [char(cast(a#0 as bigint)) AS char(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_char_length.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_char_length.explain
new file mode 100644
index 00000000000..962dc9f8d7f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_char_length.explain
@@ -0,0 +1,2 @@
+Project [char_length(g#0) AS char_length(g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_character_length.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_character_length.explain
new file mode 100644
index 00000000000..4cc854a1cfb
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_character_length.explain
@@ -0,0 +1,2 @@
+Project [character_length(g#0) AS character_length(g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_chr.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_chr.explain
new file mode 100644
index 00000000000..7c4dc023e2f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_chr.explain
@@ -0,0 +1,2 @@
+Project [chr(cast(a#0 as bigint)) AS chr(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_contains.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_contains.explain
new file mode 100644
index 00000000000..e2be049e0a3
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_contains.explain
@@ -0,0 +1,2 @@
+Project [Contains(g#0, g#0) AS contains(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_elt.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_elt.explain
new file mode 100644
index 00000000000..a8d125c8f8b
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_elt.explain
@@ -0,0 +1,2 @@
+Project [elt(a#0, g#0, g#0, false) AS elt(a, g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_find_in_set.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_find_in_set.explain
new file mode 100644
index 00000000000..c4f3c701aa2
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_find_in_set.explain
@@ -0,0 +1,2 @@
+Project [find_in_set(g#0, g#0) AS find_in_set(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ilike.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ilike.explain
new file mode 100644
index 00000000000..457c5fc2679
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ilike.explain
@@ -0,0 +1,2 @@
+Project [lower(g#0) LIKE lower(g#0) AS ilike(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ilike_with_escape.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ilike_with_escape.explain
new file mode 100644
index 00000000000..7a96dae7d25
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ilike_with_escape.explain
@@ -0,0 +1,2 @@
+Project [lower(g#0) LIKE lower(g#0) ESCAPE '/' AS ilike(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_lcase.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_lcase.explain
new file mode 100644
index 00000000000..f7f09b10041
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_lcase.explain
@@ -0,0 +1,2 @@
+Project [lcase(g#0) AS lcase(g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_left.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_left.explain
new file mode 100644
index 00000000000..ffd450bd05e
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_left.explain
@@ -0,0 +1,2 @@
+Project [substring(g#0, 1, cast(g#0 as int)) AS left(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_like.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_like.explain
new file mode 100644
index 00000000000..d41ff61e8ef
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_like.explain
@@ -0,0 +1,2 @@
+Project [g#0 LIKE g#0 AS g LIKE g#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_like_with_escape.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_like_with_escape.explain
new file mode 100644
index 00000000000..471a3a4bd52
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_like_with_escape.explain
@@ -0,0 +1,2 @@
+Project [g#0 LIKE g#0 ESCAPE '/' AS g LIKE g#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_right.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_right.explain
new file mode 100644
index 00000000000..f8413c9deb7
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_right.explain
@@ -0,0 +1,2 @@
+Project [if (isnull(g#0)) null else if ((cast(g#0 as int) <= 0)) else substring(g#0, -cast(g#0 as int), 2147483647) AS right(g, g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ucase.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ucase.explain
new file mode 100644
index 00000000000..c093bd153e6
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ucase.explain
@@ -0,0 +1,2 @@
+Project [ucase(g#0) AS ucase(g)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_btrim.json b/connector/connect/common/src/test/resources/query-tests/queries/function_btrim.json
new file mode 100644
index 00000000000..3f35d627f9a
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_btrim.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "btrim",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_btrim.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_btrim.proto.bin
new file mode 100644
index 00000000000..200dac07a0b
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_btrim.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.json b/connector/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.json
new file mode 100644
index 00000000000..cf0476340cc
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "btrim",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.proto.bin
new file mode 100644
index 00000000000..d7669c93b2b
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_char.json b/connector/connect/common/src/test/resources/query-tests/queries/function_char.json
new file mode 100644
index 00000000000..593139a0a58
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_char.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "char",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_char.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_char.proto.bin
new file mode 100644
index 00000000000..21c3dad5565
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_char.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_char_length.json b/connector/connect/common/src/test/resources/query-tests/queries/function_char_length.json
new file mode 100644
index 00000000000..3e408260d70
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_char_length.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "char_length",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_char_length.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_char_length.proto.bin
new file mode 100644
index 00000000000..7f290c6ddc6
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_char_length.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_character_length.json b/connector/connect/common/src/test/resources/query-tests/queries/function_character_length.json
new file mode 100644
index 00000000000..ad12dde8a95
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_character_length.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "character_length",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_character_length.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_character_length.proto.bin
new file mode 100644
index 00000000000..f1762971d4e
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_character_length.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_chr.json b/connector/connect/common/src/test/resources/query-tests/queries/function_chr.json
new file mode 100644
index 00000000000..28366f87e10
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_chr.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "chr",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_chr.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_chr.proto.bin
new file mode 100644
index 00000000000..dc665d294ec
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_chr.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_contains.json b/connector/connect/common/src/test/resources/query-tests/queries/function_contains.json
new file mode 100644
index 00000000000..b7cb12d9aa9
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_contains.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "contains",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_contains.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_contains.proto.bin
new file mode 100644
index 00000000000..8864968a9dc
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_contains.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_elt.json b/connector/connect/common/src/test/resources/query-tests/queries/function_elt.json
new file mode 100644
index 00000000000..fe7dd29f91a
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_elt.json
@@ -0,0 +1,33 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "elt",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_elt.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_elt.proto.bin
new file mode 100644
index 00000000000..d719db6f89c
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_elt.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_find_in_set.json b/connector/connect/common/src/test/resources/query-tests/queries/function_find_in_set.json
new file mode 100644
index 00000000000..538651b52c4
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_find_in_set.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "find_in_set",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_find_in_set.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_find_in_set.proto.bin
new file mode 100644
index 00000000000..26abfa0e394
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_find_in_set.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ilike.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ilike.json
new file mode 100644
index 00000000000..46b1b87e032
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ilike.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "ilike",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ilike.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ilike.proto.bin
new file mode 100644
index 00000000000..b1c50e3aaf4
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_ilike.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.json
new file mode 100644
index 00000000000..6392912efe8
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.json
@@ -0,0 +1,33 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "ilike",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "literal": {
+ "string": "/"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.proto.bin
new file mode 100644
index 00000000000..de0d89f2c8c
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_lcase.json b/connector/connect/common/src/test/resources/query-tests/queries/function_lcase.json
new file mode 100644
index 00000000000..a1610815b6c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_lcase.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "lcase",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_lcase.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_lcase.proto.bin
new file mode 100644
index 00000000000..d5627abb0a5
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_lcase.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_left.json b/connector/connect/common/src/test/resources/query-tests/queries/function_left.json
new file mode 100644
index 00000000000..e629782ba6d
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_left.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "left",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_left.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_left.proto.bin
new file mode 100644
index 00000000000..497cf68194e
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_left.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_like.json b/connector/connect/common/src/test/resources/query-tests/queries/function_like.json
new file mode 100644
index 00000000000..3ce3431e50f
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_like.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "like",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_like.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_like.proto.bin
new file mode 100644
index 00000000000..d9a13f5c79b
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_like.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.json b/connector/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.json
new file mode 100644
index 00000000000..0313398f0ad
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.json
@@ -0,0 +1,33 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "like",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "literal": {
+ "string": "/"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.proto.bin
new file mode 100644
index 00000000000..cc5fefe193f
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_right.json b/connector/connect/common/src/test/resources/query-tests/queries/function_right.json
new file mode 100644
index 00000000000..843f5be44a6
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_right.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "right",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_right.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_right.proto.bin
new file mode 100644
index 00000000000..b8d0156c981
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_right.proto.bin differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ucase.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ucase.json
new file mode 100644
index 00000000000..7193142acdb
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ucase.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "ucase",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "g"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ucase.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ucase.proto.bin
new file mode 100644
index 00000000000..3e17a01d4b1
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_ucase.proto.bin differ
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index 06576e419de..b02b49d00dc 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -1523,6 +1523,18 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging {
val ignoreNulls = extractBoolean(children(2), "ignoreNulls")
Some(NthValue(children(0), children(1), ignoreNulls))
+ case "like" if fun.getArgumentsCount == 3 =>
+ // Like does not have a constructor which accepts Expression typed 'escapeChar'
+ val children = fun.getArgumentsList.asScala.map(transformExpression)
+ val escapeChar = extractString(children(2), "escapeChar")
+ Some(Like(children(0), children(1), escapeChar.charAt(0)))
+
+ case "ilike" if fun.getArgumentsCount == 3 =>
+ // ILike does not have a constructor which accepts Expression typed 'escapeChar'
+ val children = fun.getArgumentsList.asScala.map(transformExpression)
+ val escapeChar = extractString(children(2), "escapeChar")
+ Some(ILike(children(0), children(1), escapeChar.charAt(0)))
+
case "lag" if fun.getArgumentsCount == 4 =>
// Lag does not have a constructor which accepts Expression typed 'ignoreNulls'
val children = fun.getArgumentsList.asScala.map(transformExpression)
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index f1173233e21..54b920cc36f 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -1052,6 +1052,11 @@
"Found an invalid escape string: <invalidEscape>. The escape string must contain only one character."
]
},
+ "INVALID_ESCAPE_CHAR" : {
+ "message" : [
+ "`EscapeChar` should be a string literal of length one, but got <sqlExpr>."
+ ]
+ },
"INVALID_EXECUTOR_MEMORY" : {
"message" : [
"Executor memory <executorMemory> must be at least <minSystemMemory>. Please increase executor memory using the --executor-memory option or \"<config>\" in Spark configuration."
diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index 4ea15bd44bd..92422818737 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -358,16 +358,28 @@ String Functions
ascii
base64
bit_length
+ btrim
+ char
+ character_length
+ char_length
+ chr
concat_ws
+ contains
decode
+ elt
encode
endswith
+ find_in_set
format_number
format_string
+ ilike
initcap
instr
+ lcase
length
+ like
lower
+ left
levenshtein
locate
lpad
@@ -386,6 +398,8 @@ String Functions
regexp_substr
regexp_instr
replace
+ right
+ ucase
unbase64
rpad
repeat
diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index 6ca26191fc4..3f4395f9297 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -2490,6 +2490,117 @@ def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column:
startswith.__doc__ = pysparkfuncs.startswith.__doc__
+def char(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("char", col)
+
+
+char.__doc__ = pysparkfuncs.char.__doc__
+
+
+def btrim(str: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
+ if trim is not None:
+ return _invoke_function_over_columns("btrim", str, trim)
+ else:
+ return _invoke_function_over_columns("btrim", str)
+
+
+btrim.__doc__ = pysparkfuncs.btrim.__doc__
+
+
+def char_length(str: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("char_length", str)
+
+
+char_length.__doc__ = pysparkfuncs.char_length.__doc__
+
+
+def character_length(str: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("character_length", str)
+
+
+character_length.__doc__ = pysparkfuncs.character_length.__doc__
+
+
+def chr(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("chr", col)
+
+
+chr.__doc__ = pysparkfuncs.chr.__doc__
+
+
+def contains(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("contains", left, right)
+
+
+contains.__doc__ = pysparkfuncs.contains.__doc__
+
+
+def elt(*inputs: "ColumnOrName") -> Column:
+ return _invoke_function("elt", *[_to_col(input) for input in inputs])
+
+
+elt.__doc__ = pysparkfuncs.elt.__doc__
+
+
+def find_in_set(str: "ColumnOrName", str_array: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("find_in_set", str, str_array)
+
+
+find_in_set.__doc__ = pysparkfuncs.find_in_set.__doc__
+
+
+def like(
+ str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None
+) -> Column:
+ if escapeChar is not None:
+ return _invoke_function_over_columns("like", str, pattern, escapeChar)
+ else:
+ return _invoke_function_over_columns("like", str, pattern)
+
+
+like.__doc__ = pysparkfuncs.like.__doc__
+
+
+def ilike(
+ str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None
+) -> Column:
+ if escapeChar is not None:
+ return _invoke_function_over_columns("ilike", str, pattern, escapeChar)
+ else:
+ return _invoke_function_over_columns("ilike", str, pattern)
+
+
+ilike.__doc__ = pysparkfuncs.ilike.__doc__
+
+
+def lcase(str: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("lcase", str)
+
+
+lcase.__doc__ = pysparkfuncs.lcase.__doc__
+
+
+def ucase(str: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("ucase", str)
+
+
+ucase.__doc__ = pysparkfuncs.ucase.__doc__
+
+
+def left(str: "ColumnOrName", len: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("left", str, len)
+
+
+left.__doc__ = pysparkfuncs.left.__doc__
+
+
+def right(str: "ColumnOrName", len: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("right", str, len)
+
+
+right.__doc__ = pysparkfuncs.right.__doc__
+
+
# Date/Timestamp functions
# TODO(SPARK-41455): Resolve dtypes inconsistencies for:
# to_timestamp, from_utc_timestamp, to_utc_timestamp,
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 530349c39dd..adef14de454 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -9586,6 +9586,396 @@ def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column:
return _invoke_function_over_columns("startswith", str, prefix)
+@try_remote_functions
+def char(col: "ColumnOrName") -> Column:
+ """
+ Returns the ASCII character having the binary equivalent to `col`. If col is larger than 256 the
+ result is equivalent to char(col % 256)
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(65,)], ['a'])
+ >>> df.select(char(df.a).alias('r')).collect()
+ [Row(r='A')]
+ """
+ return _invoke_function_over_columns("char", col)
+
+
+@try_remote_functions
+def btrim(str: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
+ """
+ Remove the leading and trailing `trim` characters from `str`.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+ trim : :class:`~pyspark.sql.Column` or str
+ The trim string characters to trim, the default value is a single space
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("SSparkSQLS", "SL", )], ['a', 'b'])
+ >>> df.select(btrim(df.a, df.b).alias('r')).collect()
+ [Row(r='parkSQ')]
+
+ >>> df = spark.createDataFrame([(" SparkSQL ",)], ['a'])
+ >>> df.select(btrim(df.a).alias('r')).collect()
+ [Row(r='SparkSQL')]
+ """
+ if trim is not None:
+ return _invoke_function_over_columns("btrim", str, trim)
+ else:
+ return _invoke_function_over_columns("btrim", str)
+
+
+@try_remote_functions
+def char_length(str: "ColumnOrName") -> Column:
+ """
+ Returns the character length of string data or number of bytes of binary data.
+ The length of string data includes the trailing spaces.
+ The length of binary data includes binary zeros.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("SparkSQL",)], ['a'])
+ >>> df.select(char_length(df.a).alias('r')).collect()
+ [Row(r=8)]
+ """
+ return _invoke_function_over_columns("char_length", str)
+
+
+@try_remote_functions
+def character_length(str: "ColumnOrName") -> Column:
+ """
+ Returns the character length of string data or number of bytes of binary data.
+ The length of string data includes the trailing spaces.
+ The length of binary data includes binary zeros.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("SparkSQL",)], ['a'])
+ >>> df.select(character_length(df.a).alias('r')).collect()
+ [Row(r=8)]
+ """
+ return _invoke_function_over_columns("character_length", str)
+
+
+@try_remote_functions
+def chr(col: "ColumnOrName") -> Column:
+ """
+ Returns the ASCII character having the binary equivalent to `col`.
+ If col is larger than 256 the result is equivalent to chr(col % 256)
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(65,)], ['a'])
+ >>> df.select(chr(df.a).alias('r')).collect()
+ [Row(r='A')]
+ """
+ return _invoke_function_over_columns("chr", col)
+
+
+@try_remote_functions
+def contains(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+ """
+ Returns a boolean. The value is True if right is found inside left.
+ Returns NULL if either input expression is NULL. Otherwise, returns False.
+ Both left and right must be of STRING type.
+
+ .. versionadded:: 3.5.0
+
+ Notes
+ -----
+ Only STRING type is supported in this function,
+ while `contains` in SQL supports both STRING and BINARY.
+
+ Parameters
+ ----------
+ left : :class:`~pyspark.sql.Column` or str
+ The input column or strings to check, may be NULL.
+ right : :class:`~pyspark.sql.Column` or str
+ The input column or strings to find, may be NULL.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark SQL", "Spark")], ['a', 'b'])
+ >>> df.select(contains(df.a, df.b).alias('r')).collect()
+ [Row(r=True)]
+ """
+ return _invoke_function_over_columns("contains", left, right)
+
+
+@try_remote_functions
+def elt(*inputs: "ColumnOrName") -> Column:
+ """
+ Returns the `n`-th input, e.g., returns `input2` when `n` is 2.
+ The function returns NULL if the index exceeds the length of the array
+ and `spark.sql.ansi.enabled` is set to false. If `spark.sql.ansi.enabled` is set to true,
+ it throws ArrayIndexOutOfBoundsException for invalid indices.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ inputs : :class:`~pyspark.sql.Column` or str
+ Input columns or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([(1, "scala", "java")], ['a', 'b', 'c'])
+ >>> df.select(elt(df.a, df.b, df.c).alias('r')).collect()
+ [Row(r='scala')]
+ """
+ sc = get_active_spark_context()
+ return _invoke_function("elt", _to_seq(sc, inputs, _to_java_column))
+
+
+@try_remote_functions
+def find_in_set(str: "ColumnOrName", str_array: "ColumnOrName") -> Column:
+ """
+ Returns the index (1-based) of the given string (`str`) in the comma-delimited
+ list (`str_array`). Returns 0 if the string was not found or if the given string (`str`)
+ contains a comma.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ The given string to be found.
+ str_array : :class:`~pyspark.sql.Column` or str
+ The comma-delimited list.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("ab", "abc,b,ab,c,def")], ['a', 'b'])
+ >>> df.select(find_in_set(df.a, df.b).alias('r')).collect()
+ [Row(r=3)]
+ """
+ return _invoke_function_over_columns("find_in_set", str, str_array)
+
+
+@try_remote_functions
+def like(
+ str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None
+) -> Column:
+ """
+ Returns true if str matches `pattern` with `escape`,
+ null if any arguments are null, false otherwise.
+ The default escape character is the '\'.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ A string.
+ pattern : :class:`~pyspark.sql.Column` or str
+ A string. The pattern is a string which is matched literally, with
+ exception to the following special symbols:
+ _ matches any one character in the input (similar to . in posix regular expressions)
+ % matches zero or more characters in the input (similar to .* in posix regular
+ expressions)
+ Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order
+ to match "\abc", the pattern should be "\\abc".
+ When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it falls back
+ to Spark 1.6 behavior regarding string literal parsing. For example, if the config is
+ enabled, the pattern to match "\abc" should be "\abc".
+ escapeChar : :class:`~pyspark.sql.Column`
+ A character added since Spark 3.0. The default escape character is the '\'.
+ If an escape character precedes a special symbol or another escape character, the
+ following character is matched literally. It is invalid to escape any other character.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark", "_park")], ['a', 'b'])
+ >>> df.select(like(df.a, df.b).alias('r')).collect()
+ [Row(r=True)]
+
+ >>> df = spark.createDataFrame(
+ ... [("%SystemDrive%/Users/John", "/%SystemDrive/%//Users%")],
+ ... ['a', 'b']
+ ... )
+ >>> df.select(like(df.a, df.b, lit('/')).alias('r')).collect()
+ [Row(r=True)]
+ """
+ if escapeChar is not None:
+ return _invoke_function_over_columns("like", str, pattern, escapeChar)
+ else:
+ return _invoke_function_over_columns("like", str, pattern)
+
+
+@try_remote_functions
+def ilike(
+ str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None
+) -> Column:
+ """
+ Returns true if str matches `pattern` with `escape` case-insensitively,
+ null if any arguments are null, false otherwise.
+ The default escape character is the '\'.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ A string.
+ pattern : :class:`~pyspark.sql.Column` or str
+ A string. The pattern is a string which is matched literally, with
+ exception to the following special symbols:
+ _ matches any one character in the input (similar to . in posix regular expressions)
+ % matches zero or more characters in the input (similar to .* in posix regular
+ expressions)
+ Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order
+ to match "\abc", the pattern should be "\\abc".
+ When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it falls back
+ to Spark 1.6 behavior regarding string literal parsing. For example, if the config is
+ enabled, the pattern to match "\abc" should be "\abc".
+ escapeChar : :class:`~pyspark.sql.Column`
+ A character added since Spark 3.0. The default escape character is the '\'.
+ If an escape character precedes a special symbol or another escape character, the
+ following character is matched literally. It is invalid to escape any other character.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark", "_park")], ['a', 'b'])
+ >>> df.select(ilike(df.a, df.b).alias('r')).collect()
+ [Row(r=True)]
+
+ >>> df = spark.createDataFrame(
+ ... [("%SystemDrive%/Users/John", "/%SystemDrive/%//Users%")],
+ ... ['a', 'b']
+ ... )
+ >>> df.select(ilike(df.a, df.b, lit('/')).alias('r')).collect()
+ [Row(r=True)]
+ """
+ if escapeChar is not None:
+ return _invoke_function_over_columns("ilike", str, pattern, escapeChar)
+ else:
+ return _invoke_function_over_columns("ilike", str, pattern)
+
+
+@try_remote_functions
+def lcase(str: "ColumnOrName") -> Column:
+ """
+ Returns `str` with all characters changed to lowercase.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark",)], ['a'])
+ >>> df.select(lcase(df.a).alias('r')).collect()
+ [Row(r='spark')]
+ """
+ return _invoke_function_over_columns("lcase", str)
+
+
+@try_remote_functions
+def ucase(str: "ColumnOrName") -> Column:
+ """
+ Returns `str` with all characters changed to uppercase.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark",)], ['a'])
+ >>> df.select(ucase(df.a).alias('r')).collect()
+ [Row(r='SPARK')]
+ """
+ return _invoke_function_over_columns("ucase", str)
+
+
+@try_remote_functions
+def left(str: "ColumnOrName", len: "ColumnOrName") -> Column:
+ """
+ Returns the leftmost `len` (`len` can be string type) characters from the string `str`,
+ if `len` is less or equal than 0 the result is an empty string.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+ len : :class:`~pyspark.sql.Column` or str
+ Input column or strings, the leftmost `len`.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark SQL", 3,)], ['a', 'b'])
+ >>> df.select(left(df.a, df.b).alias('r')).collect()
+ [Row(r='Spa')]
+ """
+ return _invoke_function_over_columns("left", str, len)
+
+
+@try_remote_functions
+def right(str: "ColumnOrName", len: "ColumnOrName") -> Column:
+ """
+ Returns the rightmost `len` (`len` can be string type) characters from the string `str`,
+ if `len` is less or equal than 0 the result is an empty string.
+
+ .. versionadded:: 3.5.0
+
+ Parameters
+ ----------
+ str : :class:`~pyspark.sql.Column` or str
+ Input column or strings.
+ len : :class:`~pyspark.sql.Column` or str
+ Input column or strings, the rightmost `len`.
+
+ Examples
+ --------
+ >>> df = spark.createDataFrame([("Spark SQL", 3,)], ['a', 'b'])
+ >>> df.select(right(df.a, df.b).alias('r')).collect()
+ [Row(r='SQL')]
+ """
+ return _invoke_function_over_columns("right", str, len)
+
+
# ---------------------- Collection functions ------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 4c87b9da1c7..01b90210047 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -2097,6 +2097,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
"errors" -> errors.mkString("\n- ", "\n- ", "")))
}
+ def invalidEscapeChar(sqlExpr: Expression): Throwable = {
+ new AnalysisException(
+ errorClass = "INVALID_ESCAPE_CHAR",
+ messageParameters = Map("sqlExpr" -> toSQLExpr(sqlExpr)))
+ }
+
def secondArgumentOfFunctionIsNotIntegerError(
function: String, e: NumberFormatException): Throwable = {
// The second argument of {function} function needs to be an integer
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 984a6dce875..e7e14e30477 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -4075,6 +4075,210 @@ object functions {
StartsWith(str.expr, prefix.expr)
}
+ /**
+ * Returns the ASCII character having the binary equivalent to `n`.
+ * If n is larger than 256 the result is equivalent to char(n % 256)
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def char(n: Column): Column = withExpr {
+ Chr(n.expr)
+ }
+
+ /**
+ * Removes the leading and trailing space characters from `str`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def btrim(str: Column): Column = withExpr {
+ new StringTrimBoth(str.expr)
+ }
+
+ /**
+ * Removes the leading and trailing `trim` characters from `str`.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def btrim(str: Column, trim: Column): Column = withExpr {
+ new StringTrimBoth(str.expr, trim.expr)
+ }
+
+ /**
+ * Returns the character length of string data or number of bytes of binary data.
+ * The length of string data includes the trailing spaces.
+ * The length of binary data includes binary zeros.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def char_length(str: Column): Column = withExpr {
+ Length(str.expr)
+ }
+
+ /**
+ * Returns the character length of string data or number of bytes of binary data.
+ * The length of string data includes the trailing spaces.
+ * The length of binary data includes binary zeros.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def character_length(str: Column): Column = withExpr {
+ Length(str.expr)
+ }
+
+ /**
+ * Returns the ASCII character having the binary equivalent to `n`.
+ * If n is larger than 256 the result is equivalent to chr(n % 256)
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def chr(n: Column): Column = withExpr {
+ Chr(n.expr)
+ }
+
+ /**
+ * Returns a boolean. The value is True if right is found inside left.
+ * Returns NULL if either input expression is NULL. Otherwise, returns False.
+ * Both left and right must be of STRING type.
+ *
+ * @note
+ * Only STRING type is supported in this function, while `contains` in SQL supports both
+ * STRING and BINARY.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def contains(left: Column, right: Column): Column = withExpr {
+ Contains(left.expr, right.expr)
+ }
+
+ /**
+ * Returns the `n`-th input, e.g., returns `input2` when `n` is 2.
+ * The function returns NULL if the index exceeds the length of the array
+ * and `spark.sql.ansi.enabled` is set to false. If `spark.sql.ansi.enabled` is set to true,
+ * it throws ArrayIndexOutOfBoundsException for invalid indices.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ @scala.annotation.varargs
+ def elt(inputs: Column*): Column = withExpr {
+ Elt(inputs.map(_.expr))
+ }
+
+ /**
+ * Returns the index (1-based) of the given string (`str`) in the comma-delimited
+ * list (`strArray`). Returns 0, if the string was not found or if the given string (`str`)
+ * contains a comma.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def find_in_set(str: Column, strArray: Column): Column = withExpr {
+ FindInSet(str.expr, strArray.expr)
+ }
+
+ /**
+ * Returns true if str matches `pattern` with `escapeChar`, null if any arguments are null,
+ * false otherwise.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def like(str: Column, pattern: Column, escapeChar: Column): Column = withExpr {
+ escapeChar.expr match {
+ case StringLiteral(v) if v.length == 1 =>
+ Like(str.expr, pattern.expr, v.charAt(0))
+ case _ =>
+ throw QueryCompilationErrors.invalidEscapeChar(escapeChar.expr)
+ }
+ }
+
+ /**
+ * Returns true if str matches `pattern` with `escapeChar`('\'), null if any arguments are null,
+ * false otherwise.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def like(str: Column, pattern: Column): Column = withExpr {
+ new Like(str.expr, pattern.expr)
+ }
+
+ /**
+ * Returns true if str matches `pattern` with `escapeChar` case-insensitively, null if any
+ * arguments are null, false otherwise.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def ilike(str: Column, pattern: Column, escapeChar: Column): Column = withExpr {
+ escapeChar.expr match {
+ case StringLiteral(v) if v.length == 1 =>
+ ILike(str.expr, pattern.expr, v.charAt(0))
+ case _ =>
+ throw QueryCompilationErrors.invalidEscapeChar(escapeChar.expr)
+ }
+ }
+
+ /**
+ * Returns true if str matches `pattern` with `escapeChar`('\') case-insensitively, null if any
+ * arguments are null, false otherwise.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def ilike(str: Column, pattern: Column): Column = withExpr {
+ new ILike(str.expr, pattern.expr)
+ }
+
+ /**
+ * Returns `str` with all characters changed to lowercase.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def lcase(str: Column): Column = withExpr {
+ Lower(str.expr)
+ }
+
+ /**
+ * Returns `str` with all characters changed to uppercase.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def ucase(str: Column): Column = withExpr {
+ Upper(str.expr)
+ }
+
+ /**
+ * Returns the leftmost `len` (`len` can be string type) characters from the string `str`;
+ * if `len` is less than or equal to 0, the result is an empty string.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def left(str: Column, len: Column): Column = withExpr {
+ Left(str.expr, len.expr)
+ }
+
+ /**
+ * Returns the rightmost `len` (`len` can be string type) characters from the string `str`;
+ * if `len` is less than or equal to 0, the result is an empty string.
+ *
+ * @group string_funcs
+ * @since 3.5.0
+ */
+ def right(str: Column, len: Column): Column = withExpr {
+ Right(str.expr, len.expr)
+ }
+
//////////////////////////////////////////////////////////////////////////////////////////////
// DateTime functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 6a143c4230d..e892ca0b567 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -76,9 +76,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
)
val excludedSqlFunctions = Set(
- "random",
- "array_agg", "char_length", "character_length",
- "lcase", "ucase", "cardinality", "sha",
+ "random", "array_agg", "cardinality", "sha",
// aliases for existing functions
"reflect", "java_method" // Only needed in SQL
)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index 21ab4899a75..d68b6da2957 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -873,6 +873,125 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
)
}
+ test("char & chr function") {
+ val df = Seq(65).toDF("a")
+ checkAnswer(df.selectExpr("char(a)"), Seq(Row("A")))
+ checkAnswer(df.select(char(col("a"))), Seq(Row("A")))
+
+ checkAnswer(df.selectExpr("chr(a)"), Seq(Row("A")))
+ checkAnswer(df.select(chr(col("a"))), Seq(Row("A")))
+ }
+
+ test("btrim function") {
+ val df = Seq(("SSparkSQLS", "SL")).toDF("a", "b")
+
+ checkAnswer(df.selectExpr("btrim(a)"), Seq(Row("SSparkSQLS")))
+ checkAnswer(df.select(btrim(col("a"))), Seq(Row("SSparkSQLS")))
+
+ checkAnswer(df.selectExpr("btrim(a, b)"), Seq(Row("parkSQ")))
+ checkAnswer(df.select(btrim(col("a"), col("b"))), Seq(Row("parkSQ")))
+ }
+
+ test("char_length & character_length function") {
+ val df = Seq("SSparkSQLS").toDF("a")
+ checkAnswer(df.selectExpr("char_length(a)"), Seq(Row(10)))
+ checkAnswer(df.select(char_length(col("a"))), Seq(Row(10)))
+
+ checkAnswer(df.selectExpr("character_length(a)"), Seq(Row(10)))
+ checkAnswer(df.select(character_length(col("a"))), Seq(Row(10)))
+ }
+
+ test("contains function") {
+ val df = Seq(("Spark SQL", "Spark")).toDF("a", "b")
+ checkAnswer(df.selectExpr("contains(a, b)"), Seq(Row(true)))
+ checkAnswer(df.select(contains(col("a"), col("b"))), Seq(Row(true)))
+ }
+
+ test("elt function") {
+ val df = Seq((1, "scala", "java")).toDF("a", "b", "c")
+ checkAnswer(df.selectExpr("elt(a, b, c)"), Seq(Row("scala")))
+ checkAnswer(df.select(elt(col("a"), col("b"), col("c"))), Seq(Row("scala")))
+ }
+
+ test("find_in_set function") {
+ val df = Seq(("ab", "abc,b,ab,c,def")).toDF("a", "b")
+ checkAnswer(df.selectExpr("find_in_set(a, b)"), Seq(Row(3)))
+ checkAnswer(df.select(find_in_set(col("a"), col("b"))), Seq(Row(3)))
+ }
+
+ test("like & ilike function") {
+ val df = Seq(("Spark", "_park")).toDF("a", "b")
+
+ checkAnswer(df.selectExpr("a like b"), Seq(Row(true)))
+ checkAnswer(df.select(like(col("a"), col("b"))), Seq(Row(true)))
+
+ checkAnswer(df.selectExpr("a ilike b"), Seq(Row(true)))
+ checkAnswer(df.select(ilike(col("a"), col("b"))), Seq(Row(true)))
+
+ val df1 = Seq(("%SystemDrive%/Users/John", "/%SystemDrive/%//Users%")).toDF("a", "b")
+
+ checkAnswer(df1.selectExpr("a like b escape '/'"), Seq(Row(true)))
+ checkAnswer(df1.select(like(col("a"), col("b"), lit('/'))), Seq(Row(true)))
+
+ checkAnswer(df.selectExpr("a ilike b escape '/'"), Seq(Row(true)))
+ checkAnswer(df.select(ilike(col("a"), col("b"), lit('/'))), Seq(Row(true)))
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ df1.select(like(col("a"), col("b"), lit(618))).collect()
+ },
+ errorClass = "INVALID_ESCAPE_CHAR",
+ parameters = Map("sqlExpr" -> "\"618\"")
+ )
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ df1.select(ilike(col("a"), col("b"), lit(618))).collect()
+ },
+ errorClass = "INVALID_ESCAPE_CHAR",
+ parameters = Map("sqlExpr" -> "\"618\"")
+ )
+
+ // scalastyle:off
+ // non ascii characters are not allowed in the code, so we disable the scalastyle here.
+ checkError(
+ exception = intercept[AnalysisException] {
+ df1.select(like(col("a"), col("b"), lit("中国"))).collect()
+ },
+ errorClass = "INVALID_ESCAPE_CHAR",
+ parameters = Map("sqlExpr" -> "\"中国\"")
+ )
+
+ checkError(
+ exception = intercept[AnalysisException] {
+ df1.select(ilike(col("a"), col("b"), lit("中国"))).collect()
+ },
+ errorClass = "INVALID_ESCAPE_CHAR",
+ parameters = Map("sqlExpr" -> "\"中国\"")
+ )
+ // scalastyle:on
+ }
+
+ test("lcase & ucase function") {
+ val df = Seq("Spark").toDF("a")
+
+ checkAnswer(df.selectExpr("lcase(a)"), Seq(Row("spark")))
+ checkAnswer(df.select(lcase(col("a"))), Seq(Row("spark")))
+
+ checkAnswer(df.selectExpr("ucase(a)"), Seq(Row("SPARK")))
+ checkAnswer(df.select(ucase(col("a"))), Seq(Row("SPARK")))
+ }
+
+ test("left & right function") {
+ val df = Seq(("Spark SQL", 3)).toDF("a", "b")
+
+ checkAnswer(df.selectExpr("left(a, b)"), Seq(Row("Spa")))
+ checkAnswer(df.select(left(col("a"), col("b"))), Seq(Row("Spa")))
+
+ checkAnswer(df.selectExpr("right(a, b)"), Seq(Row("SQL")))
+ checkAnswer(df.select(right(col("a"), col("b"))), Seq(Row("SQL")))
+ }
+
test("replace") {
val df = Seq(("ABCabc", "abc", "DEF")).toDF("a", "b", "c")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org