You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2023/02/22 00:47:59 UTC
[spark] branch master updated: [SPARK-42514][CONNECT] Scala Client add partition transforms functions
This is an automated email from the ASF dual-hosted git repository.
hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4f3afdcd756 [SPARK-42514][CONNECT] Scala Client add partition transforms functions
4f3afdcd756 is described below
commit 4f3afdcd7561db38f3c0427d31db4f27fa94a83c
Author: yangjie01 <ya...@baidu.com>
AuthorDate: Tue Feb 21 20:47:42 2023 -0400
[SPARK-42514][CONNECT] Scala Client add partition transforms functions
### What changes were proposed in this pull request?
This PR aims to add the partition transform functions to the Scala Spark Connect client.
### Why are the changes needed?
Provide the same APIs in the Scala Spark Connect client as in the original Dataset API.
### Does this PR introduce _any_ user-facing change?
Yes, it adds new partition transform functions (`years`, `months`, `days`, `hours` and `bucket`) to the Spark Connect Scala client.
### How was this patch tested?
- Added new tests
- Manually checked `connect-client-jvm` and `connect` with Scala 2.13
Closes #40105 from LuciferYang/partition-transforms-functions.
Authored-by: yangjie01 <ya...@baidu.com>
Signed-off-by: Herman van Hovell <he...@databricks.com>
---
.../scala/org/apache/spark/sql/functions.scala | 58 ++++++++++++++++++++++
.../org/apache/spark/sql/FunctionTestSuite.scala | 1 +
.../apache/spark/sql/PlanGenerationTestSuite.scala | 20 ++++++++
.../explain-results/function_bucket.explain | 2 +
.../explain-results/function_days.explain | 2 +
.../explain-results/function_hours.explain | 2 +
.../explain-results/function_months.explain | 2 +
.../explain-results/function_years.explain | 2 +
.../query-tests/queries/function_bucket.json | 23 +++++++++
.../query-tests/queries/function_bucket.proto.bin | 5 ++
.../query-tests/queries/function_days.json | 19 +++++++
.../query-tests/queries/function_days.proto.bin | 4 ++
.../query-tests/queries/function_hours.json | 19 +++++++
.../query-tests/queries/function_hours.proto.bin | 4 ++
.../query-tests/queries/function_months.json | 19 +++++++
.../query-tests/queries/function_months.proto.bin | 4 ++
.../query-tests/queries/function_years.json | 19 +++++++
.../query-tests/queries/function_years.proto.bin | 4 ++
18 files changed, 209 insertions(+)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 6dffa8d3ea1..4996b5033e3 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3591,6 +3591,64 @@ object functions {
*/
def timestamp_seconds(e: Column): Column = Column.fn("timestamp_seconds", e)
+ //////////////////////////////////////////////////////////////////////////////////////////////
+ // Partition Transforms functions
+ //////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * A transform for timestamps and dates to partition data into years.
+ *
+ * @group partition_transforms
+ * @since 3.4.0
+ */
+ def years(e: Column): Column =
+ Column.fn("years", e)
+
+ /**
+ * A transform for timestamps and dates to partition data into months.
+ *
+ * @group partition_transforms
+ * @since 3.4.0
+ */
+ def months(e: Column): Column =
+ Column.fn("months", e)
+
+ /**
+ * A transform for timestamps and dates to partition data into days.
+ *
+ * @group partition_transforms
+ * @since 3.4.0
+ */
+ def days(e: Column): Column =
+ Column.fn("days", e)
+
+ /**
+ * A transform for timestamps to partition data into hours.
+ *
+ * @group partition_transforms
+ * @since 3.4.0
+ */
+ def hours(e: Column): Column =
+ Column.fn("hours", e)
+
+ /**
+ * A transform for any type that partitions by a hash of the input column.
+ *
+ * @group partition_transforms
+ * @since 3.4.0
+ */
+ def bucket(numBuckets: Column, e: Column): Column =
+ Column.fn("bucket", numBuckets, e)
+
+ /**
+ * A transform for any type that partitions by a hash of the input column.
+ *
+ * @group partition_transforms
+ * @since 3.4.0
+ */
+ def bucket(numBuckets: Int, e: Column): Column =
+ Column.fn("bucket", lit(numBuckets), e)
+
//////////////////////////////////////////////////////////////////////////////////////////////
// Scala UDF functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/FunctionTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/FunctionTestSuite.scala
index 1e4960ef9b2..d600ac432a2 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/FunctionTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/FunctionTestSuite.scala
@@ -170,6 +170,7 @@ class FunctionTestSuite extends ConnectFunSuite {
window(a, "10 seconds", "10 seconds"),
window(a, "10 seconds"))
testEquals("session_window", session_window(a, "1 second"), session_window(a, lit("1 second")))
+ testEquals("bucket", bucket(lit(3), a), bucket(3, a))
test("assert_true no message") {
val e = assert_true(a).expr
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 830d55b6cbf..b9ae66b2b9e 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -1386,6 +1386,26 @@ class PlanGenerationTestSuite extends ConnectFunSuite with BeforeAndAfterAll wit
fn.upper(fn.col("g"))
}
+ functionTest("years") {
+ fn.years(Column("a"))
+ }
+
+ functionTest("months") {
+ fn.months(Column("a"))
+ }
+
+ functionTest("days") {
+ fn.days(Column("a"))
+ }
+
+ functionTest("hours") {
+ fn.hours(Column("a"))
+ }
+
+ functionTest("bucket") {
+ fn.bucket(3, Column("a"))
+ }
+
private def temporalFunctionTest(name: String)(f: => Column): Unit = {
test("function " + name) {
temporals.select(f)
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bucket.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bucket.explain
new file mode 100644
index 00000000000..8ab0c9493ab
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bucket.explain
@@ -0,0 +1,2 @@
+Project [bucket(3, a#0) AS bucket(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_days.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_days.explain
new file mode 100644
index 00000000000..16ca2fe415e
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_days.explain
@@ -0,0 +1,2 @@
+Project [days(a#0) AS days(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_hours.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_hours.explain
new file mode 100644
index 00000000000..a019836233d
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_hours.explain
@@ -0,0 +1,2 @@
+Project [hours(a#0) AS hours(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_months.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_months.explain
new file mode 100644
index 00000000000..17b991ec1aa
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_months.explain
@@ -0,0 +1,2 @@
+Project [months(a#0) AS months(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_years.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_years.explain
new file mode 100644
index 00000000000..ee2342c4b02
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_years.explain
@@ -0,0 +1,2 @@
+Project [years(a#0) AS years(a)#0]
++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bucket.json b/connector/connect/common/src/test/resources/query-tests/queries/function_bucket.json
new file mode 100644
index 00000000000..4ec5fb5f27b
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bucket.json
@@ -0,0 +1,23 @@
+{
+ "project": {
+ "input": {
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "bucket",
+ "arguments": [{
+ "literal": {
+ "integer": 3
+ }
+ }, {
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_bucket.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_bucket.proto.bin
new file mode 100644
index 00000000000..4ccecb3d59c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_bucket.proto.bin
@@ -0,0 +1,5 @@
+�
+�Z��struct<id:bigint,a:int,b:double,d:struct<id:bigint,a:int,b:double>,e:array<int>,f:map<string,struct<id:bigint,a:int,b:double>>,g:string>
+bucket
+0
+a
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_days.json b/connector/connect/common/src/test/resources/query-tests/queries/function_days.json
new file mode 100644
index 00000000000..8fb62b4a2e4
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_days.json
@@ -0,0 +1,19 @@
+{
+ "project": {
+ "input": {
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "days",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_days.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_days.proto.bin
new file mode 100644
index 00000000000..ecfa97f445c
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_days.proto.bin
@@ -0,0 +1,4 @@
+�
+�Z��struct<id:bigint,a:int,b:double,d:struct<id:bigint,a:int,b:double>,e:array<int>,f:map<string,struct<id:bigint,a:int,b:double>>,g:string>
+days
+a
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_hours.json b/connector/connect/common/src/test/resources/query-tests/queries/function_hours.json
new file mode 100644
index 00000000000..4cccc0eef47
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_hours.json
@@ -0,0 +1,19 @@
+{
+ "project": {
+ "input": {
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "hours",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_hours.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_hours.proto.bin
new file mode 100644
index 00000000000..17cb707448a
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_hours.proto.bin
@@ -0,0 +1,4 @@
+�
+�Z��struct<id:bigint,a:int,b:double,d:struct<id:bigint,a:int,b:double>,e:array<int>,f:map<string,struct<id:bigint,a:int,b:double>>,g:string>
+hours
+a
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_months.json b/connector/connect/common/src/test/resources/query-tests/queries/function_months.json
new file mode 100644
index 00000000000..189def244d2
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_months.json
@@ -0,0 +1,19 @@
+{
+ "project": {
+ "input": {
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "months",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_months.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_months.proto.bin
new file mode 100644
index 00000000000..9f6ea9641f6
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_months.proto.bin
@@ -0,0 +1,4 @@
+�
+�Z��struct<id:bigint,a:int,b:double,d:struct<id:bigint,a:int,b:double>,e:array<int>,f:map<string,struct<id:bigint,a:int,b:double>>,g:string>
+months
+a
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_years.json b/connector/connect/common/src/test/resources/query-tests/queries/function_years.json
new file mode 100644
index 00000000000..a0b6f4228d0
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_years.json
@@ -0,0 +1,19 @@
+{
+ "project": {
+ "input": {
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "years",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "a"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_years.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_years.proto.bin
new file mode 100644
index 00000000000..f1e2a949fb4
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_years.proto.bin
@@ -0,0 +1,4 @@
+�
+�Z��struct<id:bigint,a:int,b:double,d:struct<id:bigint,a:int,b:double>,e:array<int>,f:map<string,struct<id:bigint,a:int,b:double>>,g:string>
+years
+a
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org