You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2023/03/06 02:08:05 UTC
[spark] branch master updated: [SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema
This is an automated email from the ASF dual-hosted git repository.
hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 5f999b0e06d [SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema
5f999b0e06d is described below
commit 5f999b0e06da8457a39678b9063ae4a47ae10f71
Author: panbingkun <pb...@gmail.com>
AuthorDate: Sun Mar 5 22:07:48 2023 -0400
[SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema
### What changes were proposed in this pull request?
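This PR fixes a bug in `createDataFrame` when the schema contains complex (nested) types, by sending the schema to the server as JSON (`encoder.schema.json`) instead of as a catalog string (`encoder.schema.catalogString`).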
### Why are the changes needed?
While writing a unit test for `DataFrameNaFunctions`, I used the following code:
val schema = new StructType()
.add("c1", new StructType()
.add("c1-1", StringType)
.add("c1-2", StringType))
val data = Seq(Row(Row(null, "a2")), Row(Row("b1", "b2")), Row(null))
val df = spark.createDataFrame(data.asJava, schema)
df.show()
I found that the above code does not work. The error message is as follows:
<img width="657" alt="image" src="https://user-images.githubusercontent.com/15246973/222938339-77dec8c6-549b-41de-869b-8a191a0f745e.png">
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added a new unit test.
Pass GA.
Closes #40280 from panbingkun/fix_local_relation.
Authored-by: panbingkun <pb...@gmail.com>
Signed-off-by: Herman van Hovell <he...@databricks.com>
---
.../src/main/scala/org/apache/spark/sql/SparkSession.scala | 2 +-
.../test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala | 12 ++++++++++++
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 2b032b7cc8a..31a63720c5c 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -115,7 +115,7 @@ class SparkSession private[sql] (
private def createDataset[T](encoder: AgnosticEncoder[T], data: Iterator[T]): Dataset[T] = {
newDataset(encoder) { builder =>
val localRelationBuilder = builder.getLocalRelationBuilder
- .setSchema(encoder.schema.catalogString)
+ .setSchema(encoder.schema.json)
if (data.nonEmpty) {
val timeZoneId = conf.get("spark.sql.session.timeZone")
val arrowData = ConvertToArrow(encoder, data, timeZoneId, allocator)
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index 0397995bba6..79902e769c6 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -602,6 +602,18 @@ class ClientE2ETestSuite extends RemoteSparkSession {
val newId = spark.newSession().sql("SELECT 1").analyze.getClientId
assert(oldId != newId)
}
+
+ test("createDataFrame from complex type schema") {
+ val schema = new StructType()
+ .add(
+ "c1",
+ new StructType()
+ .add("c1-1", StringType)
+ .add("c1-2", StringType))
+ val data = Seq(Row(Row(null, "a2")), Row(Row("b1", "b2")), Row(null))
+ val result = spark.createDataFrame(data.asJava, schema).collect()
+ assert(result === data)
+ }
}
private[sql] case class MyType(id: Long, a: Double, b: Double)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org