You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2023/03/06 02:08:05 UTC
[spark] branch master updated: [SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema

This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 5f999b0e06d [SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema
5f999b0e06d is described below

commit 5f999b0e06da8457a39678b9063ae4a47ae10f71
Author: panbingkun <pb...@gmail.com>
AuthorDate: Sun Mar 5 22:07:48 2023 -0400

    [SPARK-42671][CONNECT] Fix bug for createDataFrame from complex type schema
    
    ### What changes were proposed in this pull request?
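    This PR fixes a bug in `createDataFrame` when the schema contains complex (nested) types.
    The client now sends the local relation's schema as JSON (`encoder.schema.json`) instead of
    its catalog string (`encoder.schema.catalogString`).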
    
    ### Why are the changes needed?
    When I wrote a unit test for `DataFrameNaFunctions` as follows:
    val schema = new StructType()
          .add("c1", new StructType()
            .add("c1-1", StringType)
            .add("c1-2", StringType))
    val data = Seq(Row(Row(null, "a2")), Row(Row("b1", "b2")), Row(null))
    val df = spark.createDataFrame(data.asJava, schema)
    df.show()
    
    I found that the above code does not work. The error message is as follows:
    <img width="657" alt="image" src="https://user-images.githubusercontent.com/15246973/222938339-77dec8c6-549b-41de-869b-8a191a0f745e.png">
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Added a new unit test.
    Passes GitHub Actions (GA).
    
    Closes #40280 from panbingkun/fix_local_relation.
    
    Authored-by: panbingkun <pb...@gmail.com>
    Signed-off-by: Herman van Hovell <he...@databricks.com>
---
 .../src/main/scala/org/apache/spark/sql/SparkSession.scala   |  2 +-
 .../test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 2b032b7cc8a..31a63720c5c 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -115,7 +115,7 @@ class SparkSession private[sql] (
   private def createDataset[T](encoder: AgnosticEncoder[T], data: Iterator[T]): Dataset[T] = {
     newDataset(encoder) { builder =>
       val localRelationBuilder = builder.getLocalRelationBuilder
-        .setSchema(encoder.schema.catalogString)
+        .setSchema(encoder.schema.json)
       if (data.nonEmpty) {
         val timeZoneId = conf.get("spark.sql.session.timeZone")
         val arrowData = ConvertToArrow(encoder, data, timeZoneId, allocator)
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index 0397995bba6..79902e769c6 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -602,6 +602,18 @@ class ClientE2ETestSuite extends RemoteSparkSession {
     val newId = spark.newSession().sql("SELECT 1").analyze.getClientId
     assert(oldId != newId)
   }
+
+  test("createDataFrame from complex type schema") {
+    val schema = new StructType()
+      .add(
+        "c1",
+        new StructType()
+          .add("c1-1", StringType)
+          .add("c1-2", StringType))
+    val data = Seq(Row(Row(null, "a2")), Row(Row("b1", "b2")), Row(null))
+    val result = spark.createDataFrame(data.asJava, schema).collect()
+    assert(result === data)
+  }
 }
 
 private[sql] case class MyType(id: Long, a: Double, b: Double)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org