You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/10/02 14:49:03 UTC

[GitHub] [spark] maropu commented on a change in pull request #29885: [SPARK-33010][SQL]Make DataFrameWriter.jdbc work for DataSource V2

maropu commented on a change in pull request #29885:
URL: https://github.com/apache/spark/pull/29885#discussion_r498867658



##########
File path: sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
##########
@@ -808,9 +808,26 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
     assertNotBucketed("jdbc")
     // connectionProperties should override settings in extraOptions.
     this.extraOptions ++= connectionProperties.asScala
-    // explicit url and dbtable should override all
-    this.extraOptions ++= Seq("url" -> url, "dbtable" -> table)
-    format("jdbc").save()
+    this.extraOptions ++= Seq("url" -> url)
+
+    import df.sparkSession.sessionState.analyzer.{AsTableIdentifier, NonSessionCatalogAndIdentifier}
+    import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+
+    val session = df.sparkSession
+    format("jdbc")

Review comment:
     Why did you call `format` here? How about `this.source = "jdbc"` instead?

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
##########
@@ -808,9 +808,26 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
     assertNotBucketed("jdbc")
     // connectionProperties should override settings in extraOptions.
     this.extraOptions ++= connectionProperties.asScala
-    // explicit url and dbtable should override all
-    this.extraOptions ++= Seq("url" -> url, "dbtable" -> table)
-    format("jdbc").save()
+    this.extraOptions ++= Seq("url" -> url)
+
+    import df.sparkSession.sessionState.analyzer.{AsTableIdentifier, NonSessionCatalogAndIdentifier}
+    import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+
+    val session = df.sparkSession
+    format("jdbc")
+
+    session.sessionState.sqlParser.parseMultipartIdentifier(table) match {
+      case nameParts @ NonSessionCatalogAndIdentifier(catalog, tableIdentifier) =>
+        saveAsTable(catalog.asTableCatalog, tableIdentifier, nameParts)
+
+      case AsTableIdentifier(_) =>
+        this.extraOptions ++= Seq("dbtable" -> table)
+        saveToV1Source(None)
+
+      case other =>
+        throw new AnalysisException(
+          s"Couldn't find a catalog to handle the identifier ${other.quoted}.")

Review comment:
     Do we need this error handling? Rather, could it be written like this?
   ```
       session.sessionState.sqlParser.parseMultipartIdentifier(table) match {
         case nameParts @ NonSessionCatalogAndIdentifier(catalog, tableIdentifier) =>
           saveAsTable(catalog.asTableCatalog, tableIdentifier, nameParts)
   
         case _ =>
           // explicit dbtable should override all
           this.extraOptions ++= Seq("dbtable" -> table)
           saveToV1Source(None)
       }
   ```

##########
File path: sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala
##########
@@ -221,4 +221,36 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession {
       checkAnswer(sql("SELECT name, id FROM h2.test.abc"), Row("bob", 4))
     }
   }
+
+  test("dfwriter.jdbc") {

Review comment:
       Could you make this test title clearer?

##########
File path: sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
##########
@@ -808,9 +808,26 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
     assertNotBucketed("jdbc")
     // connectionProperties should override settings in extraOptions.
     this.extraOptions ++= connectionProperties.asScala
-    // explicit url and dbtable should override all

Review comment:
       Could you keep this comment?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org