Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/10/07 11:42:16 UTC

[GitHub] [spark] MaxGekk commented on a change in pull request #29024: [SPARK-32001][SQL] Create JDBC authentication provider developer API

MaxGekk commented on a change in pull request #29024:
URL: https://github.com/apache/spark/pull/29024#discussion_r500944073



##########
File path: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala
##########
@@ -18,60 +18,45 @@
 package org.apache.spark.sql.execution.datasources.jdbc.connection
 
 import java.sql.{Connection, Driver}
-import java.util.Properties
+import java.util.ServiceLoader
 
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
-
-/**
- * Connection provider which opens connection toward various databases (database specific instance
- * needed). If kerberos authentication required then it's the provider's responsibility to set all
- * the parameters.
- */
-private[jdbc] trait ConnectionProvider {
-  /**
-   * Additional properties for data connection (Data source property takes precedence).
-   */
-  def getAdditionalProperties(): Properties = new Properties()
+import scala.collection.mutable
 
-  /**
-   * Opens connection toward the database.
-   */
-  def getConnection(): Connection
-}
+import org.apache.spark.internal.Logging
+import org.apache.spark.security.SecurityConfigurationLock
+import org.apache.spark.sql.jdbc.JdbcConnectionProvider
+import org.apache.spark.util.Utils
 
 private[jdbc] object ConnectionProvider extends Logging {
-  def create(driver: Driver, options: JDBCOptions): ConnectionProvider = {
-    if (options.keytab == null || options.principal == null) {
-      logDebug("No authentication configuration found, using basic connection provider")
-      new BasicConnectionProvider(driver, options)
-    } else {
-      logDebug("Authentication configuration found, using database specific connection provider")
-      options.driverClass match {
-        case PostgresConnectionProvider.driverClass =>
-          logDebug("Postgres connection provider found")
-          new PostgresConnectionProvider(driver, options)
-
-        case MariaDBConnectionProvider.driverClass =>
-          logDebug("MariaDB connection provider found")
-          new MariaDBConnectionProvider(driver, options)
-
-        case DB2ConnectionProvider.driverClass =>
-          logDebug("DB2 connection provider found")
-          new DB2ConnectionProvider(driver, options)
-
-        case MSSQLConnectionProvider.driverClass =>
-          logDebug("MS SQL connection provider found")
-          new MSSQLConnectionProvider(driver, options)
-
-        case OracleConnectionProvider.driverClass =>
-          logDebug("Oracle connection provider found")
-          new OracleConnectionProvider(driver, options)
-
-        case _ =>
-          throw new IllegalArgumentException(s"Driver ${options.driverClass} does not support " +
-            "Kerberos authentication")
+  private val providers = loadProviders()
+
+  def loadProviders(): Seq[JdbcConnectionProvider] = {
+    val loader = ServiceLoader.load(classOf[JdbcConnectionProvider],
+      Utils.getContextOrSparkClassLoader)
+    val providers = mutable.ArrayBuffer[JdbcConnectionProvider]()
+
+    val iterator = loader.iterator
+    while (iterator.hasNext) {
+      try {
+        val provider = iterator.next
+        logDebug(s"Loaded built in provider: $provider")
+        providers += provider
+      } catch {
+        case t: Throwable =>
+          logError(s"Failed to load built in provider.", t)

Review comment:
       I keep getting the following exception on my console while running the JDBC tests. Should it really be logged as an error? A possible alternative is sketched after the trace.
   ```
   14:31:25.070 ERROR org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Failed to load built in provider.
   java.util.ServiceConfigurationError: org.apache.spark.sql.jdbc.JdbcConnectionProvider: Provider org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider could not be instantiated
   	at java.util.ServiceLoader.fail(ServiceLoader.java:232)
   	at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
   	at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
   	at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
   	at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
   	at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.loadProviders(ConnectionProvider.scala:41)
   	at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.<init>(ConnectionProvider.scala:31)
   	at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.<clinit>(ConnectionProvider.scala)
   	at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$createConnectionFactory$1(JdbcUtils.scala:66)
   	at org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog.withConnection(JDBCTableCatalog.scala:156)
   	at org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog.listTables(JDBCTableCatalog.scala:58)
   	at org.apache.spark.sql.execution.datasources.v2.ShowTablesExec.run(ShowTablesExec.scala:42)
   	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:39)
   	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:39)
   	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:45)
   	at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
   	at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3675)
   	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
   	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
   	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
   	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769)
   	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
   	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3673)
   	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
   	at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
   	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769)
   	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
   	at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:612)
   	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769)
   	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:607)
   	at org.apache.spark.sql.test.SQLTestUtilsBase.$anonfun$sql$1(SQLTestUtils.scala:231)
   	at org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalogSuite.$anonfun$new$2(JDBCTableCatalogSuite.scala:67)
   	at org.apache.spark.sql.QueryTest.checkAnswer(QueryTest.scala:134)
   	at org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalogSuite.$anonfun$new$1(JDBCTableCatalogSuite.scala:67)
   	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
   	at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
   	at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
   	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
   	at org.scalatest.Transformer.apply(Transformer.scala:22)
   	at org.scalatest.Transformer.apply(Transformer.scala:20)
   	at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:189)
   	at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:176)
   	at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:187)
   	at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:199)
   	at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
   	at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:199)
   	at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:181)
   	at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:61)
   	at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234)
   	at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227)
   	at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:61)
   	at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:232)
   	at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
   	at scala.collection.immutable.List.foreach(List.scala:392)
   	at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
   	at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
   	at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
   	at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:232)
   	at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:231)
   	at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1562)
   	at org.scalatest.Suite.run(Suite.scala:1112)
   	at org.scalatest.Suite.run$(Suite.scala:1094)
   	at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1562)
   	at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:236)
   	at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
   	at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:236)
   	at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:235)
   	at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:61)
   	at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
   	at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
   	at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
   	at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:61)
   	at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
   	at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1320)
   	at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1314)
   	at scala.collection.immutable.List.foreach(List.scala:392)
   	at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1314)
   	at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:993)
   	at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:971)
   	at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1480)
   	at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:971)
   	at org.scalatest.tools.Runner$.run(Runner.scala:798)
   	at org.scalatest.tools.Runner.run(Runner.scala)
   	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2or3(ScalaTestRunner.java:40)
   	at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:27)
   Caused by: java.lang.IllegalArgumentException: Intentional Exception
   	at org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider.<init>(IntentionallyFaultyConnectionProvider.scala:26)
   	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
   	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
   	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
   	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
   	at java.lang.Class.newInstance(Class.java:442)
   	at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:380)
   	... 82 more
   ```
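   If the intent is only to skip a provider that cannot be instantiated, one option is to log the failure at warning level instead, since the loop already tolerates it and moves on to the next provider. A minimal sketch against the loop in this hunk (reusing its `loader` and `providers`; not a definitive fix):
   ```scala
   // ServiceLoader's lazy iterator throws ServiceConfigurationError from
   // next() when a provider cannot be instantiated, so the try must wrap
   // the next() call, as the PR already does.
   val iterator = loader.iterator
   while (iterator.hasNext) {
     try {
       val provider = iterator.next
       logDebug(s"Loaded built in provider: $provider")
       providers += provider
     } catch {
       case t: Throwable =>
         // A skipped provider does not stop loading, so arguably this is
         // a warning rather than an error.
         logWarning("Failed to load built in provider.", t)
     }
   }
   ```
   Separately, catching `Throwable` also swallows fatal errors; narrowing the handler to `java.util.ServiceConfigurationError` might be worth considering, though that is a different question from the log level.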



