Posted to issues@spark.apache.org by "Hyukjin Kwon (Jira)" <ji...@apache.org> on 2023/01/12 02:41:00 UTC
[jira] [Created] (SPARK-42000) saveAsTable fails to find the default source
Hyukjin Kwon created SPARK-42000:
------------------------------------
Summary: saveAsTable fails to find the default source
Key: SPARK-42000
URL: https://issues.apache.org/jira/browse/SPARK-42000
Project: Spark
Issue Type: Sub-task
Components: Connect
Affects Versions: 3.4.0
Reporter: Hyukjin Kwon
{code}
org.apache.spark.SparkClassNotFoundException: [DATA_SOURCE_NOT_FOUND] Failed to find the data source: . Please find packages at `https://spark.apache.org/third-party-projects.html`.
at org.apache.spark.sql.errors.QueryExecutionErrors$.dataSourceNotFoundError(QueryExecutionErrors.scala:739)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:646)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSourceV2(DataSource.scala:696)
at org.apache.spark.sql.DataFrameWriter.lookupV2Provider(DataFrameWriter.scala:860)
at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:559)
at org.apache.spark.sql.connect.planner.SparkConnectPlanner.handleWriteOperation(SparkConnectPlanner.scala:1426)
at org.apache.spark.sql.connect.planner.SparkConnectPlanner.process(SparkConnectPlanner.scala:1297)
at org.apache.spark.sql.connect.service.SparkConnectStreamHandler.handleCommand(SparkConnectStreamHandler.scala:182)
at org.apache.spark.sql.connect.service.SparkConnectStreamHandler.handle(SparkConnectStreamHandler.scala:48)
at org.apache.spark.sql.connect.service.SparkConnectService.executePlan(SparkConnectService.scala:135)
at org.apache.spark.connect.proto.SparkConnectServiceGrpc$MethodHandlers.invoke(SparkConnectServiceGrpc.java:306)
at org.sparkproject.connect.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:182)
at org.sparkproject.connect.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:352)
at org.sparkproject.connect.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:866)
at org.sparkproject.connect.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
at org.sparkproject.connect.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:133)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: .DefaultSource
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$5(DataSource.scala:632)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$4(DataSource.scala:632)
at scala.util.Failure.orElse(Try.scala:224)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:632)
... 17 more
pyspark/sql/tests/test_readwriter.py:159 (ReadwriterParityTests.test_insert_into)
self = <pyspark.sql.tests.connect.test_parity_readwriter.ReadwriterParityTests testMethod=test_insert_into>
def test_insert_into(self):
df = self.spark.createDataFrame([("a", 1), ("b", 2)], ["C1", "C2"])
with self.table("test_table"):
> df.write.saveAsTable("test_table")
../test_readwriter.py:163:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../connect/readwriter.py:381: in saveAsTable
self._spark.client.execute_command(self._write.command(self._spark.client))
../../connect/client.py:478: in execute_command
self._execute(req)
../../connect/client.py:562: in _execute
self._handle_error(rpc_error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fe0d069b5b0>
rpc_error = <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.INTERNAL
details = ".DefaultSource"
debu...pv6:%5B::1%5D:15002 {created_time:"2023-01-12T11:27:46.698322+09:00", grpc_status:13, grpc_message:".DefaultSource"}"
>
def _handle_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
if info.reason == "org.apache.spark.sql.AnalysisException":
raise SparkConnectAnalysisException(
info.reason, info.metadata["message"], info.metadata["plan"]
) from None
else:
> raise SparkConnectException(status.message, info.reason) from None
E pyspark.sql.connect.client.SparkConnectException: (java.lang.ClassNotFoundException) .DefaultSource
../../connect/client.py:636: SparkConnectException
{code}
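From the trace, the writer reaches DataSource.lookupDataSource with an empty source name (hence {{Failed to find the data source: .}} and {{ClassNotFoundException: .DefaultSource}}), which suggests the Connect planner does not resolve {{spark.sql.sources.default}} before handing the write to DataFrameWriter. A minimal reproduction sketch follows; the {{sc://localhost:15002}} endpoint and the explicit {{format("parquet")}} workaround are illustrative assumptions, not the fix for this ticket:

{code}
# Reproduction sketch against a Spark Connect session (assumes a server
# is listening at sc://localhost:15002; adjust to your environment).
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

df = spark.createDataFrame([("a", 1), ("b", 2)], ["C1", "C2"])

# Fails as in the trace above: the source name arrives empty on the
# server, so lookupDataSource tries to load the class ".DefaultSource".
df.write.saveAsTable("test_table")

# Workaround sketch: name the source explicitly so the server never
# consults the (unresolved) default source configuration.
df.write.format("parquet").saveAsTable("test_table_explicit")
{code}

Naming the format explicitly only sidesteps the symptom; the underlying issue is that the default-source lookup on the Connect path yields an empty string.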