Posted to issues@spark.apache.org by "Hyukjin Kwon (Jira)" <ji...@apache.org> on 2023/01/12 02:41:00 UTC
[jira] [Created] (SPARK-42000) saveAsTable fails to find the default source
Hyukjin Kwon created SPARK-42000:
------------------------------------
Summary: saveAsTable fails to find the default source
Key: SPARK-42000
URL: https://issues.apache.org/jira/browse/SPARK-42000
Project: Spark
Issue Type: Sub-task
Components: Connect
Affects Versions: 3.4.0
Reporter: Hyukjin Kwon
{code}
org.apache.spark.SparkClassNotFoundException: [DATA_SOURCE_NOT_FOUND] Failed to find the data source: . Please find packages at `https://spark.apache.org/third-party-projects.html`.
at org.apache.spark.sql.errors.QueryExecutionErrors$.dataSourceNotFoundError(QueryExecutionErrors.scala:739)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:646)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSourceV2(DataSource.scala:696)
at org.apache.spark.sql.DataFrameWriter.lookupV2Provider(DataFrameWriter.scala:860)
at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:559)
at org.apache.spark.sql.connect.planner.SparkConnectPlanner.handleWriteOperation(SparkConnectPlanner.scala:1426)
at org.apache.spark.sql.connect.planner.SparkConnectPlanner.process(SparkConnectPlanner.scala:1297)
at org.apache.spark.sql.connect.service.SparkConnectStreamHandler.handleCommand(SparkConnectStreamHandler.scala:182)
at org.apache.spark.sql.connect.service.SparkConnectStreamHandler.handle(SparkConnectStreamHandler.scala:48)
at org.apache.spark.sql.connect.service.SparkConnectService.executePlan(SparkConnectService.scala:135)
at org.apache.spark.connect.proto.SparkConnectServiceGrpc$MethodHandlers.invoke(SparkConnectServiceGrpc.java:306)
at org.sparkproject.connect.grpc.stub.ServerCalls$UnaryServerCallHandler$UnaryServerCallListener.onHalfClose(ServerCalls.java:182)
at org.sparkproject.connect.grpc.internal.ServerCallImpl$ServerStreamListenerImpl.halfClosed(ServerCallImpl.java:352)
at org.sparkproject.connect.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1HalfClosed.runInContext(ServerImpl.java:866)
at org.sparkproject.connect.grpc.internal.ContextRunnable.run(ContextRunnable.java:37)
at org.sparkproject.connect.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:133)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: .DefaultSource
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$5(DataSource.scala:632)
at scala.util.Try$.apply(Try.scala:213)
at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$4(DataSource.scala:632)
at scala.util.Failure.orElse(Try.scala:224)
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:632)
... 17 more
pyspark/sql/tests/test_readwriter.py:159 (ReadwriterParityTests.test_insert_into)
self = <pyspark.sql.tests.connect.test_parity_readwriter.ReadwriterParityTests testMethod=test_insert_into>
def test_insert_into(self):
df = self.spark.createDataFrame([("a", 1), ("b", 2)], ["C1", "C2"])
with self.table("test_table"):
> df.write.saveAsTable("test_table")
../test_readwriter.py:163:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../connect/readwriter.py:381: in saveAsTable
self._spark.client.execute_command(self._write.command(self._spark.client))
../../connect/client.py:478: in execute_command
self._execute(req)
../../connect/client.py:562: in _execute
self._handle_error(rpc_error)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pyspark.sql.connect.client.SparkConnectClient object at 0x7fe0d069b5b0>
rpc_error = <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.INTERNAL
details = ".DefaultSource"
debu...pv6:%5B::1%5D:15002 {created_time:"2023-01-12T11:27:46.698322+09:00", grpc_status:13, grpc_message:".DefaultSource"}"
>
def _handle_error(self, rpc_error: grpc.RpcError) -> NoReturn:
"""
Error handling helper for dealing with GRPC Errors. On the server side, certain
exceptions are enriched with additional RPC Status information. These are
unpacked in this function and put into the exception.
To avoid overloading the user with GRPC errors, this message explicitly
swallows the error context from the call. This GRPC Error is logged however,
and can be enabled.
Parameters
----------
rpc_error : grpc.RpcError
RPC Error containing the details of the exception.
Returns
-------
Throws the appropriate internal Python exception.
"""
logger.exception("GRPC Error received")
# We have to cast the value here because, a RpcError is a Call as well.
# https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
status = rpc_status.from_call(cast(grpc.Call, rpc_error))
if status:
for d in status.details:
if d.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
info = error_details_pb2.ErrorInfo()
d.Unpack(info)
if info.reason == "org.apache.spark.sql.AnalysisException":
raise SparkConnectAnalysisException(
info.reason, info.metadata["message"], info.metadata["plan"]
) from None
else:
> raise SparkConnectException(status.message, info.reason) from None
E pyspark.sql.connect.client.SparkConnectException: (java.lang.ClassNotFoundException) .DefaultSource
../../connect/client.py:636: SparkConnectException
{code}
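From the trace, the writer reaches DataSource.lookupDataSource with an empty source name (hence {{Failed to find the data source: .}} and {{ClassNotFoundException: .DefaultSource}}), which suggests the Connect planner does not resolve {{spark.sql.sources.default}} before handing the write to DataFrameWriter. A minimal reproduction sketch follows; the {{sc://localhost:15002}} endpoint and the explicit {{format("parquet")}} workaround are illustrative assumptions, not the fix for this ticket:

{code}
# Reproduction sketch against a Spark Connect session (assumes a server
# is listening at sc://localhost:15002; adjust to your environment).
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

df = spark.createDataFrame([("a", 1), ("b", 2)], ["C1", "C2"])

# Fails as in the trace above: the source name arrives empty on the
# server, so lookupDataSource tries to load the class ".DefaultSource".
df.write.saveAsTable("test_table")

# Workaround sketch: name the source explicitly so the server never
# consults the (unresolved) default source configuration.
df.write.format("parquet").saveAsTable("test_table_explicit")
{code}

Naming the format explicitly only sidesteps the symptom; the underlying issue is that the default-source lookup on the Connect path yields an empty string.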