You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2024/01/23 03:36:02 UTC
(spark) branch master updated: [SPARK-46806][PYTHON] Improve error message for spark.table when argument type is wrong
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ae2d43f279d5 [SPARK-46806][PYTHON] Improve error message for spark.table when argument type is wrong
ae2d43f279d5 is described below
commit ae2d43f279d5d27b63db3356abaf7d64755f3f5c
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Tue Jan 23 12:35:52 2024 +0900
[SPARK-46806][PYTHON] Improve error message for spark.table when argument type is wrong
### What changes were proposed in this pull request?
This PR improves the error message for spark.table when the argument type is wrong, e.g.:
```python
spark.table(None)
```
**Before:**
```
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/.../spark/python/pyspark/sql/session.py", line 1710, in table
return DataFrame(self._jsparkSession.table(tableName), self)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/.../spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1322, in __call__
File "/.../spark/python/pyspark/errors/exceptions/captured.py", line 215, in deco
return f(*a, **kw)
^^^^^^^^^^^
File "/.../spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py", line 326, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o27.table.
: java.lang.NullPointerException: Cannot invoke "String.length()" because "s" is null
at org.antlr.v4.runtime.CharStreams.fromString(CharStreams.java:222)
at org.antlr.v4.runtime.CharStreams.fromString(CharStreams.java:212)
at org.apache.spark.sql.catalyst.parser.AbstractParser.parse(parsers.scala:58)
at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:55)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parseMultipartIdentifier(AbstractSqlParser.scala:54)
at org.apache.spark.sql.DataFrameReader.table(DataFrameReader.scala:681)
at org.apache.spark.sql.SparkSession.table(SparkSession.scala:619)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:568)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
```
**After:**
```
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/.../spark/python/pyspark/sql/session.py", line 1711, in table
raise PySparkTypeError(
pyspark.errors.exceptions.base.PySparkTypeError: [NOT_STR] Argument `tableName` should be a str, got NoneType.
```
### Why are the changes needed?
For better error messages to the end users.
### Does this PR introduce _any_ user-facing change?
Yes, it fixes the user-facing error messages.
### How was this patch tested?
Unittest was added.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44846 from HyukjinKwon/SPARK-46806.
Authored-by: Hyukjin Kwon <gu...@apache.org>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/sql/connect/session.py | 6 ++++++
python/pyspark/sql/session.py | 6 ++++++
python/pyspark/sql/tests/test_dataframe.py | 10 ++++++++++
3 files changed, 22 insertions(+)
diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py
index 5cbcb4ab5c35..1c53e460c196 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -287,6 +287,12 @@ class SparkSession:
active.__doc__ = PySparkSession.active.__doc__
def table(self, tableName: str) -> DataFrame:
+ if not isinstance(tableName, str):
+ raise PySparkTypeError(
+ error_class="NOT_STR",
+ message_parameters={"arg_name": "tableName", "arg_type": type(tableName).__name__},
+ )
+
return self.read.table(tableName)
table.__doc__ = PySparkSession.table.__doc__
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index fef834b9f0a0..7d0d9dc113f2 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -1707,6 +1707,12 @@ class SparkSession(SparkConversionMixin):
| 4|
+---+
"""
+ if not isinstance(tableName, str):
+ raise PySparkTypeError(
+ error_class="NOT_STR",
+ message_parameters={"arg_name": "tableName", "arg_type": type(tableName).__name__},
+ )
+
return DataFrame(self._jsparkSession.table(tableName), self)
@property
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index 1788f1d9fb1a..f87e3b15eadb 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -69,6 +69,16 @@ class DataFrameTestsMixin:
self.assertEqual(self.spark.range(-2).count(), 0)
self.assertEqual(self.spark.range(3).count(), 3)
+ def test_table(self):
+ with self.assertRaises(PySparkTypeError) as pe:
+ self.spark.table(None)
+
+ self.check_error(
+ exception=pe.exception,
+ error_class="NOT_STR",
+ message_parameters={"arg_name": "tableName", "arg_type": "NoneType"},
+ )
+
def test_dataframe_star(self):
df1 = self.spark.createDataFrame([{"a": 1}])
df2 = self.spark.createDataFrame([{"a": 1, "b": "v"}])
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org