Posted to commits@spark.apache.org by gu...@apache.org on 2020/02/19 02:20:21 UTC
[spark] branch branch-3.0 updated: [SPARK-30861][PYTHON][SQL] Deprecate constructor of SQLContext and getOrCreate in SQLContext at PySpark
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 2231a98 [SPARK-30861][PYTHON][SQL] Deprecate constructor of SQLContext and getOrCreate in SQLContext at PySpark
2231a98 is described below
commit 2231a98b1bea8f384507a6856a25bd75591e8741
Author: HyukjinKwon <gu...@apache.org>
AuthorDate: Wed Feb 19 11:17:47 2020 +0900
[SPARK-30861][PYTHON][SQL] Deprecate constructor of SQLContext and getOrCreate in SQLContext at PySpark
### What changes were proposed in this pull request?
This PR proposes to deprecate the `SQLContext` APIs whose equivalents were removed in SPARK-25908. They should have been deprecated at that point; however, it seems we missed doing so.
While I am here, I also fix one more issue. After SPARK-25908, `sc._jvm.SQLContext.getOrCreate` does not exist anymore. So,
```python
from pyspark.sql import SQLContext
from pyspark import SparkContext
sc = SparkContext.getOrCreate()
SQLContext.getOrCreate(sc).range(10).show()
```
throws an exception as below:
```
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/.../spark/python/pyspark/sql/context.py", line 110, in getOrCreate
jsqlContext = sc._jvm.SQLContext.getOrCreate(sc._jsc.sc())
File "/.../spark/python/lib/py4j-0.10.8.1-src.zip/py4j/java_gateway.py", line 1516, in __getattr__
py4j.protocol.Py4JError: org.apache.spark.sql.SQLContext.getOrCreate does not exist in the JVM
```
After this PR:
```
/.../spark/python/pyspark/sql/context.py:113: DeprecationWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.
DeprecationWarning)
+---+
| id|
+---+
| 0|
| 1|
| 2|
| 3|
| 4|
| 5|
| 6|
| 7|
| 8|
| 9|
+---+
```
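For reference, the recommended migration path goes through `SparkSession` directly; a minimal sketch, not part of this patch:
```python
from pyspark.sql import SparkSession

# Equivalent of SQLContext.getOrCreate(sc).range(10).show(),
# without touching the deprecated API.
spark = SparkSession.builder.getOrCreate()
spark.range(10).show()
```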
In the case of the `SQLContext` constructor, after this PR:
```python
from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext.getOrCreate()
SQLContext(sc)
```
```
/.../spark/python/pyspark/sql/context.py:77: DeprecationWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.
DeprecationWarning)
```
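Users who need to keep the legacy code path for now can silence the new warning with the standard `warnings` machinery; a minimal sketch, not part of this patch:
```python
import warnings

from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext.getOrCreate()
with warnings.catch_warnings():
    # Suppress only DeprecationWarning while constructing the legacy context.
    warnings.simplefilter("ignore", DeprecationWarning)
    sqlContext = SQLContext(sc)
```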
### Why are the changes needed?
To promote the use of `SparkSession`, and to keep API parity with the Scala side.
### Does this PR introduce any user-facing change?
Yes, it will show a deprecation warning to users.
### How was this patch tested?
Manually tested as described above. Unit tests were also added.
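A deprecation like this one can be asserted by recording warnings; a minimal sketch of the pattern (the test added in this patch exercises `getOrCreate` itself, not this exact assertion):
```python
import warnings

from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext.getOrCreate()
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    SQLContext(sc)
# The constructor should have emitted the new deprecation notice.
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```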
Closes #27614 from HyukjinKwon/SPARK-30861.
Authored-by: HyukjinKwon <gu...@apache.org>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
python/pyspark/sql/context.py | 15 ++++++++++++++-
python/pyspark/sql/tests/test_context.py | 18 ++++++++++++++++++
2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 68d5ef4..f203e1c 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -56,6 +56,8 @@ class SQLContext(object):
def __init__(self, sparkContext, sparkSession=None, jsqlContext=None):
"""Creates a new SQLContext.
+ .. note:: Deprecated in 3.0.0. Use :func:`SparkSession.builder.getOrCreate()` instead.
+
>>> from datetime import datetime
>>> sqlContext = SQLContext(sc)
>>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1,
@@ -70,6 +72,10 @@ class SQLContext(object):
>>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect()
[(1, u'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])]
"""
+ warnings.warn(
+ "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.",
+ DeprecationWarning)
+
self._sc = sparkContext
self._jsc = self._sc._jsc
self._jvm = self._sc._jvm
@@ -105,9 +111,16 @@ class SQLContext(object):
Get the existing SQLContext or create a new one with given SparkContext.
:param sc: SparkContext
+
+ .. note:: Deprecated in 3.0.0. Use :func:`SparkSession.builder.getOrCreate()` instead.
"""
+ warnings.warn(
+ "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.",
+ DeprecationWarning)
+
if cls._instantiatedContext is None:
- jsqlContext = sc._jvm.SQLContext.getOrCreate(sc._jsc.sc())
+ jsqlContext = sc._jvm.SparkSession.builder().sparkContext(
+ sc._jsc.sc()).getOrCreate().sqlContext()
sparkSession = SparkSession(sc, jsqlContext.sparkSession())
cls(sc, sparkSession, jsqlContext)
return cls._instantiatedContext
diff --git a/python/pyspark/sql/tests/test_context.py b/python/pyspark/sql/tests/test_context.py
index d57ebc4..92e5434 100644
--- a/python/pyspark/sql/tests/test_context.py
+++ b/python/pyspark/sql/tests/test_context.py
@@ -27,6 +27,7 @@ except ImportError:
import py4j
+from pyspark import SparkContext, SQLContext
from pyspark.sql import Row, SparkSession
from pyspark.sql.types import *
from pyspark.sql.window import Window
@@ -259,6 +260,23 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
reload(window)
+class SQLContextTests(unittest.TestCase):
+
+ def test_get_or_create(self):
+ sc = None
+ sql_context = None
+ try:
+ sc = SparkContext('local[4]', "SQLContextTests")
+ sql_context = SQLContext.getOrCreate(sc)
+ assert(isinstance(sql_context, SQLContext))
+ finally:
+ SQLContext._instantiatedContext = None
+ if sql_context is not None:
+ sql_context.sparkSession.stop()
+ if sc is not None:
+ sc.stop()
+
+
if __name__ == "__main__":
from pyspark.sql.tests.test_context import *