Posted to commits@spark.apache.org by gu...@apache.org on 2020/02/19 02:20:21 UTC

[spark] branch branch-3.0 updated: [SPARK-30861][PYTHON][SQL] Deprecate constructor of SQLContext and getOrCreate in SQLContext at PySpark

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 2231a98  [SPARK-30861][PYTHON][SQL] Deprecate constructor of SQLContext and getOrCreate in SQLContext at PySpark
2231a98 is described below

commit 2231a98b1bea8f384507a6856a25bd75591e8741
Author: HyukjinKwon <gu...@apache.org>
AuthorDate: Wed Feb 19 11:17:47 2020 +0900

    [SPARK-30861][PYTHON][SQL] Deprecate constructor of SQLContext and getOrCreate in SQLContext at PySpark
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to deprecate the `SQLContext` APIs in PySpark whose Scala counterparts were removed in SPARK-25908. These equivalent APIs should eventually be removed as well; however, it seems we missed deprecating them first.
    
    While I am here, I fix one more issue. After SPARK-25908, `sc._jvm.SQLContext.getOrCreate` does not exist anymore. So,
    
    ```python
    from pyspark.sql import SQLContext
    from pyspark import SparkContext
    sc = SparkContext.getOrCreate()
    SQLContext.getOrCreate(sc).range(10).show()
    ```
    
    throws an exception as below:
    
    ```
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "/.../spark/python/pyspark/sql/context.py", line 110, in getOrCreate
        jsqlContext = sc._jvm.SQLContext.getOrCreate(sc._jsc.sc())
      File "/.../spark/python/lib/py4j-0.10.8.1-src.zip/py4j/java_gateway.py", line 1516, in __getattr__
    py4j.protocol.Py4JError: org.apache.spark.sql.SQLContext.getOrCreate does not exist in the JVM
    ```
    
    After this PR:
    
    ```
    /.../spark/python/pyspark/sql/context.py:113: DeprecationWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.
      DeprecationWarning)
    +---+
    | id|
    +---+
    |  0|
    |  1|
    |  2|
    |  3|
    |  4|
    |  5|
    |  6|
    |  7|
    |  8|
    |  9|
    +---+
    ```
    
    For the constructor of `SQLContext`, after this PR:
    
    ```python
    from pyspark.sql import SQLContext
    from pyspark import SparkContext
    sc = SparkContext.getOrCreate()
    SQLContext(sc)
    ```
    
    ```
    /.../spark/python/pyspark/sql/context.py:77: DeprecationWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.
      DeprecationWarning)
    ```
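
    Note that, depending on the Python version and context, `DeprecationWarning` may be hidden by default outside interactive `__main__` code. Users who want these warnings to always surface can opt in with the standard `warnings` filters (a general Python note, not part of this patch):

    ```python
    import warnings

    # Always show DeprecationWarning, even where Python would hide it by default.
    warnings.simplefilter("always", DeprecationWarning)
    ```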
    
    ### Why are the changes needed?
    
    To promote the use of `SparkSession`, and to keep API parity with the Scala side.
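
    For reference, the recommended replacement for both deprecated entry points is the `SparkSession` builder (a minimal sketch using standard PySpark APIs):

    ```python
    from pyspark.sql import SparkSession

    # Instead of SQLContext(sc) or SQLContext.getOrCreate(sc):
    spark = SparkSession.builder.getOrCreate()
    spark.range(10).show()
    ```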
    
    ### Does this PR introduce any user-facing change?
    
    Yes, it will show a deprecation warning to users.
    
    ### How was this patch tested?
    
    Manually tested as described above. Unit tests were also added.
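
    A test for the new warnings could look roughly like the following sketch, which uses Python's standard `warnings.catch_warnings`; the class and method names here are illustrative, not the exact tests added in this patch:

    ```python
    import unittest
    import warnings

    from pyspark import SparkContext
    from pyspark.sql import SQLContext

    class SQLContextDeprecationTest(unittest.TestCase):

        def test_constructor_warns(self):
            sc = SparkContext.getOrCreate()
            try:
                # Record all warnings raised while constructing a SQLContext.
                with warnings.catch_warnings(record=True) as caught:
                    warnings.simplefilter("always")
                    SQLContext(sc)
                self.assertTrue(any(
                    issubclass(w.category, DeprecationWarning) for w in caught))
            finally:
                sc.stop()
    ```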
    
    Closes #27614 from HyukjinKwon/SPARK-30861.
    
    Authored-by: HyukjinKwon <gu...@apache.org>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 python/pyspark/sql/context.py            | 15 ++++++++++++++-
 python/pyspark/sql/tests/test_context.py | 18 ++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 68d5ef4..f203e1c 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -56,6 +56,8 @@ class SQLContext(object):
     def __init__(self, sparkContext, sparkSession=None, jsqlContext=None):
         """Creates a new SQLContext.
 
+        .. note:: Deprecated in 3.0.0. Use :func:`SparkSession.builder.getOrCreate()` instead.
+
         >>> from datetime import datetime
         >>> sqlContext = SQLContext(sc)
         >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1,
@@ -70,6 +72,10 @@ class SQLContext(object):
         >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect()
         [(1, u'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])]
         """
+        warnings.warn(
+            "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.",
+            DeprecationWarning)
+
         self._sc = sparkContext
         self._jsc = self._sc._jsc
         self._jvm = self._sc._jvm
@@ -105,9 +111,16 @@ class SQLContext(object):
         Get the existing SQLContext or create a new one with given SparkContext.
 
         :param sc: SparkContext
+
+        .. note:: Deprecated in 3.0.0. Use :func:`SparkSession.builder.getOrCreate()` instead.
         """
+        warnings.warn(
+            "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.",
+            DeprecationWarning)
+
         if cls._instantiatedContext is None:
-            jsqlContext = sc._jvm.SQLContext.getOrCreate(sc._jsc.sc())
+            jsqlContext = sc._jvm.SparkSession.builder().sparkContext(
+                sc._jsc.sc()).getOrCreate().sqlContext()
             sparkSession = SparkSession(sc, jsqlContext.sparkSession())
             cls(sc, sparkSession, jsqlContext)
         return cls._instantiatedContext
diff --git a/python/pyspark/sql/tests/test_context.py b/python/pyspark/sql/tests/test_context.py
index d57ebc4..92e5434 100644
--- a/python/pyspark/sql/tests/test_context.py
+++ b/python/pyspark/sql/tests/test_context.py
@@ -27,6 +27,7 @@ except ImportError:
 
 import py4j
 
+from pyspark import SparkContext, SQLContext
 from pyspark.sql import Row, SparkSession
 from pyspark.sql.types import *
 from pyspark.sql.window import Window
@@ -259,6 +260,23 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
         reload(window)
 
 
+class SQLContextTests(unittest.TestCase):
+
+    def test_get_or_create(self):
+        sc = None
+        sql_context = None
+        try:
+            sc = SparkContext('local[4]', "SQLContextTests")
+            sql_context = SQLContext.getOrCreate(sc)
+            assert(isinstance(sql_context, SQLContext))
+        finally:
+            SQLContext._instantiatedContext = None
+            if sql_context is not None:
+                sql_context.sparkSession.stop()
+            if sc is not None:
+                sc.stop()
+
+
 if __name__ == "__main__":
     from pyspark.sql.tests.test_context import *
 

