You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/07/20 13:10:36 UTC

[spark] branch master updated: [SPARK-36207][PYTHON] Expose databaseExists in pyspark.sql.catalog

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 463fcb3  [SPARK-36207][PYTHON] Expose databaseExists in pyspark.sql.catalog
463fcb3 is described below

commit 463fcb3723d4c5cffd4b787e2d7254ceaf2bca98
Author: Dominik Gehl <do...@open.ch>
AuthorDate: Tue Jul 20 22:10:06 2021 +0900

    [SPARK-36207][PYTHON] Expose databaseExists in pyspark.sql.catalog
    
    ### What changes were proposed in this pull request?
    Expose databaseExists in pyspark.sql.catalog
    
    ### Why are the changes needed?
    `databaseExists` was already available in the Scala Catalog API, but was not exposed in PySpark.
    
    ### Does this PR introduce _any_ user-facing change?
    New method databaseExists
    
    ### How was this patch tested?
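    Added a unit test (`test_database_exists` in python/pyspark/sql/tests/test_catalog.py) and doctest examples in the new method's docstring.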
    
    Closes #33416 from dominikgehl/feature/SPARK-36207.
    
    Lead-authored-by: Dominik Gehl <do...@open.ch>
    Co-authored-by: Dominik Gehl <ge...@fastmail.fm>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/docs/source/reference/pyspark.sql.rst |  1 +
 python/pyspark/sql/catalog.py                | 26 ++++++++++++++++++++++++++
 python/pyspark/sql/catalog.pyi               |  1 +
 python/pyspark/sql/tests/test_catalog.py     |  8 ++++++++
 4 files changed, 36 insertions(+)

diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst
index 74eac3d..d8e7b41 100644
--- a/python/docs/source/reference/pyspark.sql.rst
+++ b/python/docs/source/reference/pyspark.sql.rst
@@ -618,6 +618,7 @@ Catalog APIs
     Catalog.createExternalTable
     Catalog.createTable
     Catalog.currentDatabase
+    Catalog.databaseExists
     Catalog.dropGlobalTempView
     Catalog.dropTempView
     Catalog.isCached
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 8087d63..2d74c73 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -65,6 +65,32 @@ class Catalog(object):
                 locationUri=jdb.locationUri()))
         return databases
 
+    def databaseExists(self, dbName):
+        """Check if the database with the specified name exists.
+
+        .. versionadded:: 3.3.0
+
+        Parameters
+        ----------
+        dbName : str
+            name of the database whose existence is to be checked
+
+        Returns
+        -------
+        bool
+            Indicating whether the database exists
+
+        Examples
+        --------
+        >>> spark.catalog.databaseExists("test_new_database")
+        False
+        >>> df = spark.sql("CREATE DATABASE test_new_database")
+        >>> spark.catalog.databaseExists("test_new_database")
+        True
+        >>> df = spark.sql("DROP DATABASE test_new_database")
+        """
+        return self._jcatalog.databaseExists(dbName)
+
     @since(2.0)
     def listTables(self, dbName=None):
         """Returns a list of tables/views in the specified database.
diff --git a/python/pyspark/sql/catalog.pyi b/python/pyspark/sql/catalog.pyi
index 6892719..1eed73a 100644
--- a/python/pyspark/sql/catalog.pyi
+++ b/python/pyspark/sql/catalog.pyi
@@ -36,6 +36,7 @@ class Catalog:
     def currentDatabase(self) -> str: ...
     def setCurrentDatabase(self, dbName: str) -> None: ...
     def listDatabases(self) -> List[Database]: ...
+    def databaseExists(self, dbName: str) -> bool: ...
     def listTables(self, dbName: Optional[str] = ...) -> List[Table]: ...
     def listFunctions(self, dbName: Optional[str] = ...) -> List[Function]: ...
     def listColumns(
diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py
index 8699878..90467fa 100644
--- a/python/pyspark/sql/tests/test_catalog.py
+++ b/python/pyspark/sql/tests/test_catalog.py
@@ -43,6 +43,14 @@ class CatalogTests(ReusedSQLTestCase):
             databases = [db.name for db in spark.catalog.listDatabases()]
             self.assertEqual(sorted(databases), ["default", "some_db"])
 
+    def test_database_exists(self):
+        # SPARK-36207: testing that databaseExists returns correct boolean
+        spark = self.spark
+        with self.database("some_db"):
+            self.assertFalse(spark.catalog.databaseExists("some_db"))
+            spark.sql("CREATE DATABASE some_db")
+            self.assertTrue(spark.catalog.databaseExists("some_db"))
+
     def test_list_tables(self):
         from pyspark.sql.catalog import Table
         spark = self.spark

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org