You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2022/06/30 06:58:58 UTC
[spark] branch master updated: [SPARK-39597][PYTHON] Make GetTable, TableExists and DatabaseExists in the python side support 3-layer-namespace
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c1106fbe22c [SPARK-39597][PYTHON] Make GetTable, TableExists and DatabaseExists in the python side support 3-layer-namespace
c1106fbe22c is described below
commit c1106fbe22c6ef36b785befb25686cf64741c447
Author: Ruifeng Zheng <ru...@apache.org>
AuthorDate: Thu Jun 30 14:58:37 2022 +0800
[SPARK-39597][PYTHON] Make GetTable, TableExists and DatabaseExists in the python side support 3-layer-namespace
### What changes were proposed in this pull request?
Python-side changes corresponding to https://issues.apache.org/jira/browse/SPARK-39263:
1. Make TableExists and DatabaseExists support 3-layer-namespace
2. Add GetTable on the Python side
### Why are the changes needed?
to support 3-layer-namespace
### Does this PR introduce _any_ user-facing change?
Yes. In `TableExists`, when `dbName` is empty, it will first try to treat `tableName` as a multi-layer-namespace identifier.
### How was this patch tested?
added UT
Closes #36985 from zhengruifeng/py_3L_db_tbl_exist.
Authored-by: Ruifeng Zheng <ru...@apache.org>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
python/pyspark/sql/catalog.py | 78 +++++++++++++++++++++++++++++++-
python/pyspark/sql/tests/test_catalog.py | 18 ++++++++
2 files changed, 94 insertions(+), 2 deletions(-)
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index cd16be7ba19..17516bbfa66 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -118,6 +118,9 @@ class Catalog:
bool
Indicating whether the database exists
+ .. versionchanged:: 3.4
+ Allowed ``dbName`` to be qualified with catalog name.
+
Examples
--------
>>> spark.catalog.databaseExists("test_new_database")
@@ -125,6 +128,8 @@ class Catalog:
>>> df = spark.sql("CREATE DATABASE test_new_database")
>>> spark.catalog.databaseExists("test_new_database")
True
+ >>> spark.catalog.databaseExists("spark_catalog.test_new_database")
+ True
>>> df = spark.sql("DROP DATABASE test_new_database")
"""
return self._jcatalog.databaseExists(dbName)
@@ -164,6 +169,47 @@ class Catalog:
)
return tables
+ def getTable(self, tableName: str) -> Table:
+ """Get the table or view with the specified name. This table can be a temporary view or a
+ table/view. This throws an AnalysisException when no Table can be found.
+
+ .. versionadded:: 3.4.0
+
+ Parameters
+ ----------
+ tableName : str
+ name of the table to get.
+
+ Examples
+ --------
+ >>> df = spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
+ >>> spark.catalog.getTable("tab1")
+ Table(name='tab1', catalog='spark_catalog', namespace=['default'], ...
+ >>> spark.catalog.getTable("default.tab1")
+ Table(name='tab1', catalog='spark_catalog', namespace=['default'], ...
+ >>> spark.catalog.getTable("spark_catalog.default.tab1")
+ Table(name='tab1', catalog='spark_catalog', namespace=['default'], ...
+ >>> df = spark.sql("DROP TABLE tab1")
+ >>> spark.catalog.getTable("tab1")
+ Traceback (most recent call last):
+ ...
+ pyspark.sql.utils.AnalysisException: ...
+ """
+ jtable = self._jcatalog.getTable(tableName)
+ jnamespace = jtable.namespace()
+ if jnamespace is not None:
+ namespace = [jnamespace[i] for i in range(0, len(jnamespace))]
+ else:
+ namespace = None
+ return Table(
+ name=jtable.name(),
+ catalog=jtable.catalog(),
+ namespace=namespace,
+ description=jtable.description(),
+ tableType=jtable.tableType(),
+ isTemporary=jtable.isTemporary(),
+ )
+
@since(2.0)
def listFunctions(self, dbName: Optional[str] = None) -> List[Function]:
"""Returns a list of functions registered in the specified database.
@@ -255,15 +301,23 @@ class Catalog:
----------
tableName : str
name of the table to check existence
+ If no database is specified, first try to treat ``tableName`` as a
+ multi-layer-namespace identifier, then try to treat ``tableName`` as a normal table
+ name in current database if necessary.
dbName : str, optional
name of the database to check table existence in.
- If no database is specified, the current database is used
+
+ .. deprecated:: 3.4.0
+
Returns
-------
bool
Indicating whether the table/view exists
+ .. versionchanged:: 3.4
+ Allowed ``tableName`` to be qualified with catalog name when ``dbName`` is None.
+
Examples
--------
@@ -274,6 +328,12 @@ class Catalog:
>>> df = spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
>>> spark.catalog.tableExists("tab1")
True
+ >>> spark.catalog.tableExists("default.tab1")
+ True
+ >>> spark.catalog.tableExists("spark_catalog.default.tab1")
+ True
+ >>> spark.catalog.tableExists("tab1", "default")
+ True
>>> df = spark.sql("DROP TABLE tab1")
>>> spark.catalog.tableExists("unexisting_table")
False
@@ -285,6 +345,12 @@ class Catalog:
>>> df = spark.sql("CREATE VIEW view1 AS SELECT 1")
>>> spark.catalog.tableExists("view1")
True
+ >>> spark.catalog.tableExists("default.view1")
+ True
+ >>> spark.catalog.tableExists("spark_catalog.default.view1")
+ True
+ >>> spark.catalog.tableExists("view1", "default")
+ True
>>> df = spark.sql("DROP VIEW view1")
>>> spark.catalog.tableExists("view1")
False
@@ -298,7 +364,15 @@ class Catalog:
>>> spark.catalog.tableExists("view1")
False
"""
- return self._jcatalog.tableExists(dbName, tableName)
+ if dbName is None:
+ return self._jcatalog.tableExists(tableName)
+ else:
+ warnings.warn(
+ "`dbName` has been deprecated since Spark 3.4 and might be removed in "
+ "a future version. Use tableExists(`dbName.tableName`) instead.",
+ FutureWarning,
+ )
+ return self._jcatalog.tableExists(dbName, tableName)
def createExternalTable(
self,
diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py
index b2bf83d7a0d..57c071eea35 100644
--- a/python/pyspark/sql/tests/test_catalog.py
+++ b/python/pyspark/sql/tests/test_catalog.py
@@ -50,6 +50,8 @@ class CatalogTests(ReusedSQLTestCase):
self.assertFalse(spark.catalog.databaseExists("some_db"))
spark.sql("CREATE DATABASE some_db")
self.assertTrue(spark.catalog.databaseExists("some_db"))
+ self.assertTrue(spark.catalog.databaseExists("spark_catalog.some_db"))
+ self.assertFalse(spark.catalog.databaseExists("spark_catalog.some_db2"))
def test_list_tables(self):
from pyspark.sql.catalog import Table
@@ -316,9 +318,25 @@ class CatalogTests(ReusedSQLTestCase):
self.assertFalse(spark.catalog.tableExists("tab2", "some_db"))
spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
self.assertTrue(spark.catalog.tableExists("tab1"))
+ self.assertTrue(spark.catalog.tableExists("default.tab1"))
+ self.assertTrue(spark.catalog.tableExists("spark_catalog.default.tab1"))
+ self.assertTrue(spark.catalog.tableExists("tab1", "default"))
spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT) USING parquet")
+ self.assertFalse(spark.catalog.tableExists("tab2"))
+ self.assertTrue(spark.catalog.tableExists("some_db.tab2"))
+ self.assertTrue(spark.catalog.tableExists("spark_catalog.some_db.tab2"))
self.assertTrue(spark.catalog.tableExists("tab2", "some_db"))
+ def test_get_table(self):
+ spark = self.spark
+ with self.database("some_db"):
+ spark.sql("CREATE DATABASE some_db")
+ with self.table("tab1"):
+ spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
+ self.assertEqual(spark.catalog.getTable("tab1").database, "default")
+ self.assertEqual(spark.catalog.getTable("default.tab1").catalog, "spark_catalog")
+ self.assertEqual(spark.catalog.getTable("spark_catalog.default.tab1").name, "tab1")
+
if __name__ == "__main__":
import unittest
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org