You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/02/12 00:06:06 UTC
[spark] branch master updated: [SPARK-42265][SPARK-41820][CONNECT] Fix createTempView and its variations to work with not analyzed plans

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 90be059509a [SPARK-42265][SPARK-41820][CONNECT] Fix createTempView and its variations to work with not analyzed plans
90be059509a is described below

commit 90be059509a687172188f268eb3f9663819740a3
Author: Takuya UESHIN <ue...@databricks.com>
AuthorDate: Sun Feb 12 09:05:53 2023 +0900

    [SPARK-42265][SPARK-41820][CONNECT] Fix createTempView and its variations to work with not analyzed plans
    
    ### What changes were proposed in this pull request?
    
    Fixes `createTempView` and its variations so that they work with plans that have not been analyzed yet.
    
    - `createTempView`
    - `createOrReplaceTempView`
    - `createGlobalTempView`
    - `createOrReplaceGlobalTempView`
    
    ### Why are the changes needed?
    
    Currently, `SparkConnectPlanner` creates `CreateViewCommand` with `isAnalyzed = true`, but the child plan may not have been analyzed yet.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Users can now run `createTempView` and its variations with plans that have not been analyzed yet.
    
    ### How was this patch tested?
    
    Enabled the related tests.
    
    Closes #39968 from ueshin/issues/SPARK-41279/createTempView.
    
    Authored-by: Takuya UESHIN <ue...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala   | 3 +--
 python/pyspark/sql/connect/dataframe.py                              | 4 ----
 python/pyspark/sql/tests/connect/test_parity_udf.py                  | 5 -----
 3 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index 75581851b5f..194588fe89b 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -1479,8 +1479,7 @@ class SparkConnectPlanner(val session: SparkSession) {
       plan = transformRelation(createView.getInput),
       allowExisting = false,
       replace = createView.getReplace,
-      viewType = viewType,
-      isAnalyzed = true)
+      viewType = viewType)
 
     Dataset.ofRows(session, plan).queryExecution.commandExecuted
   }
diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index 40625f77df5..536c909883e 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -1690,10 +1690,6 @@ def _test() -> None:
     del pyspark.sql.connect.dataframe.DataFrame.repartition.__doc__
     del pyspark.sql.connect.dataframe.DataFrame.repartitionByRange.__doc__
 
-    # TODO(SPARK-41820): Fix SparkConnectException: requirement failed
-    del pyspark.sql.connect.dataframe.DataFrame.createOrReplaceGlobalTempView.__doc__
-    del pyspark.sql.connect.dataframe.DataFrame.createOrReplaceTempView.__doc__
-
     # TODO(SPARK-41823): ambiguous column names
     del pyspark.sql.connect.dataframe.DataFrame.drop.__doc__
     del pyspark.sql.connect.dataframe.DataFrame.join.__doc__
diff --git a/python/pyspark/sql/tests/connect/test_parity_udf.py b/python/pyspark/sql/tests/connect/test_parity_udf.py
index 5fe1dee7fe8..b35f55febf2 100644
--- a/python/pyspark/sql/tests/connect/test_parity_udf.py
+++ b/python/pyspark/sql/tests/connect/test_parity_udf.py
@@ -170,11 +170,6 @@ class UDFParityTests(BaseUDFTestsMixin, ReusedConnectTestCase):
     def test_udf_with_string_return_type(self):
         super().test_udf_with_string_return_type()
 
-    # TODO(SPARK-41279): fix DataFrame.createTempView
-    @unittest.skip("Fails in Spark Connect, should enable.")
-    def test_udf_in_subquery(self):
-        super().test_udf_in_subquery()
-
     def test_udf_registration_returns_udf(self):
         df = self.spark.range(10)
         add_three = self.spark.udf.register("add_three", lambda x: x + 3, IntegerType())


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org