You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/02/12 00:06:06 UTC
[spark] branch master updated: [SPARK-42265][SPARK-41820][CONNECT] Fix createTempView and its variations to work with not analyzed plans
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 90be059509a [SPARK-42265][SPARK-41820][CONNECT] Fix createTempView and its variations to work with not analyzed plans
90be059509a is described below
commit 90be059509a687172188f268eb3f9663819740a3
Author: Takuya UESHIN <ue...@databricks.com>
AuthorDate: Sun Feb 12 09:05:53 2023 +0900
[SPARK-42265][SPARK-41820][CONNECT] Fix createTempView and its variations to work with not analyzed plans
### What changes were proposed in this pull request?
Fixes `createTempView` and its variations so that they work with plans that have not been analyzed yet:
- `createTempView`
- `createOrReplaceTempView`
- `createGlobalTempView`
- `createOrReplaceGlobalTempView`
### Why are the changes needed?
Currently `SparkConnectPlanner` creates `CreateViewCommand` with `isAnalyzed = true`, but the child plan may not have been analyzed yet.
### Does this PR introduce _any_ user-facing change?
Yes. Users can now run `createTempView` and its variations on plans that have not been analyzed yet.
### How was this patch tested?
Enabled the related tests.
Closes #39968 from ueshin/issues/SPARK-41279/createTempView.
Authored-by: Takuya UESHIN <ue...@databricks.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala | 3 +--
python/pyspark/sql/connect/dataframe.py | 4 ----
python/pyspark/sql/tests/connect/test_parity_udf.py | 5 -----
3 files changed, 1 insertion(+), 11 deletions(-)
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index 75581851b5f..194588fe89b 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -1479,8 +1479,7 @@ class SparkConnectPlanner(val session: SparkSession) {
plan = transformRelation(createView.getInput),
allowExisting = false,
replace = createView.getReplace,
- viewType = viewType,
- isAnalyzed = true)
+ viewType = viewType)
Dataset.ofRows(session, plan).queryExecution.commandExecuted
}
diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index 40625f77df5..536c909883e 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -1690,10 +1690,6 @@ def _test() -> None:
del pyspark.sql.connect.dataframe.DataFrame.repartition.__doc__
del pyspark.sql.connect.dataframe.DataFrame.repartitionByRange.__doc__
- # TODO(SPARK-41820): Fix SparkConnectException: requirement failed
- del pyspark.sql.connect.dataframe.DataFrame.createOrReplaceGlobalTempView.__doc__
- del pyspark.sql.connect.dataframe.DataFrame.createOrReplaceTempView.__doc__
-
# TODO(SPARK-41823): ambiguous column names
del pyspark.sql.connect.dataframe.DataFrame.drop.__doc__
del pyspark.sql.connect.dataframe.DataFrame.join.__doc__
diff --git a/python/pyspark/sql/tests/connect/test_parity_udf.py b/python/pyspark/sql/tests/connect/test_parity_udf.py
index 5fe1dee7fe8..b35f55febf2 100644
--- a/python/pyspark/sql/tests/connect/test_parity_udf.py
+++ b/python/pyspark/sql/tests/connect/test_parity_udf.py
@@ -170,11 +170,6 @@ class UDFParityTests(BaseUDFTestsMixin, ReusedConnectTestCase):
def test_udf_with_string_return_type(self):
super().test_udf_with_string_return_type()
- # TODO(SPARK-41279): fix DataFrame.createTempView
- @unittest.skip("Fails in Spark Connect, should enable.")
- def test_udf_in_subquery(self):
- super().test_udf_in_subquery()
-
def test_udf_registration_returns_udf(self):
df = self.spark.range(10)
add_three = self.spark.udf.register("add_three", lambda x: x + 3, IntegerType())
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org