You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/04/21 17:26:21 UTC
[spark] branch master updated: [SPARK-27439][SQL] Use analyzed plan
when explaining Dataset
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ad60c6d [SPARK-27439][SQL] Use analyzed plan when explaining Dataset
ad60c6d is described below
commit ad60c6d9be3234a0296d1620129d5ca108f0876b
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Sun Apr 21 10:25:56 2019 -0700
[SPARK-27439][SQL] Use analyzed plan when explaining Dataset
## What changes were proposed in this pull request?
Because a view is resolved during analysis when we create a dataset, the content of the view is determined when the dataset is created, not when it is evaluated. Now the explain result of a dataset is not correctly consistent with the collected result of it, because we use the pre-analyzed logical plan of the dataset in the explain command. The explain command will analyze the logical plan passed in. So if a view is changed after the dataset was created, the plans shown by explain command [...]
```scala
scala> spark.range(10).createOrReplaceTempView("test")
scala> spark.range(5).createOrReplaceTempView("test2")
scala> spark.sql("select * from test").createOrReplaceTempView("tmp001")
scala> val df = spark.sql("select * from tmp001")
scala> spark.sql("select * from test2").createOrReplaceTempView("tmp001")
scala> df.show
+---+
| id|
+---+
| 0|
| 1|
| 2|
| 3|
| 4|
| 5|
| 6|
| 7|
| 8|
| 9|
+---+
scala> df.explain
```
Before:
```scala
== Physical Plan ==
*(1) Range (0, 5, step=1, splits=12)
```
After:
```scala
== Physical Plan ==
*(1) Range (0, 10, step=1, splits=12)
```
## How was this patch tested?
Manually tested, and a unit test was added.
Closes #24415 from viirya/SPARK-27439.
Authored-by: Liang-Chi Hsieh <vi...@gmail.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../src/main/scala/org/apache/spark/sql/Dataset.scala | 5 ++++-
.../scala/org/apache/spark/sql/DataFrameSuite.scala | 19 ++++++++++++++++++-
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 793714f..e974912 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -498,7 +498,10 @@ class Dataset[T] private[sql](
* @since 1.6.0
*/
def explain(extended: Boolean): Unit = {
- val explain = ExplainCommand(queryExecution.logical, extended = extended)
+ // Because views are possibly resolved in the analyzed plan of this dataset. We use analyzed
+ // plan in `ExplainCommand`, for consistency. Otherwise, the plans shown by explain command
+ // might be inconsistent with the evaluated data of this dataset.
+ val explain = ExplainCommand(queryExecution.analyzed, extended = extended)
sparkSession.sessionState.executePlan(explain).executedPlan.executeCollect().foreach {
// scalastyle:off println
r => println(r.getString(0))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 8a9c526..62fcca4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql
-import java.io.File
+import java.io.{ByteArrayOutputStream, File}
import java.nio.charset.StandardCharsets
import java.sql.{Date, Timestamp}
import java.util.UUID
@@ -2133,4 +2133,21 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
checkAnswer(res, Row("1-1", 6, 6))
}
}
+
+ test("SPARK-27439: Explain result should match collected result after view change") {
+ withTempView("test", "test2", "tmp") {
+ spark.range(10).createOrReplaceTempView("test")
+ spark.range(5).createOrReplaceTempView("test2")
+ spark.sql("select * from test").createOrReplaceTempView("tmp")
+ val df = spark.sql("select * from tmp")
+ spark.sql("select * from test2").createOrReplaceTempView("tmp")
+
+ val captured = new ByteArrayOutputStream()
+ Console.withOut(captured) {
+ df.explain()
+ }
+ checkAnswer(df, spark.range(10).toDF)
+ assert(captured.toString().contains("Range (0, 10, step=1, splits=2)"))
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org