You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/04/21 12:34:56 UTC
[spark] branch branch-3.0 updated: [SPARK-31504][SQL] Formatted
Explain should have determined order of Output fields
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 756e85e [SPARK-31504][SQL] Formatted Explain should have determined order of Output fields
756e85e is described below
commit 756e85e781374e2d14284b7df5f0112bb8c5d6d2
Author: yi.wu <yi...@databricks.com>
AuthorDate: Tue Apr 21 12:33:58 2020 +0000
[SPARK-31504][SQL] Formatted Explain should have determined order of Output fields
### What changes were proposed in this pull request?
In `verboseStringWithOperatorId`, use `output` (it's `Seq[Attribute]`) instead of `producedAttributes` (it's `AttributeSet`) to generate `"Output"` for the leaf node, in order to make the `"Output"` field order deterministic.
### Why are the changes needed?
Currently, Formatted Explain uses `producedAttributes`, the `AttributeSet`, to generate `"Output"`. As a result, the field order within `"Output"` can differ from run to run. That means, for the same plan, it could have different explain outputs.
### Does this PR introduce any user-facing change?
Yes, users now see a deterministic field order within the formatted explain.
### How was this patch tested?
Added a regression test.
Closes #28282 from Ngone51/fix_output.
Authored-by: yi.wu <yi...@databricks.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
(cherry picked from commit 55b026a783ce3a5aced1f396e5dd03f0cab9356b)
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../org/apache/spark/sql/execution/DataSourceScanExec.scala | 4 ++--
.../main/scala/org/apache/spark/sql/execution/SparkPlan.scala | 2 +-
.../src/test/scala/org/apache/spark/sql/ExplainSuite.scala | 10 ++++++++++
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
index 8d488d4..bd0e1d0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
@@ -76,7 +76,7 @@ trait DataSourceScanExec extends LeafExecNode {
s"""
|(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)}
- |${ExplainUtils.generateFieldString("Output", producedAttributes)}
+ |${ExplainUtils.generateFieldString("Output", output)}
|${metadataStr.mkString("\n")}
""".stripMargin
}
@@ -378,7 +378,7 @@ case class FileSourceScanExec(
s"""
|(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)}
- |${ExplainUtils.generateFieldString("Output", producedAttributes)}
+ |${ExplainUtils.generateFieldString("Output", output)}
|${metadataStr.mkString("\n")}
""".stripMargin
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
index f5bb554..e1a6495 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -515,7 +515,7 @@ trait LeafExecNode extends SparkPlan {
override def verboseStringWithOperatorId(): String = {
val argumentString = argString(SQLConf.get.maxToStringFields)
val baseStr = s"(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)}"
- val outputStr = s"${ExplainUtils.generateFieldString("Output", producedAttributes)}"
+ val outputStr = s"${ExplainUtils.generateFieldString("Output", output)}"
if (argumentString.nonEmpty) {
s"""
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index 1a35e5b..b204709 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -330,6 +330,16 @@ class ExplainSuite extends QueryTest with SharedSparkSession with DisableAdaptiv
}.getMessage
assert(errMsg.contains("Unknown explain mode: unknown"))
}
+
+ test("SPARK-31504: Output fields in formatted Explain should have determined order") {
+ withTempPath { path =>
+ spark.range(10).selectExpr("id as a", "id as b", "id as c", "id as d", "id as e")
+ .write.mode("overwrite").parquet(path.getAbsolutePath)
+ val df1 = spark.read.parquet(path.getAbsolutePath)
+ val df2 = spark.read.parquet(path.getAbsolutePath)
+ assert(getNormalizedExplain(df1, FormattedMode) === getNormalizedExplain(df2, FormattedMode))
+ }
+ }
}
case class ExplainSingleData(id: Int)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org