Posted to commits@spark.apache.org by we...@apache.org on 2020/04/21 12:34:56 UTC

[spark] branch branch-3.0 updated: [SPARK-31504][SQL] Formatted Explain should have determined order of Output fields

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 756e85e  [SPARK-31504][SQL] Formatted Explain should have determined order of Output fields
756e85e is described below

commit 756e85e781374e2d14284b7df5f0112bb8c5d6d2
Author: yi.wu <yi...@databricks.com>
AuthorDate: Tue Apr 21 12:33:58 2020 +0000

    [SPARK-31504][SQL] Formatted Explain should have determined order of Output fields
    
    ### What changes were proposed in this pull request?
    
    In `verboseStringWithOperatorId`, use `output` (a `Seq[Attribute]`) instead of `producedAttributes` (an `AttributeSet`) to generate `"Output"` for leaf nodes, so that the `"Output"` field order is deterministic.
    
    ### Why are the changes needed?
    
    Currently, formatted explain uses `producedAttributes`, an `AttributeSet`, to generate `"Output"`. As a result, the field order within `"Output"` can differ from run to run: the same plan can produce different explain outputs.
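    
    For illustration, here is a minimal standalone sketch (plain Scala, not Spark's actual `AttributeSet`; the `Attr` class and the IDs are hypothetical) of why iterating a hash-based set loses ordering while a `Seq` keeps it:
    
    ```scala
    object SetOrderingDemo extends App {
      // Stand-in for an attribute: hashing includes the expression ID, which
      // Spark generates fresh per run, so the same columns can hash differently.
      case class Attr(name: String, id: Long) {
        override def toString: String = s"$name#$id"
      }
    
      def attrs(base: Long): Seq[Attr] =
        Seq("a", "b", "c", "d", "e").zipWithIndex.map { case (n, i) => Attr(n, base + i) }
    
      // A Seq preserves declaration order: always a, b, c, d, e.
      println(attrs(1).mkString("Output [", ", ", "]"))
      // An immutable Set with more than 4 elements is hash-based, so iteration
      // follows hash order; different IDs can reorder the same logical columns.
      println(attrs(1).toSet.mkString("Output [", ", ", "]"))
      println(attrs(100).toSet.mkString("Output [", ", ", "]"))
    }
    ```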
    
    ### Does this PR introduce any user-facing change?
    
    Yes, users now see a deterministic field order within formatted explain.
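    
    For example (a sketch mirroring the regression test; the path `/tmp/t` is illustrative), running the following in separate sessions now prints the leaf scan's `Output [...]` list in the same order each time:
    
    ```scala
    spark.range(10).selectExpr("id as a", "id as b", "id as c", "id as d", "id as e")
      .write.mode("overwrite").parquet("/tmp/t")
    spark.read.parquet("/tmp/t").explain("formatted")
    ```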
    
    ### How was this patch tested?
    
    Added a regression test.
    
    Closes #28282 from Ngone51/fix_output.
    
    Authored-by: yi.wu <yi...@databricks.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 55b026a783ce3a5aced1f396e5dd03f0cab9356b)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../org/apache/spark/sql/execution/DataSourceScanExec.scala    |  4 ++--
 .../main/scala/org/apache/spark/sql/execution/SparkPlan.scala  |  2 +-
 .../src/test/scala/org/apache/spark/sql/ExplainSuite.scala     | 10 ++++++++++
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
index 8d488d4..bd0e1d0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
@@ -76,7 +76,7 @@ trait DataSourceScanExec extends LeafExecNode {
 
     s"""
        |(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)}
-       |${ExplainUtils.generateFieldString("Output", producedAttributes)}
+       |${ExplainUtils.generateFieldString("Output", output)}
        |${metadataStr.mkString("\n")}
      """.stripMargin
   }
@@ -378,7 +378,7 @@ case class FileSourceScanExec(
 
     s"""
        |(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)}
-       |${ExplainUtils.generateFieldString("Output", producedAttributes)}
+       |${ExplainUtils.generateFieldString("Output", output)}
        |${metadataStr.mkString("\n")}
      """.stripMargin
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
index f5bb554..e1a6495 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -515,7 +515,7 @@ trait LeafExecNode extends SparkPlan {
   override def verboseStringWithOperatorId(): String = {
     val argumentString = argString(SQLConf.get.maxToStringFields)
     val baseStr = s"(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)}"
-    val outputStr = s"${ExplainUtils.generateFieldString("Output", producedAttributes)}"
+    val outputStr = s"${ExplainUtils.generateFieldString("Output", output)}"
 
     if (argumentString.nonEmpty) {
       s"""
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index 1a35e5b..b204709 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -330,6 +330,16 @@ class ExplainSuite extends QueryTest with SharedSparkSession with DisableAdaptiv
     }.getMessage
     assert(errMsg.contains("Unknown explain mode: unknown"))
   }
+
+  test("SPARK-31504: Output fields in formatted Explain should have determined order") {
+    withTempPath { path =>
+      spark.range(10).selectExpr("id as a", "id as b", "id as c", "id as d", "id as e")
+        .write.mode("overwrite").parquet(path.getAbsolutePath)
+      val df1 = spark.read.parquet(path.getAbsolutePath)
+      val df2 = spark.read.parquet(path.getAbsolutePath)
+      assert(getNormalizedExplain(df1, FormattedMode) === getNormalizedExplain(df2, FormattedMode))
+    }
+  }
 }
 
 case class ExplainSingleData(id: Int)

