You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2022/08/02 01:41:20 UTC

[spark] branch master updated: [SPARK-39902][SQL] Add Scan details to spark plan scan node in SparkUI

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a27edf9ad41 [SPARK-39902][SQL] Add Scan details to spark plan scan node in SparkUI
a27edf9ad41 is described below

commit a27edf9ad4104f7df30dbbf77ec06fcf3cf9feda
Author: Sumeet Gajjar <su...@gmail.com>
AuthorDate: Mon Aug 1 18:41:03 2022 -0700

    [SPARK-39902][SQL] Add Scan details to spark plan scan node in SparkUI
    
    ### What changes were proposed in this pull request?
    
    In this PR, we propose to add a method "String name()" to the Scan interface, which "BatchScanExec" can invoke to set the node name of the plan. This nodeName will eventually be used by "SparkPlanGraphNode" to display it in the header of the UI node.
    
    ### Why are the changes needed?
    
    For DSv2, the scan node in the spark plan on SparkUI currently shows only "BatchScan" instead of an informative name.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, after this change the user will be able to see the scan name in the spark plan on SparkUI.
    
    ### How was this patch tested?
    
    - Tested this change using existing UTs
    - Further tested this change by overriding the newly added method in Iceberg's implementation of the Scan interface.
    
    Closes #37325 from sumeetgajjar/v2_scan_ui_improvement.
    
    Authored-by: Sumeet Gajjar <su...@gmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala |  2 +-
 .../main/java/org/apache/spark/sql/connector/read/Scan.java  | 12 ++++++++++++
 .../spark/sql/execution/datasources/v2/BatchScanExec.scala   |  6 ++++++
 .../apache/spark/sql/execution/datasources/v2/FileScan.scala |  4 ++++
 .../src/test/scala/org/apache/spark/sql/ExplainSuite.scala   |  2 +-
 .../sql/execution/DataSourceScanExecRedactionSuite.scala     |  8 ++++----
 6 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index d75e6906719..8a088a43579 100644
--- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -2408,7 +2408,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper {
       val basePath = dir.getCanonicalPath + "/avro"
       val expected_plan_fragment =
         s"""
-           |\\(1\\) BatchScan
+           |\\(1\\) BatchScan avro
            |Output \\[2\\]: \\[value#xL, id#x\\]
            |DataFilters: \\[isnotnull\\(value#xL\\), \\(value#xL > 2\\)\\]
            |Format: avro
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
index d161de92eb8..941a11b8b1d 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/Scan.java
@@ -60,6 +60,18 @@ public interface Scan {
     return this.getClass().toString();
   }
 
+  /**
+   * The name of the scan, which will be shown in the header of a spark plan scan node on SparkUI.
+   * E.g. "scan parquet sample_db.sample_table"
+   * <p>
+   * By default this returns the simple class name of the implementation. Please override it to
+   * provide a meaningful name.
+   * </p>
+   */
+  default String name() {
+    return this.getClass().getSimpleName();
+  }
+
   /**
    * Returns the physical representation of this scan for batch query. By default this method throws
    * exception, data sources must overwrite this method to provide an implementation, if the
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
index f1c43b8f60c..8da1123c9fe 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala
@@ -131,4 +131,10 @@ case class BatchScanExec(
     val result = s"$nodeName$truncatedOutputString ${scan.description()} $runtimeFiltersString"
     redact(result)
   }
+
+  /**
+   * Returns the name of this type of TreeNode.  Defaults to the class name.
+   * Note that we remove the "Exec" suffix for physical operators here.
+   */
+  override def nodeName: String = s"BatchScan ${scan.name()}"
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
index 21503fda53e..a7b6afc7f4e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala
@@ -213,4 +213,8 @@ trait FileScan extends Scan
       name.toLowerCase(Locale.ROOT)
     }
   }
+
+  override def name(): String = Utils.getSimpleName(this.getClass)
+    .toLowerCase(Locale.ROOT)
+    .replaceAll("scan$", "")
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index f74b63f1b82..4bcf0fccb59 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -474,7 +474,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
         )
         val expected_plan_fragment1 =
           s"""
-             |\\(1\\) BatchScan
+             |\\(1\\) BatchScan $fmt
              |Output \\[2\\]: \\[value#x, id#x\\]
              |DataFilters: \\[isnotnull\\(value#x\\), \\(value#x > 2\\)\\]
              |Format: $fmt
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
index e29b7f579fa..b6b1eb3184e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
@@ -182,14 +182,14 @@ class DataSourceV2ScanExecRedactionSuite extends DataSourceScanRedactionTest {
 
       // Respect SparkConf and replace file:/
       assert(isIncluded(df.queryExecution, replacement))
-      assert(isIncluded(df.queryExecution, "BatchScan"))
+      assert(isIncluded(df.queryExecution, "BatchScan orc"))
       assert(!isIncluded(df.queryExecution, "file:/"))
 
-      withSQLConf(SQLConf.SQL_STRING_REDACTION_PATTERN.key -> "(?i)BatchScan") {
+      withSQLConf(SQLConf.SQL_STRING_REDACTION_PATTERN.key -> "(?i)BatchScan orc") {
         // Respect SQLConf and replace FileScan
         assert(isIncluded(df.queryExecution, replacement))
 
-        assert(!isIncluded(df.queryExecution, "BatchScan"))
+        assert(!isIncluded(df.queryExecution, "BatchScan orc"))
         assert(isIncluded(df.queryExecution, "file:/"))
       }
     }
@@ -204,7 +204,7 @@ class DataSourceV2ScanExecRedactionSuite extends DataSourceScanRedactionTest {
 
         withClue(s"Source '$format':") {
           assert(isIncluded(df.queryExecution, "ReadSchema"))
-          assert(isIncluded(df.queryExecution, "BatchScan"))
+          assert(isIncluded(df.queryExecution, s"BatchScan $format"))
           if (Seq("orc", "parquet").contains(format)) {
             assert(isIncluded(df.queryExecution, "PushedFilters"))
           }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org