Posted to commits@spark.apache.org by gu...@apache.org on 2022/03/29 03:03:40 UTC

[spark] branch branch-3.3 updated: [SPARK-38656][UI][PYTHON] Show options for Pandas API on Spark in UI

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 2a19c29  [SPARK-38656][UI][PYTHON] Show options for Pandas API on Spark in UI
2a19c29 is described below

commit 2a19c29aac76105fb06b09f26b84e58361715e22
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Tue Mar 29 12:00:22 2022 +0900

    [SPARK-38656][UI][PYTHON] Show options for Pandas API on Spark in UI
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to show options for Pandas API on Spark in UI.
    
    The options in Pandas API on Spark (https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#available-options) are currently not shown properly under "SQL Properties". Each option appears there in the format used internally, e.g. `pandas_on_Spark.compute.ops_on_diff_frames` as the key and `false` as the value.
    
    This PR extracts the pandas-on-Spark specific options and shows them separately.
    
    Additionally, this PR proposes to hide "SQL Properties" when none of the configurations are set.
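    
    As a rough illustration of the split (a standalone Scala sketch with a hypothetical map of modified configurations; not part of the patch itself):
    
        val pandasOnSparkConfPrefix = "pandas_on_Spark."
        // Hypothetical contents of `executionUIData.modifiedConfigs`:
        // pandas-on-Spark options live in the session conf under the
        // "pandas_on_Spark." prefix, mixed in with regular SQL configs.
        val modifiedConfigs = Map(
          "spark.sql.shuffle.partitions" -> "10",
          "pandas_on_Spark.compute.ops_on_diff_frames" -> "true")
        // Prefixed entries go to the new table; the rest stay under
        // "SQL Properties".
        val (pandasOnSparkConfigs, sqlConfigs) = modifiedConfigs.partition {
          case (k, _) => k.startsWith(pandasOnSparkConfPrefix)
        }
    
    The patch itself uses two `filterKeys` passes followed by `.toMap`, since `filterKeys` is deprecated in Scala 2.13 and returns a lazy view that needs materializing.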
    
    ### Why are the changes needed?
    
    For better readability and UX for pandas API on Spark.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. Now, we're able to show pandas-on-Spark specific options under "Pandas API on Spark: Properties" as below:
    
    <img width="893" alt="Screen Shot 2022-03-28 at 9 31 54 AM" src="https://user-images.githubusercontent.com/6477701/160308224-3e24ce1d-c422-4271-8e71-daaca3ac600d.png">
    
    <img width="875" alt="Screen Shot 2022-03-28 at 9 31 20 AM" src="https://user-images.githubusercontent.com/6477701/160308226-8b07a0d6-ad31-4774-b5ec-37664d7fe5dd.png">
    
    ### How was this patch tested?
    
    Manually tested as above.
    
    Closes #35972 from HyukjinKwon/SPARK-38656.
    
    Authored-by: Hyukjin Kwon <gu...@apache.org>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
    (cherry picked from commit 94abcd7037b05ac5068ce421e07306d45e957246)
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../spark/sql/execution/ui/ExecutionPage.scala     | 53 +++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala
index 6aacec3..5734760 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala
@@ -21,12 +21,18 @@ import javax.servlet.http.HttpServletRequest
 
 import scala.xml.Node
 
+import org.json4s.JNull
+import org.json4s.JsonAST.{JBool, JString}
+import org.json4s.jackson.JsonMethods.parse
+
 import org.apache.spark.JobExecutionStatus
 import org.apache.spark.internal.Logging
 import org.apache.spark.ui.{UIUtils, WebUIPage}
 
 class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging {
 
+  private val pandasOnSparkConfPrefix = "pandas_on_Spark."
+
   private val sqlStore = parent.sqlStore
 
   override def render(request: HttpServletRequest): Seq[Node] = {
@@ -82,7 +88,11 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging
       summary ++
         planVisualization(request, metrics, graph) ++
         physicalPlanDescription(executionUIData.physicalPlanDescription) ++
-        modifiedConfigs(executionUIData.modifiedConfigs)
+        modifiedConfigs(
+          executionUIData.modifiedConfigs.filterKeys(
+            !_.startsWith(pandasOnSparkConfPrefix)).toMap) ++
+        modifiedPandasOnSparkConfigs(
+          executionUIData.modifiedConfigs.filterKeys(_.startsWith(pandasOnSparkConfPrefix)).toMap)
     }.getOrElse {
       <div>No information to display for query {executionId}</div>
     }
@@ -148,6 +158,8 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging
   }
 
   private def modifiedConfigs(modifiedConfigs: Map[String, String]): Seq[Node] = {
+    if (Option(modifiedConfigs).forall(_.isEmpty)) return Nil
+
     val configs = UIUtils.listingTable(
       propertyHeader,
       propertyRow,
@@ -168,6 +180,45 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging
     <br/>
   }
 
+  private def modifiedPandasOnSparkConfigs(
+      modifiedPandasOnSparkConfigs: Map[String, String]): Seq[Node] = {
+    if (Option(modifiedPandasOnSparkConfigs).forall(_.isEmpty)) return Nil
+
+    val modifiedOptions = modifiedPandasOnSparkConfigs.toSeq.map { case (k, v) =>
+      // Remove prefix.
+      val key = k.slice(pandasOnSparkConfPrefix.length, k.length)
+      // The code below is a simple version of Python's repr().
+      // Pandas API on Spark does not support other types in the options yet.
+      val pyValue = parse(v) match {
+        case JNull => "None"
+        case JBool(v) => v.toString.capitalize
+        case JString(s) => s"'$s'"
+        case _ => v
+      }
+      (key, pyValue)
+    }
+
+    val configs = UIUtils.listingTable(
+      propertyHeader,
+      propertyRow,
+      modifiedOptions.sorted,
+      fixedWidth = true
+    )
+
+    <div>
+      <span class="collapse-pandas-on-spark-properties collapse-table"
+            onClick="collapseTable('collapse-pandas-on-spark-properties',
+             'pandas-on-spark-properties')">
+        <span class="collapse-table-arrow arrow-closed"></span>
+        <a>Pandas API on Spark: Properties</a>
+      </span>
+      <div class="pandas-on-spark-properties collapsible-table collapsed">
+        {configs}
+      </div>
+    </div>
+    <br/>
+  }
+
   private def propertyHeader = Seq("Name", "Value")
   private def propertyRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr>
 }
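
As a rough illustration of the repr()-style rendering added above, here is a standalone json4s sketch (the helper name `toPyRepr` is made up for illustration):

    import org.json4s.JNull
    import org.json4s.JsonAST.{JBool, JString}
    import org.json4s.jackson.JsonMethods.parse

    // Pandas-on-Spark option values are stored as JSON-encoded strings;
    // render them the way Python's repr() would print the decoded value.
    def toPyRepr(v: String): String = parse(v) match {
      case JNull => "None"                    // JSON null  -> None
      case JBool(b) => b.toString.capitalize  // true/false -> True/False
      case JString(s) => s"'$s'"              // "x"        -> 'x'
      case _ => v                             // numbers etc. pass through unchanged
    }

    toPyRepr("false")       // "False"
    toPyRepr("null")        // "None"
    toPyRepr("\"plotly\"")  // "'plotly'"
    toPyRepr("1000")        // "1000"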
