You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ge...@apache.org on 2020/08/01 05:16:18 UTC

[spark] branch branch-3.0 updated: [SPARK-32467][UI] Avoid encoding URL twice on https redirect

This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new ea4b288  [SPARK-32467][UI] Avoid encoding URL twice on https redirect
ea4b288 is described below

commit ea4b288c5c616f09426d4b583ed5dcff14ccaa57
Author: Gengliang Wang <ge...@databricks.com>
AuthorDate: Sat Aug 1 13:09:26 2020 +0800

    [SPARK-32467][UI] Avoid encoding URL twice on https redirect
    
    ### What changes were proposed in this pull request?
    
    When https is enabled for Spark UI, an HTTP request will be redirected as an encoded HTTPS URL: https://github.com/apache/spark/pull/10238/files#diff-f79a5ead735b3d0b34b6b94486918e1cR312
    
    When we create the redirect url, we will call getRequestURI and getQueryString. Both two methods may return an encoded string. However, we pass them directly to the following URI constructor
    ```
    URI(String scheme, String authority, String path, String query, String fragment)
    ```
    As this URI constructor assumes both path and query parameters are decoded strings, it will encode them again. This makes the redirect URL encoded twice.
    
    This problem is on stage page with HTTPS enabled. The URL of "/taskTable" contains query parameter `order%5B0%5D%5Bcolumn%5D`. After encoded it becomes  `order%255B0%255D%255Bcolumn%255D` and it will be decoded as `order%5B0%5D%5Bcolumn%5D` instead of `order[0][dir]`.  When the parameter `order[0][dir]` is missing, there will be an excetpion from:
    https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala#L176
    and the stage page fail to load.
    
    To fix the problem, we can try decoding the query parameters before encoding it. This is to make sure we encode the URL
    
    ### Why are the changes needed?
    
    Fix a UI issue when HTTPS is enabled
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    A new Unit test + manually test on a cluster
    
    Closes #29271 from gengliangwang/urlEncode.
    
    Authored-by: Gengliang Wang <ge...@databricks.com>
    Signed-off-by: Gengliang Wang <ge...@databricks.com>
    (cherry picked from commit 71aea02e9ffb0c6f7c72c91054c2a4653e22e801)
    Signed-off-by: Gengliang Wang <ge...@databricks.com>
---
 .../scala/org/apache/spark/ui/JettyUtils.scala     | 29 +++++++++++++++++-----
 .../test/scala/org/apache/spark/ui/UISuite.scala   | 21 ++++++++++++++++
 2 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
index 94c99d4..a4ba565 100644
--- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.ui
 
-import java.net.{URI, URL}
+import java.net.{URI, URL, URLDecoder}
 import java.util.EnumSet
 import javax.servlet.DispatcherType
 import javax.servlet.http._
@@ -376,8 +376,7 @@ private[spark] object JettyUtils extends Logging {
         if (baseRequest.isSecure) {
           return
         }
-        val httpsURI = createRedirectURI(scheme, baseRequest.getServerName, securePort,
-          baseRequest.getRequestURI, baseRequest.getQueryString)
+        val httpsURI = createRedirectURI(scheme, securePort, baseRequest)
         response.setContentLength(0)
         response.sendRedirect(response.encodeRedirectURL(httpsURI))
         baseRequest.setHandled(true)
@@ -439,16 +438,34 @@ private[spark] object JettyUtils extends Logging {
     handler.addFilter(holder, "/*", EnumSet.allOf(classOf[DispatcherType]))
   }
 
+  private def decodeURL(url: String, encoding: String): String = {
+    if (url == null) {
+      null
+    } else {
+      URLDecoder.decode(url, encoding)
+    }
+  }
+
   // Create a new URI from the arguments, handling IPv6 host encoding and default ports.
-  private def createRedirectURI(
-      scheme: String, server: String, port: Int, path: String, query: String) = {
+  private def createRedirectURI(scheme: String, port: Int, request: Request): String = {
+    val server = request.getServerName
     val redirectServer = if (server.contains(":") && !server.startsWith("[")) {
       s"[${server}]"
     } else {
       server
     }
     val authority = s"$redirectServer:$port"
-    new URI(scheme, authority, path, query, null).toString
+    val queryEncoding = if (request.getQueryEncoding != null) {
+      request.getQueryEncoding
+    } else {
+      // By default decoding the URI as "UTF-8" should be enough for SparkUI
+      "UTF-8"
+    }
+    // The request URL can be raw or encoded here. To avoid the request URL being
+    // encoded twice, let's decode it here.
+    val requestURI = decodeURL(request.getRequestURI, queryEncoding)
+    val queryString = decodeURL(request.getQueryString, queryEncoding)
+    new URI(scheme, authority, requestURI, queryString, null).toString
   }
 
   def toVirtualHosts(connectors: String*): Array[String] = connectors.map("@" + _).toArray
diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala
index 2ad4a63..56026ea 100644
--- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala
@@ -262,6 +262,27 @@ class UISuite extends SparkFunSuite {
     }
   }
 
+  test("SPARK-32467: Avoid encoding URL twice on https redirect") {
+    val (conf, securityMgr, sslOptions) = sslEnabledConf()
+    val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)
+    try {
+      val serverAddr = s"http://localhost:${serverInfo.boundPort}"
+
+      val (_, ctx) = newContext("/ctx1")
+      serverInfo.addHandler(ctx, securityMgr)
+
+      TestUtils.withHttpConnection(new URL(s"$serverAddr/ctx%281%29?a%5B0%5D=b")) { conn =>
+        assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND)
+        val location = Option(conn.getHeaderFields().get("Location"))
+          .map(_.get(0)).orNull
+        val expectedLocation = s"https://localhost:${serverInfo.securePort.get}/ctx(1)?a[0]=b"
+        assert(location == expectedLocation)
+      }
+    } finally {
+      stopServer(serverInfo)
+    }
+  }
+
   test("http -> https redirect applies to all URIs") {
     val (conf, securityMgr, sslOptions) = sslEnabledConf()
     val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org