You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2015/05/13 10:43:44 UTC

spark git commit: [SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path

Repository: spark
Updated Branches:
  refs/heads/master 98195c303 -> 50c727080


[SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path

escape spaces in the arguments.

Author: Masayoshi TSUZUKI <ts...@oss.nttdata.co.jp>
Author: Kousuke Saruta <sa...@oss.nttdata.co.jp>

Closes #5447 from tsudukim/feature/SPARK-6568-2 and squashes the following commits:

3f9a188 [Masayoshi TSUZUKI] modified some errors.
ed46047 [Masayoshi TSUZUKI] avoid scalastyle errors.
1784239 [Masayoshi TSUZUKI] removed Utils.formatPath.
e03f289 [Masayoshi TSUZUKI] removed testWindows from Utils.resolveURI and Utils.resolveURIs. replaced SystemUtils.IS_OS_WINDOWS to Utils.isWindows. removed Utils.formatPath from PythonRunner.scala.
84c33d0 [Masayoshi TSUZUKI] - use resolveURI in nonLocalPaths - run tests for Windows path only on Windows
016128d [Masayoshi TSUZUKI] fixed to use File.toURI()
2c62e3b [Masayoshi TSUZUKI] Merge pull request #1 from sarutak/SPARK-6568-2
7019a8a [Masayoshi TSUZUKI] Merge branch 'master' of https://github.com/apache/spark into feature/SPARK-6568-2
45946ee [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-6568-2
10f1c73 [Kousuke Saruta] Added a comment
93c3c40 [Kousuke Saruta] Merge branch 'classpath-handling-fix' of github.com:sarutak/spark into SPARK-6568-2
649da82 [Kousuke Saruta] Fix classpath handling
c7ba6a7 [Masayoshi TSUZUKI] [SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50c72708
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50c72708
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50c72708

Branch: refs/heads/master
Commit: 50c72708015fba15d0e78946f1f4ec262776bc38
Parents: 98195c3
Author: Masayoshi TSUZUKI <ts...@oss.nttdata.co.jp>
Authored: Wed May 13 09:43:40 2015 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed May 13 09:43:40 2015 +0100

----------------------------------------------------------------------
 .../org/apache/spark/deploy/PythonRunner.scala  | 23 +++----
 .../scala/org/apache/spark/util/Utils.scala     | 43 ++++---------
 .../apache/spark/deploy/PythonRunnerSuite.scala | 31 +++++----
 .../org/apache/spark/util/UtilsSuite.scala      | 67 +++++++++++++-------
 .../org/apache/spark/repl/SparkILoop.scala      |  5 +-
 5 files changed, 89 insertions(+), 80 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/50c72708/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
index 53e18c4..c2ed43a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
@@ -18,9 +18,11 @@
 package org.apache.spark.deploy
 
 import java.net.URI
+import java.io.File
 
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.JavaConversions._
+import scala.util.Try
 
 import org.apache.spark.api.python.PythonUtils
 import org.apache.spark.util.{RedirectThread, Utils}
@@ -81,16 +83,13 @@ object PythonRunner {
       throw new IllegalArgumentException("Launching Python applications through " +
         s"spark-submit is currently only supported for local files: $path")
     }
-    val windows = Utils.isWindows || testWindows
-    var formattedPath = if (windows) Utils.formatWindowsPath(path) else path
-
-    // Strip the URI scheme from the path
-    formattedPath =
-      new URI(formattedPath).getScheme match {
-        case null => formattedPath
-        case Utils.windowsDrive(d) if windows => formattedPath
-        case _ => new URI(formattedPath).getPath
-      }
+    // get path when scheme is file.
+    val uri = Try(new URI(path)).getOrElse(new File(path).toURI)
+    var formattedPath = uri.getScheme match {
+      case null => path
+      case "file" | "local" => uri.getPath
+      case _ => null
+    }
 
     // Guard against malformed paths potentially throwing NPE
     if (formattedPath == null) {
@@ -99,7 +98,9 @@ object PythonRunner {
 
     // In Windows, the drive should not be prefixed with "/"
     // For instance, python does not understand "/C:/path/to/sheep.py"
-    formattedPath = if (windows) formattedPath.stripPrefix("/") else formattedPath
+    if (Utils.isWindows && formattedPath.matches("/[a-zA-Z]:/.*")) {
+      formattedPath = formattedPath.stripPrefix("/")
+    }
     formattedPath
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/50c72708/core/src/main/scala/org/apache/spark/util/Utils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index be4db02..48843b4 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -1705,11 +1705,6 @@ private[spark] object Utils extends Logging {
   val windowsDrive = "([a-zA-Z])".r
 
   /**
-   * Format a Windows path such that it can be safely passed to a URI.
-   */
-  def formatWindowsPath(path: String): String = path.replace("\\", "/")
-
-  /**
    * Indicates whether Spark is currently running unit tests.
    */
   def isTesting: Boolean = {
@@ -1806,37 +1801,24 @@ private[spark] object Utils extends Logging {
    * If the supplied path does not contain a scheme, or is a relative path, it will be
    * converted into an absolute path with a file:// scheme.
    */
-  def resolveURI(path: String, testWindows: Boolean = false): URI = {
-
-    // In Windows, the file separator is a backslash, but this is inconsistent with the URI format
-    val windows = isWindows || testWindows
-    val formattedPath = if (windows) formatWindowsPath(path) else path
-
-    val uri = new URI(formattedPath)
-    if (uri.getPath == null) {
-      throw new IllegalArgumentException(s"Given path is malformed: $uri")
-    }
-
-    Option(uri.getScheme) match {
-      case Some(windowsDrive(d)) if windows =>
-        new URI("file:/" + uri.toString.stripPrefix("/"))
-      case None =>
-        // Preserve fragments for HDFS file name substitution (denoted by "#")
-        // For instance, in "abc.py#xyz.py", "xyz.py" is the name observed by the application
-        val fragment = uri.getFragment
-        val part = new File(uri.getPath).toURI
-        new URI(part.getScheme, part.getPath, fragment)
-      case Some(other) =>
-        uri
+  def resolveURI(path: String): URI = {
+    try {
+      val uri = new URI(path)
+      if (uri.getScheme() != null) {
+        return uri
+      }
+    } catch {
+      case e: URISyntaxException =>
     }
+    new File(path).getAbsoluteFile().toURI()
   }
 
   /** Resolve a comma-separated list of paths. */
-  def resolveURIs(paths: String, testWindows: Boolean = false): String = {
+  def resolveURIs(paths: String): String = {
     if (paths == null || paths.trim.isEmpty) {
       ""
     } else {
-      paths.split(",").map { p => Utils.resolveURI(p, testWindows) }.mkString(",")
+      paths.split(",").map { p => Utils.resolveURI(p) }.mkString(",")
     }
   }
 
@@ -1847,8 +1829,7 @@ private[spark] object Utils extends Logging {
       Array.empty
     } else {
       paths.split(",").filter { p =>
-        val formattedPath = if (windows) formatWindowsPath(p) else p
-        val uri = new URI(formattedPath)
+        val uri = resolveURI(p)
         Option(uri.getScheme).getOrElse("file") match {
           case windowsDrive(d) if windows => false
           case "local" | "file" => false

http://git-wip-us.apache.org/repos/asf/spark/blob/50c72708/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala
index bb6251f..80f2cc0 100644
--- a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.deploy
 
 import org.scalatest.FunSuite
 
+import org.apache.spark.util.Utils
+
 class PythonRunnerSuite extends FunSuite {
 
   // Test formatting a single path to be added to the PYTHONPATH
@@ -28,10 +30,14 @@ class PythonRunnerSuite extends FunSuite {
     assert(PythonRunner.formatPath("file:///spark.py") === "/spark.py")
     assert(PythonRunner.formatPath("local:/spark.py") === "/spark.py")
     assert(PythonRunner.formatPath("local:///spark.py") === "/spark.py")
-    assert(PythonRunner.formatPath("C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py")
-    assert(PythonRunner.formatPath("/C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py")
-    assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) ===
-      "C:/a/b/spark.py")
+    if (Utils.isWindows) {
+      assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) ===
+        "C:/a/b/spark.py")
+      assert(PythonRunner.formatPath("C:\\a\\b\\spark.py", testWindows = true) ===
+        "C:/a/b/spark.py")
+      assert(PythonRunner.formatPath("C:\\a b\\spark.py", testWindows = true) ===
+        "C:/a b/spark.py")
+    }
     intercept[IllegalArgumentException] { PythonRunner.formatPath("one:two") }
     intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:s3:xtremeFS") }
     intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:/path/to/some.py") }
@@ -45,14 +51,15 @@ class PythonRunnerSuite extends FunSuite {
       Array("/app.py", "/spark.py"))
     assert(PythonRunner.formatPaths("me.py,file:/you.py,local:/we.py") ===
       Array("me.py", "/you.py", "/we.py"))
-    assert(PythonRunner.formatPaths("C:/a/b/spark.py", testWindows = true) ===
-      Array("C:/a/b/spark.py"))
-    assert(PythonRunner.formatPaths("/C:/a/b/spark.py", testWindows = true) ===
-      Array("C:/a/b/spark.py"))
-    assert(PythonRunner.formatPaths("C:/free.py,pie.py", testWindows = true) ===
-      Array("C:/free.py", "pie.py"))
-    assert(PythonRunner.formatPaths("lovely.py,C:/free.py,file:/d:/fry.py", testWindows = true) ===
-      Array("lovely.py", "C:/free.py", "d:/fry.py"))
+    if (Utils.isWindows) {
+      assert(PythonRunner.formatPaths("C:\\a\\b\\spark.py", testWindows = true) ===
+        Array("C:/a/b/spark.py"))
+      assert(PythonRunner.formatPaths("C:\\free.py,pie.py", testWindows = true) ===
+        Array("C:/free.py", "pie.py"))
+      assert(PythonRunner.formatPaths("lovely.py,C:\\free.py,file:/d:/fry.py",
+        testWindows = true) ===
+        Array("lovely.py", "C:/free.py", "d:/fry.py"))
+    }
     intercept[IllegalArgumentException] { PythonRunner.formatPaths("one:two,three") }
     intercept[IllegalArgumentException] { PythonRunner.formatPaths("two,three,four:five:six") }
     intercept[IllegalArgumentException] { PythonRunner.formatPaths("hdfs:/some.py,foo.py") }

http://git-wip-us.apache.org/repos/asf/spark/blob/50c72708/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 651ead6..61152c2 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -367,51 +367,58 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging {
   }
 
   test("resolveURI") {
-    def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = {
+    def assertResolves(before: String, after: String): Unit = {
       // This should test only single paths
       assume(before.split(",").length === 1)
       // Repeated invocations of resolveURI should yield the same result
-      def resolve(uri: String): String = Utils.resolveURI(uri, testWindows).toString
+      def resolve(uri: String): String = Utils.resolveURI(uri).toString
       assert(resolve(after) === after)
       assert(resolve(resolve(after)) === after)
       assert(resolve(resolve(resolve(after))) === after)
       // Also test resolveURIs with single paths
-      assert(new URI(Utils.resolveURIs(before, testWindows)) === new URI(after))
-      assert(new URI(Utils.resolveURIs(after, testWindows)) === new URI(after))
+      assert(new URI(Utils.resolveURIs(before)) === new URI(after))
+      assert(new URI(Utils.resolveURIs(after)) === new URI(after))
     }
-    val cwd = System.getProperty("user.dir")
+    val rawCwd = System.getProperty("user.dir")
+    val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd
     assertResolves("hdfs:/root/spark.jar", "hdfs:/root/spark.jar")
     assertResolves("hdfs:///root/spark.jar#app.jar", "hdfs:/root/spark.jar#app.jar")
     assertResolves("spark.jar", s"file:$cwd/spark.jar")
-    assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar#app.jar")
-    assertResolves("C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true)
-    assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt", testWindows = true)
-    assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true)
-    assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true)
-    assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt", testWindows = true)
-    intercept[IllegalArgumentException] { Utils.resolveURI("file:foo") }
-    intercept[IllegalArgumentException] { Utils.resolveURI("file:foo:baby") }
+    assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar%23app.jar")
+    assertResolves("path to/file.txt", s"file:$cwd/path%20to/file.txt")
+    if (Utils.isWindows) {
+      assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt")
+      assertResolves("C:\\path to\\file.txt", "file:/C:/path%20to/file.txt")
+    }
+    assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt")
+    assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt")
+    assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt")
+    assertResolves("file:foo", s"file:foo")
+    assertResolves("file:foo:baby", s"file:foo:baby")
   }
 
   test("resolveURIs with multiple paths") {
-    def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = {
+    def assertResolves(before: String, after: String): Unit = {
       assume(before.split(",").length > 1)
-      assert(Utils.resolveURIs(before, testWindows) === after)
-      assert(Utils.resolveURIs(after, testWindows) === after)
+      assert(Utils.resolveURIs(before) === after)
+      assert(Utils.resolveURIs(after) === after)
       // Repeated invocations of resolveURIs should yield the same result
-      def resolve(uri: String): String = Utils.resolveURIs(uri, testWindows)
+      def resolve(uri: String): String = Utils.resolveURIs(uri)
       assert(resolve(after) === after)
       assert(resolve(resolve(after)) === after)
       assert(resolve(resolve(resolve(after))) === after)
     }
-    val cwd = System.getProperty("user.dir")
+    val rawCwd = System.getProperty("user.dir")
+    val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd
     assertResolves("jar1,jar2", s"file:$cwd/jar1,file:$cwd/jar2")
     assertResolves("file:/jar1,file:/jar2", "file:/jar1,file:/jar2")
     assertResolves("hdfs:/jar1,file:/jar2,jar3", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3")
-    assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5",
-      s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4#jar5")
-    assertResolves("hdfs:/jar1,file:/jar2,jar3,C:\\pi.py#py.pi",
-      s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py#py.pi", testWindows = true)
+    assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5,path to/jar6",
+      s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4%23jar5,file:$cwd/path%20to/jar6")
+    if (Utils.isWindows) {
+      assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""",
+        s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4")
+    }
   }
 
   test("nonLocalPaths") {
@@ -426,6 +433,8 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging {
     assert(Utils.nonLocalPaths("local:/spark.jar,file:/smart.jar,family.py") === Array.empty)
     assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar") ===
       Array("hdfs:/spark.jar", "s3:/smart.jar"))
+    assert(Utils.nonLocalPaths("hdfs:/spark.jar,path to/a.jar,s3:/smart.jar") ===
+      Array("hdfs:/spark.jar", "s3:/smart.jar"))
     assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar,local.py,file:/hello/pi.py") ===
       Array("hdfs:/spark.jar", "s3:/smart.jar"))
     assert(Utils.nonLocalPaths("local.py,hdfs:/spark.jar,file:/hello/pi.py,s3:/smart.jar") ===
@@ -547,7 +556,12 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging {
     val targetDir = new File(tempDir, "target-dir")
     Files.write("some text", sourceFile, UTF_8)
 
-    val path = new Path("file://" + sourceDir.getAbsolutePath)
+    val path =
+      if (Utils.isWindows) {
+        new Path("file:/" + sourceDir.getAbsolutePath.replace("\\", "/"))
+      } else {
+        new Path("file://" + sourceDir.getAbsolutePath)
+      }
     val conf = new Configuration()
     val fs = Utils.getHadoopFileSystem(path.toString, conf)
 
@@ -567,7 +581,12 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging {
     val destInnerFile = new File(destInnerDir, sourceFile.getName)
     assert(destInnerFile.isFile())
 
-    val filePath = new Path("file://" + sourceFile.getAbsolutePath)
+    val filePath =
+      if (Utils.isWindows) {
+        new Path("file:/" + sourceFile.getAbsolutePath.replace("\\", "/"))
+      } else {
+        new Path("file://" + sourceFile.getAbsolutePath)
+      }
     val testFileDir = new File(tempDir, "test-filename")
     val testFileName = "testFName"
     val testFilefs = Utils.getHadoopFileSystem(filePath.toString, conf)

http://git-wip-us.apache.org/repos/asf/spark/blob/50c72708/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
----------------------------------------------------------------------
diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 488f3a9..2b23552 100644
--- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -206,7 +206,8 @@ class SparkILoop(
         // e.g. file:/C:/my/path.jar -> C:/my/path.jar
         SparkILoop.getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") }
       } else {
-        SparkILoop.getAddedJars
+        // We need new URI(jar).getPath here for the case that `jar` includes encoded white space (%20).
+        SparkILoop.getAddedJars.map { jar => new URI(jar).getPath }
       }
     // work around for Scala bug
     val totalClassPath = addedJars.foldLeft(
@@ -1109,7 +1110,7 @@ object SparkILoop extends Logging {
         if (settings.classpath.isDefault)
           settings.classpath.value = sys.props("java.class.path")
 
-        getAddedJars.foreach(settings.classpath.append(_))
+        getAddedJars.map(jar => new URI(jar).getPath).foreach(settings.classpath.append(_))
 
         repl process settings
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org