You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by va...@apache.org on 2017/02/24 17:29:06 UTC

spark git commit: [SPARK-19707][CORE] Improve the invalid path check for sc.addJar

Repository: spark
Updated Branches:
  refs/heads/master 4a5e38f57 -> b0a8c16fe


[SPARK-19707][CORE] Improve the invalid path check for sc.addJar

## What changes were proposed in this pull request?

Currently in Spark there're two issues when we add jars with invalid path:

* If the jar path is a empty string {--jar ",dummy.jar"}, then Spark will resolve it to the current directory path and add to classpath / file server, which is unwanted. This is happened in our programatic way to submit Spark application. From my understanding Spark should defensively filter out such empty path.
* If the jar path is a invalid path (file doesn't exist), `addJar` doesn't check it and will still add to file server, the exception will be delayed until job running. Actually this local path could be checked beforehand, no need to wait until task running. We have similar check in `addFile`, but lacks similar similar mechanism in `addJar`.

## How was this patch tested?

Add unit test and local manual verification.

Author: jerryshao <ss...@hortonworks.com>

Closes #17038 from jerryshao/SPARK-19707.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b0a8c16f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b0a8c16f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b0a8c16f

Branch: refs/heads/master
Commit: b0a8c16fecd4337f77bfbe4b45884254d7af52bd
Parents: 4a5e38f
Author: jerryshao <ss...@hortonworks.com>
Authored: Fri Feb 24 09:28:59 2017 -0800
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Fri Feb 24 09:28:59 2017 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/SparkContext.scala  | 12 ++++++++++--
 .../main/scala/org/apache/spark/util/Utils.scala    |  2 +-
 .../scala/org/apache/spark/SparkContextSuite.scala  | 16 ++++++++++++++++
 .../scala/org/apache/spark/util/UtilsSuite.scala    |  1 +
 4 files changed, 28 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b0a8c16f/core/src/main/scala/org/apache/spark/SparkContext.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 17194b9..0e36a30 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1815,10 +1815,18 @@ class SparkContext(config: SparkConf) extends Logging {
           // A JAR file which exists only on the driver node
           case null | "file" =>
             try {
+              val file = new File(uri.getPath)
+              if (!file.exists()) {
+                throw new FileNotFoundException(s"Jar ${file.getAbsolutePath} not found")
+              }
+              if (file.isDirectory) {
+                throw new IllegalArgumentException(
+                  s"Directory ${file.getAbsoluteFile} is not allowed for addJar")
+              }
               env.rpcEnv.fileServer.addJar(new File(uri.getPath))
             } catch {
-              case exc: FileNotFoundException =>
-                logError(s"Jar not found at $path")
+              case NonFatal(e) =>
+                logError(s"Failed to add $path to Spark environment", e)
                 null
             }
           // A JAR file which exists locally on every worker node

http://git-wip-us.apache.org/repos/asf/spark/blob/b0a8c16f/core/src/main/scala/org/apache/spark/util/Utils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 5538289..480240a 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -1989,7 +1989,7 @@ private[spark] object Utils extends Logging {
     if (paths == null || paths.trim.isEmpty) {
       ""
     } else {
-      paths.split(",").map { p => Utils.resolveURI(p) }.mkString(",")
+      paths.split(",").filter(_.trim.nonEmpty).map { p => Utils.resolveURI(p) }.mkString(",")
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b0a8c16f/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
index 5a41e1c..f97a112 100644
--- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala
@@ -292,6 +292,22 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
     }
   }
 
+  test("add jar with invalid path") {
+    val tmpDir = Utils.createTempDir()
+    val tmpJar = File.createTempFile("test", ".jar", tmpDir)
+
+    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
+    sc.addJar(tmpJar.getAbsolutePath)
+
+    // Invaid jar path will only print the error log, will not add to file server.
+    sc.addJar("dummy.jar")
+    sc.addJar("")
+    sc.addJar(tmpDir.getAbsolutePath)
+
+    sc.listJars().size should be (1)
+    sc.listJars().head should include (tmpJar.getName)
+  }
+
   test("Cancelling job group should not cause SparkContext to shutdown (SPARK-6414)") {
     try {
       sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))

http://git-wip-us.apache.org/repos/asf/spark/blob/b0a8c16f/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 43f77e6..c9cf651 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -507,6 +507,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
       assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""",
         s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4")
     }
+    assertResolves(",jar1,jar2", s"file:$cwd/jar1,file:$cwd/jar2")
   }
 
   test("nonLocalPaths") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org