You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2016/10/14 11:52:13 UTC

spark git commit: [SPARK-17855][CORE] Remove query string from jar url

Repository: spark
Updated Branches:
  refs/heads/master c8b612dec -> 28b645b1e


[SPARK-17855][CORE] Remove query string from jar url

## What changes were proposed in this pull request?

Spark-submit support jar url with http protocol. However, if the url contains any query strings, `worker.DriverRunner.downloadUserJar()` method will throw "Did not see expected jar" exception. This is because this method checks the existance of a downloaded jar whose name contains query strings. This is a problem when your jar is located on some web service which requires some additional information to retrieve the file.

This pr just removes query strings before checking jar existance on worker.

## How was this patch tested?

For now, you can only test this patch by manual test.
* Deploy a spark cluster locally
* Make sure apache httpd service is on
* Save an uber jar, e.g spark-job.jar under `/var/www/html/`
* Use http://localhost/spark-job.jar?param=1 as jar url when running `spark-submit`
* Job should be launched

Author: invkrh <in...@gmail.com>

Closes #15420 from invkrh/spark-17855.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28b645b1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28b645b1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28b645b1

Branch: refs/heads/master
Commit: 28b645b1e643ae0f6c56cbe5a92356623306717f
Parents: c8b612d
Author: invkrh <in...@gmail.com>
Authored: Fri Oct 14 12:52:08 2016 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Fri Oct 14 12:52:08 2016 +0100

----------------------------------------------------------------------
 .../spark/deploy/worker/DriverRunner.scala      | 24 ++++++++------------
 1 file changed, 9 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/28b645b1/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
index 289b0b9..e878c10 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
@@ -18,12 +18,12 @@
 package org.apache.spark.deploy.worker
 
 import java.io._
+import java.net.URI
 import java.nio.charset.StandardCharsets
 
 import scala.collection.JavaConverters._
 
 import com.google.common.io.Files
-import org.apache.hadoop.fs.Path
 
 import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.deploy.{DriverDescription, SparkHadoopUtil}
@@ -147,30 +147,24 @@ private[deploy] class DriverRunner(
    * Will throw an exception if there are errors downloading the jar.
    */
   private def downloadUserJar(driverDir: File): String = {
-    val jarPath = new Path(driverDesc.jarUrl)
-    val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
-    val destPath = new File(driverDir.getAbsolutePath, jarPath.getName)
-    val jarFileName = jarPath.getName
+    val jarFileName = new URI(driverDesc.jarUrl).getPath.split("/").last
     val localJarFile = new File(driverDir, jarFileName)
-    val localJarFilename = localJarFile.getAbsolutePath
-
     if (!localJarFile.exists()) { // May already exist if running multiple workers on one node
-      logInfo(s"Copying user jar $jarPath to $destPath")
+      logInfo(s"Copying user jar ${driverDesc.jarUrl} to $localJarFile")
       Utils.fetchFile(
         driverDesc.jarUrl,
         driverDir,
         conf,
         securityManager,
-        hadoopConf,
+        SparkHadoopUtil.get.newConfiguration(conf),
         System.currentTimeMillis(),
         useCache = false)
+      if (!localJarFile.exists()) { // Verify copy succeeded
+        throw new IOException(
+          s"Can not find expected jar $jarFileName which should have been loaded in $driverDir")
+      }
     }
-
-    if (!localJarFile.exists()) { // Verify copy succeeded
-      throw new Exception(s"Did not see expected jar $jarFileName in $driverDir")
-    }
-
-    localJarFilename
+    localJarFile.getAbsolutePath
   }
 
   private[worker] def prepareAndRunDriver(): Int = {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org