You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/07/13 00:22:45 UTC

spark git commit: [SPARK-18646][REPL] Set parent classloader as null for ExecutorClassLoader

Repository: spark
Updated Branches:
  refs/heads/master 780586a9f -> e08d06b37


[SPARK-18646][REPL] Set parent classloader as null for ExecutorClassLoader

## What changes were proposed in this pull request?

`ClassLoader` will preferentially load a class from its `parent`. Only when `parent` is null or the load fails will it call the overridden `findClass` function. To avoid potential issues caused by loading a class with an inappropriate class loader, we should set the `parent` of the `ClassLoader` to null, so that we can fully control which class loader is used.

This is a take-over of #17074; the primary author of this PR is taroplus.

Should close #17074 after this PR gets merged.

## How was this patch tested?

Add test case in `ExecutorClassLoaderSuite`.

Author: Kohki Nishio <ta...@me.com>
Author: Xingbo Jiang <xi...@databricks.com>

Closes #18614 from jiangxb1987/executor_classloader.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e08d06b3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e08d06b3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e08d06b3

Branch: refs/heads/master
Commit: e08d06b37bc96cc48fec1c5e40f73e0bca09c616
Parents: 780586a
Author: Kohki Nishio <ta...@me.com>
Authored: Thu Jul 13 08:22:40 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Thu Jul 13 08:22:40 2017 +0800

----------------------------------------------------------------------
 .../apache/spark/repl/ExecutorClassLoader.scala | 17 +++++---
 .../spark/repl/ExecutorClassLoaderSuite.scala   | 46 ++++++++++++++++++++
 2 files changed, 57 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e08d06b3/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
----------------------------------------------------------------------
diff --git a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
index df13b32..127f673 100644
--- a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
@@ -33,18 +33,23 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.util.{ParentClassLoader, Utils}
 
 /**
- * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI,
- * used to load classes defined by the interpreter when the REPL is used.
- * Allows the user to specify if user class path should be first.
- * This class loader delegates getting/finding resources to parent loader,
- * which makes sense until REPL never provide resource dynamically.
+ * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI, used to load classes
+ * defined by the interpreter when the REPL is used. Allows the user to specify if user class path
+ * should be first. This class loader delegates getting/finding resources to parent loader, which
+ * makes sense until REPL never provide resource dynamically.
+ *
+ * Note: [[ClassLoader]] will preferentially load class from parent. Only when parent is null or
+ * the load failed, that it will call the overridden `findClass` function. To avoid the potential
+ * issue caused by loading class using inappropriate class loader, we should set the parent of
+ * ClassLoader to null, so that we can fully control which class loader is used. For detailed
+ * discussion, see SPARK-18646.
  */
 class ExecutorClassLoader(
     conf: SparkConf,
     env: SparkEnv,
     classUri: String,
     parent: ClassLoader,
-    userClassPathFirst: Boolean) extends ClassLoader with Logging {
+    userClassPathFirst: Boolean) extends ClassLoader(null) with Logging {
   val uri = new URI(classUri)
   val directory = uri.getPath
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e08d06b3/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
----------------------------------------------------------------------
diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
index 6d274bd..092d3c2 100644
--- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
@@ -23,6 +23,8 @@ import java.nio.channels.{FileChannel, ReadableByteChannel}
 import java.nio.charset.StandardCharsets
 import java.nio.file.{Paths, StandardOpenOption}
 import java.util
+import java.util.Collections
+import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider}
 
 import scala.io.Source
 import scala.language.implicitConversions
@@ -77,6 +79,50 @@ class ExecutorClassLoaderSuite
     }
   }
 
+  test("child over system classloader") {
+    // JavaFileObject for scala.Option class
+    val scalaOptionFile = new SimpleJavaFileObject(
+      URI.create(s"string:///scala/Option.java"),
+      JavaFileObject.Kind.SOURCE) {
+
+      override def getCharContent(ignoreEncodingErrors: Boolean): CharSequence = {
+        "package scala; class Option {}"
+      }
+    }
+    // compile fake scala.Option class
+    ToolProvider
+      .getSystemJavaCompiler
+      .getTask(null, null, null, null, null, Collections.singletonList(scalaOptionFile)).call()
+
+    // create 'scala' dir in tempDir1
+    val scalaDir = new File(tempDir1, "scala")
+    assert(scalaDir.mkdir(), s"Failed to create 'scala' directory in $tempDir1")
+
+    // move the generated class into scala dir
+    val filename = "Option.class"
+    val result = new File(filename)
+    assert(result.exists(), "Compiled file not found: " + result.getAbsolutePath)
+
+    val out = new File(scalaDir, filename)
+    Files.move(result, out)
+    assert(out.exists(), "Destination file not moved: " + out.getAbsolutePath)
+
+    // construct class loader tree
+    val parentLoader = new URLClassLoader(urls2, null)
+    val classLoader = new ExecutorClassLoader(
+      new SparkConf(), null, url1, parentLoader, true)
+
+    // load 'scala.Option', using ClassforName to do the exact same behavior as
+    // what JavaDeserializationStream does
+
+    // scalastyle:off classforname
+    val optionClass = Class.forName("scala.Option", false, classLoader)
+    // scalastyle:on classforname
+
+    assert(optionClass.getClassLoader == classLoader,
+      "scala.Option didn't come from ExecutorClassLoader")
+  }
+
   test("child first") {
     val parentLoader = new URLClassLoader(urls2, null)
     val classLoader = new ExecutorClassLoader(new SparkConf(), null, url1, parentLoader, true)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org