You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/07/13 00:22:45 UTC
spark git commit: [SPARK-18646][REPL] Set parent classloader as null
for ExecutorClassLoader
Repository: spark
Updated Branches:
refs/heads/master 780586a9f -> e08d06b37
[SPARK-18646][REPL] Set parent classloader as null for ExecutorClassLoader
## What changes were proposed in this pull request?
`ClassLoader` will preferentially load a class from its `parent`. Only when `parent` is null or the load fails will it call the overridden `findClass` function. To avoid potential issues caused by loading a class with an inappropriate class loader, we should set the `parent` of the `ClassLoader` to null, so that we can fully control which class loader is used.
This is a take-over of #17074; the primary author of this PR is taroplus.
Should close #17074 after this PR gets merged.
## How was this patch tested?
Add test case in `ExecutorClassLoaderSuite`.
Author: Kohki Nishio <ta...@me.com>
Author: Xingbo Jiang <xi...@databricks.com>
Closes #18614 from jiangxb1987/executor_classloader.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e08d06b3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e08d06b3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e08d06b3
Branch: refs/heads/master
Commit: e08d06b37bc96cc48fec1c5e40f73e0bca09c616
Parents: 780586a
Author: Kohki Nishio <ta...@me.com>
Authored: Thu Jul 13 08:22:40 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Thu Jul 13 08:22:40 2017 +0800
----------------------------------------------------------------------
.../apache/spark/repl/ExecutorClassLoader.scala | 17 +++++---
.../spark/repl/ExecutorClassLoaderSuite.scala | 46 ++++++++++++++++++++
2 files changed, 57 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/e08d06b3/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
----------------------------------------------------------------------
diff --git a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
index df13b32..127f673 100644
--- a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
@@ -33,18 +33,23 @@ import org.apache.spark.internal.Logging
import org.apache.spark.util.{ParentClassLoader, Utils}
/**
- * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI,
- * used to load classes defined by the interpreter when the REPL is used.
- * Allows the user to specify if user class path should be first.
- * This class loader delegates getting/finding resources to parent loader,
- * which makes sense until REPL never provide resource dynamically.
+ * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI, used to load classes
+ * defined by the interpreter when the REPL is used. Allows the user to specify if user class path
+ * should be first. This class loader delegates getting/finding resources to parent loader, which
+ * makes sense until REPL never provide resource dynamically.
+ *
+ * Note: [[ClassLoader]] will preferentially load class from parent. Only when parent is null or
+ * the load failed, that it will call the overridden `findClass` function. To avoid the potential
+ * issue caused by loading class using inappropriate class loader, we should set the parent of
+ * ClassLoader to null, so that we can fully control which class loader is used. For detailed
+ * discussion, see SPARK-18646.
*/
class ExecutorClassLoader(
conf: SparkConf,
env: SparkEnv,
classUri: String,
parent: ClassLoader,
- userClassPathFirst: Boolean) extends ClassLoader with Logging {
+ userClassPathFirst: Boolean) extends ClassLoader(null) with Logging {
val uri = new URI(classUri)
val directory = uri.getPath
http://git-wip-us.apache.org/repos/asf/spark/blob/e08d06b3/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
----------------------------------------------------------------------
diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
index 6d274bd..092d3c2 100644
--- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
@@ -23,6 +23,8 @@ import java.nio.channels.{FileChannel, ReadableByteChannel}
import java.nio.charset.StandardCharsets
import java.nio.file.{Paths, StandardOpenOption}
import java.util
+import java.util.Collections
+import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider}
import scala.io.Source
import scala.language.implicitConversions
@@ -77,6 +79,50 @@ class ExecutorClassLoaderSuite
}
}
+ test("child over system classloader") {
+ // JavaFileObject for scala.Option class
+ val scalaOptionFile = new SimpleJavaFileObject(
+ URI.create(s"string:///scala/Option.java"),
+ JavaFileObject.Kind.SOURCE) {
+
+ override def getCharContent(ignoreEncodingErrors: Boolean): CharSequence = {
+ "package scala; class Option {}"
+ }
+ }
+ // compile fake scala.Option class
+ ToolProvider
+ .getSystemJavaCompiler
+ .getTask(null, null, null, null, null, Collections.singletonList(scalaOptionFile)).call()
+
+ // create 'scala' dir in tempDir1
+ val scalaDir = new File(tempDir1, "scala")
+ assert(scalaDir.mkdir(), s"Failed to create 'scala' directory in $tempDir1")
+
+ // move the generated class into scala dir
+ val filename = "Option.class"
+ val result = new File(filename)
+ assert(result.exists(), "Compiled file not found: " + result.getAbsolutePath)
+
+ val out = new File(scalaDir, filename)
+ Files.move(result, out)
+ assert(out.exists(), "Destination file not moved: " + out.getAbsolutePath)
+
+ // construct class loader tree
+ val parentLoader = new URLClassLoader(urls2, null)
+ val classLoader = new ExecutorClassLoader(
+ new SparkConf(), null, url1, parentLoader, true)
+
+ // load 'scala.Option', using ClassforName to do the exact same behavior as
+ // what JavaDeserializationStream does
+
+ // scalastyle:off classforname
+ val optionClass = Class.forName("scala.Option", false, classLoader)
+ // scalastyle:on classforname
+
+ assert(optionClass.getClassLoader == classLoader,
+ "scala.Option didn't come from ExecutorClassLoader")
+ }
+
test("child first") {
val parentLoader = new URLClassLoader(urls2, null)
val classLoader = new ExecutorClassLoader(new SparkConf(), null, url1, parentLoader, true)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org