You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2022/04/02 17:04:47 UTC

[systemds] branch main updated: [SYSTEMDS-3342] Fix classloader handling in spark driver for codegen

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new cdbbbf2  [SYSTEMDS-3342] Fix classloader handling in spark driver for codegen
cdbbbf2 is described below

commit cdbbbf2a60ddb5123acb1b0f09c750a5acebe861
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat Apr 2 18:56:52 2022 +0200

    [SYSTEMDS-3342] Fix classloader handling in spark driver for codegen
    
    In multi-threaded parfor programs in the Spark driver, map and codegen
    operations, which both utilize Janino for generating custom operators,
    failed because imported functions of SystemDS could not be loaded.
    The reason is the specific classloader handling in Spark, which creates
    MutableURLClassLoaders for executor tasks and the driver main thread;
    new threads don't have access to them, yet Janino by default uses
    Thread.currentThread().getContextClassLoader() for compilation.
    
    We now keep the main driver class loader when setting up codegen.
---
 src/main/java/org/apache/sysds/api/DMLScript.java      |  4 ++++
 .../org/apache/sysds/runtime/codegen/CodegenUtils.java | 18 +++++++++++++++---
 .../controlprogram/parfor/RemoteParForUtils.java       |  1 +
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/main/java/org/apache/sysds/api/DMLScript.java b/src/main/java/org/apache/sysds/api/DMLScript.java
index 61ecdf6..bc29deb 100644
--- a/src/main/java/org/apache/sysds/api/DMLScript.java
+++ b/src/main/java/org/apache/sysds/api/DMLScript.java
@@ -57,6 +57,7 @@ import org.apache.sysds.parser.ParserFactory;
 import org.apache.sysds.parser.ParserWrapper;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.DMLScriptException;
+import org.apache.sysds.runtime.codegen.CodegenUtils;
 import org.apache.sysds.runtime.controlprogram.Program;
 import org.apache.sysds.runtime.controlprogram.caching.CacheableData;
 import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
@@ -651,5 +652,8 @@ public class DMLScript
 				LOG.error("Failed to load native cuda codegen library\n" + e);
 			}
 		}
+		// set the global class loader to make Spark's MutableURLClassLoader
+		// available for all parfor worker threads without pass-through
+		CodegenUtils.setClassLoader(Thread.currentThread().getContextClassLoader());
 	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/codegen/CodegenUtils.java b/src/main/java/org/apache/sysds/runtime/codegen/CodegenUtils.java
index 9a10315..90f7089 100644
--- a/src/main/java/org/apache/sysds/runtime/codegen/CodegenUtils.java
+++ b/src/main/java/org/apache/sysds/runtime/codegen/CodegenUtils.java
@@ -42,6 +42,7 @@ import javax.tools.JavaCompiler.CompilationTask;
 import javax.tools.JavaFileObject;
 import javax.tools.StandardJavaFileManager;
 import javax.tools.ToolProvider;
+
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
@@ -69,7 +70,15 @@ public class CodegenUtils
 
 	//javac-specific working directory for src/class files
 	private static String _workingDir = null;
-
+	
+	//access to spark's MutableURLClassLoader of the driver's main thread
+	//(otherwise Thread.currentThread().getContextClassLoader() as default)
+	public static ClassLoader _mainClassLoader = null;
+	
+	public static synchronized void setClassLoader(ClassLoader clsLoader) {
+		_mainClassLoader = clsLoader;
+	}
+	
 	public static Class<?> compileClass(String name, String src) {
 		//reuse existing compiled class
 		Class<?> ret = _cache.get(name);
@@ -182,10 +191,13 @@ public class CodegenUtils
 	////////////////////////////
 	//JANINO-specific methods (used for spark environments)
 
-	private static Class<?> compileClassJanino(String name, String src) {
+	private synchronized static Class<?> compileClassJanino(String name, String src) {
 		try {
-			//compile source code
+			// compile source code
+			// (in recent spark versions )
 			SimpleCompiler compiler = new SimpleCompiler();
+			if( _mainClassLoader != null )
+				compiler.setParentClassLoader(_mainClassLoader);
 			compiler.cook(src);
 
 			//keep source code for later re-construction
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/RemoteParForUtils.java b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/RemoteParForUtils.java
index 33b8284..073d4a2 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/RemoteParForUtils.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/RemoteParForUtils.java
@@ -282,6 +282,7 @@ public class RemoteParForUtils
 	/**
 	 * Init and register-cleanup of buffer pool
 	 * @param workerID worker id
+	 * @param isLocal in local spark mode (single JVM)
 	 * @throws IOException exception
 	 */
 	public static void setupBufferPool(long workerID, boolean isLocal) throws IOException {