You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by ca...@apache.org on 2020/02/27 22:12:09 UTC
[samza] branch master updated: SAMZA-2472: Use runtime-framework-resources-pathing.jar to specify part of the runtime classpath and leverage it in IsolatingClassLoaderFactory (#1293)
This is an automated email from the ASF dual-hosted git repository.
cameronlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/samza.git
The following commit(s) were added to refs/heads/master by this push:
new 0aa1e7c SAMZA-2472: Use runtime-framework-resources-pathing.jar to specify part of the runtime classpath and leverage it in IsolatingClassLoaderFactory (#1293)
0aa1e7c is described below
commit 0aa1e7c1cf6c840e292d245e9bf870c28a17443e
Author: Cameron Lee <ca...@linkedin.com>
AuthorDate: Thu Feb 27 14:12:00 2020 -0800
SAMZA-2472: Use runtime-framework-resources-pathing.jar to specify part of the runtime classpath and leverage it in IsolatingClassLoaderFactory (#1293)
API changes: None
Upgrade/usage instructions: None
---
.../classloader/DependencyIsolationUtils.java | 2 +
.../classloader/IsolatingClassLoaderFactory.java | 51 ++++++++++++++++------
samza-shell/src/main/bash/run-class.sh | 32 +++++++++-----
3 files changed, 62 insertions(+), 23 deletions(-)
diff --git a/samza-core/src/main/java/org/apache/samza/classloader/DependencyIsolationUtils.java b/samza-core/src/main/java/org/apache/samza/classloader/DependencyIsolationUtils.java
index 8f933ea..a0b5d1e 100644
--- a/samza-core/src/main/java/org/apache/samza/classloader/DependencyIsolationUtils.java
+++ b/samza-core/src/main/java/org/apache/samza/classloader/DependencyIsolationUtils.java
@@ -45,4 +45,6 @@ public class DependencyIsolationUtils {
* classloader.
*/
public static final String FRAMEWORK_API_CLASS_LIST_FILE_NAME = "samza-framework-api-classes.txt";
+
+ public static final String RUNTIME_FRAMEWORK_RESOURCES_PATHING_JAR_NAME = "runtime-framework-resources-pathing.jar";
}
diff --git a/samza-core/src/main/java/org/apache/samza/classloader/IsolatingClassLoaderFactory.java b/samza-core/src/main/java/org/apache/samza/classloader/IsolatingClassLoaderFactory.java
index 47d1ea0..19e776e 100644
--- a/samza-core/src/main/java/org/apache/samza/classloader/IsolatingClassLoaderFactory.java
+++ b/samza-core/src/main/java/org/apache/samza/classloader/IsolatingClassLoaderFactory.java
@@ -35,6 +35,8 @@ import java.net.URLClassLoader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -121,14 +123,14 @@ public class IsolatingClassLoaderFactory {
*/
public ClassLoader buildClassLoader() {
// start at the user.dir to find the resources for the classpaths
- String baseDirectoryPath = System.getProperty("user.dir");
- File apiLibDirectory = libDirectory(new File(baseDirectoryPath, DependencyIsolationUtils.FRAMEWORK_API_DIRECTORY));
+ File baseJobDirectory = new File(System.getProperty("user.dir"));
+ File apiLibDirectory = libDirectory(new File(baseJobDirectory, DependencyIsolationUtils.FRAMEWORK_API_DIRECTORY));
LOG.info("Using API lib directory: {}", apiLibDirectory);
File infrastructureLibDirectory =
- libDirectory(new File(baseDirectoryPath, DependencyIsolationUtils.FRAMEWORK_INFRASTRUCTURE_DIRECTORY));
+ libDirectory(new File(baseJobDirectory, DependencyIsolationUtils.FRAMEWORK_INFRASTRUCTURE_DIRECTORY));
LOG.info("Using infrastructure lib directory: {}", infrastructureLibDirectory);
File applicationLibDirectory =
- libDirectory(new File(baseDirectoryPath, DependencyIsolationUtils.APPLICATION_DIRECTORY));
+ libDirectory(new File(baseJobDirectory, DependencyIsolationUtils.APPLICATION_DIRECTORY));
LOG.info("Using application lib directory: {}", applicationLibDirectory);
ClassLoader apiClassLoader = buildApiClassLoader(apiLibDirectory);
@@ -136,7 +138,7 @@ public class IsolatingClassLoaderFactory {
buildApplicationClassLoader(applicationLibDirectory, apiLibDirectory, apiClassLoader);
// the classloader to return is the one with the infrastructure classpath
- return buildInfrastructureClassLoader(infrastructureLibDirectory, apiLibDirectory, apiClassLoader,
+ return buildInfrastructureClassLoader(infrastructureLibDirectory, baseJobDirectory, apiLibDirectory, apiClassLoader,
applicationClassLoader);
}
@@ -173,19 +175,38 @@ public class IsolatingClassLoaderFactory {
}
/**
- * Build the {@link ClassLoader} which can load Samza framework core classes.
+ * Build the {@link ClassLoader} which can load Samza framework core classes. If a file with the name
+ * {@link DependencyIsolationUtils#RUNTIME_FRAMEWORK_RESOURCES_PATHING_JAR_NAME} is found in {@code baseJobDirectory},
+ * then it will be included in the classpath.
* This may also fall back to loading application classes.
*
* This sets up two links: One link between the infrastructure classloader and the API and another link between the
* infrastructure classloader and the application classloader (see {@link #buildClassLoader()}).
*/
- private static ClassLoader buildInfrastructureClassLoader(File infrastructureLibDirectory, File apiLibDirectory,
- ClassLoader apiClassLoader, ClassLoader applicationClassLoader) {
- return LoaderBuilder.anIsolatingLoader()
- // look in infrastructure lib directory for JARs
- .withClasspath(getClasspathAsURIs(infrastructureLibDirectory))
+ private static ClassLoader buildInfrastructureClassLoader(File infrastructureLibDirectory,
+ File baseJobDirectory,
+ File apiLibDirectory,
+ ClassLoader apiClassLoader,
+ ClassLoader applicationClassLoader) {
+ // start with JARs in infrastructure lib directory
+ List<URI> classpathURIs = new ArrayList<>(getClasspathAsURIs(infrastructureLibDirectory));
+ OriginRestriction originRestriction = OriginRestriction.denyByDefault()
// getClasspathAsURIs should only return JARs within infrastructureLibDirectory anyways, but doing it to be safe
- .withOriginRestriction(OriginRestriction.denyByDefault().allowingDirectory(infrastructureLibDirectory, false))
+ .allowingDirectory(infrastructureLibDirectory, false);
+ File runtimeFrameworkResourcesPathingJar =
+ new File(baseJobDirectory, DependencyIsolationUtils.RUNTIME_FRAMEWORK_RESOURCES_PATHING_JAR_NAME);
+ if (canAccess(runtimeFrameworkResourcesPathingJar)) {
+ // if there is a runtime framework resources pathing JAR, then include that in the classpath as well
+ classpathURIs.add(runtimeFrameworkResourcesPathingJar.toURI());
+ originRestriction.allowingGlobPattern(fileURL(runtimeFrameworkResourcesPathingJar).toExternalForm());
+ LOG.info("Added {} to infrastructure classpath", runtimeFrameworkResourcesPathingJar.getPath());
+ } else {
+ LOG.info("Unable to access {}, so not adding to infrastructure classpath",
+ runtimeFrameworkResourcesPathingJar.getPath());
+ }
+ return LoaderBuilder.anIsolatingLoader()
+ .withClasspath(Collections.unmodifiableList(classpathURIs))
+ .withOriginRestriction(originRestriction)
.withParentRelationship(buildApiParentRelationship(apiLibDirectory, apiClassLoader))
/*
* Fall back to the application classloader for certain classes. For example, the application might implement
@@ -284,11 +305,15 @@ public class IsolatingClassLoaderFactory {
.collect(Collectors.toList());
}
+ private static boolean canAccess(File file) {
+ return file.exists() && file.canRead();
+ }
+
/**
* Makes sure that a file exists and can be read.
*/
private static void validateCanAccess(File file) {
- if (!file.exists() || !file.canRead()) {
+ if (!canAccess(file)) {
throw new SamzaException("Unable to access file: " + file);
}
}
diff --git a/samza-shell/src/main/bash/run-class.sh b/samza-shell/src/main/bash/run-class.sh
index 8cdac30..9b5ac30 100755
--- a/samza-shell/src/main/bash/run-class.sh
+++ b/samza-shell/src/main/bash/run-class.sh
@@ -53,13 +53,13 @@ export APPLICATION_LIB_DIR=$APPLICATION_LIB_DIR
echo APPLICATION_LIB_DIR=$APPLICATION_LIB_DIR
echo BASE_LIB_DIR=$BASE_LIB_DIR
-CLASSPATH=""
+BASE_LIB_CLASSPATH=""
# each jar needs to be appended on its own line to ensure the manifest line length limit of 72 bytes is not violated
for file in $BASE_LIB_DIR/*.[jw]ar;
do
- CLASSPATH=$CLASSPATH" $file \n"
+ BASE_LIB_CLASSPATH=$BASE_LIB_CLASSPATH" $file \n"
done
-echo generated from BASE_LIB_DIR CLASSPATH=$CLASSPATH
+echo generated from BASE_LIB_DIR BASE_LIB_CLASSPATH=$BASE_LIB_CLASSPATH
# In some cases (AWS) $JAVA_HOME/bin doesn't contain jar.
if [ -z "$JAVA_HOME" ] || [ ! -e "$JAVA_HOME/bin/jar" ]; then
@@ -68,10 +68,23 @@ else
JAR="$JAVA_HOME/bin/jar"
fi
+# Create a pathing JAR for the JARs in the BASE_LIB_DIR
# Newlines and spaces are intended to ensure proper parsing of manifest in pathing jar
-printf "Class-Path: \n $CLASSPATH \n" > manifest.txt
-# Creates a new archive and adds custom manifest information to pathing.jar
-eval "$JAR -cvmf manifest.txt pathing.jar"
+printf "Class-Path: \n $BASE_LIB_CLASSPATH \n" > base-lib-manifest.txt
+# Creates a new archive and adds custom manifest information to base-lib-pathing.jar
+eval "$JAR -cvmf base-lib-manifest.txt base-lib-pathing.jar"
+
+# Create a pathing JAR for the runtime framework resources. It is useful to separate this from the base-lib-pathing.jar
+# because the split deployment framework may only need the resources from this runtime pathing JAR.
+if ! [[ $HADOOP_CONF_DIR =~ .*/$ ]]; then
+ # manifest requires a directory to have a trailing slash
+ HADOOP_CONF_DIR="$HADOOP_CONF_DIR/"
+fi
+# HADOOP_CONF_DIR should be supplied to classpath explicitly for Yarn to parse configs
+RUNTIME_FRAMEWORK_RESOURCES_CLASSPATH="$HADOOP_CONF_DIR \n"
+# TODO add JARs from ADDITIONAL_CLASSPATH_DIR to runtime-framework-resources-pathing.jar as well
+printf "Class-Path: \n $RUNTIME_FRAMEWORK_RESOURCES_CLASSPATH \n" > runtime-framework-resources-manifest.txt
+eval "$JAR -cvmf runtime-framework-resources-manifest.txt runtime-framework-resources-pathing.jar"
if [ -z "$JAVA_HOME" ]; then
JAVA="java"
@@ -150,12 +163,11 @@ fi
# Check if 64 bit is set. If not - try and set it if it's supported
[[ $JAVA_OPTS != *-d64* ]] && check_and_enable_64_bit_mode
-# HADOOP_CONF_DIR should be supplied to classpath explicitly for Yarn to parse configs
-echo $JAVA $JAVA_OPTS -cp $HADOOP_CONF_DIR:pathing.jar "$@"
+echo $JAVA $JAVA_OPTS -cp base-lib-pathing.jar:runtime-framework-resources-pathing.jar "$@"
## If localized resource lib directory is defined, then include it in the classpath.
if [[ -z "${ADDITIONAL_CLASSPATH_DIR}" ]]; then
- exec $JAVA $JAVA_OPTS -cp $HADOOP_CONF_DIR:pathing.jar "$@"
+ exec $JAVA $JAVA_OPTS -cp base-lib-pathing.jar:runtime-framework-resources-pathing.jar "$@"
else
- exec $JAVA $JAVA_OPTS -cp $HADOOP_CONF_DIR:pathing.jar:$ADDITIONAL_CLASSPATH_DIR "$@"
+ exec $JAVA $JAVA_OPTS -cp base-lib-pathing.jar:runtime-framework-resources-pathing.jar:$ADDITIONAL_CLASSPATH_DIR "$@"
fi
\ No newline at end of file