You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by ca...@apache.org on 2020/02/27 22:12:09 UTC

[samza] branch master updated: SAMZA-2472: Use runtime-framework-resources-pathing.jar to specify part of the runtime classpath and leverage it in IsolatingClassLoaderFactory (#1293)

This is an automated email from the ASF dual-hosted git repository.

cameronlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/samza.git


The following commit(s) were added to refs/heads/master by this push:
     new 0aa1e7c  SAMZA-2472: Use runtime-framework-resources-pathing.jar to specify part of the runtime classpath and leverage it in IsolatingClassLoaderFactory (#1293)
0aa1e7c is described below

commit 0aa1e7c1cf6c840e292d245e9bf870c28a17443e
Author: Cameron Lee <ca...@linkedin.com>
AuthorDate: Thu Feb 27 14:12:00 2020 -0800

    SAMZA-2472: Use runtime-framework-resources-pathing.jar to specify part of the runtime classpath and leverage it in IsolatingClassLoaderFactory (#1293)
    
    API changes: None
    Upgrade/usage instructions: None
---
 .../classloader/DependencyIsolationUtils.java      |  2 +
 .../classloader/IsolatingClassLoaderFactory.java   | 51 ++++++++++++++++------
 samza-shell/src/main/bash/run-class.sh             | 32 +++++++++-----
 3 files changed, 62 insertions(+), 23 deletions(-)

diff --git a/samza-core/src/main/java/org/apache/samza/classloader/DependencyIsolationUtils.java b/samza-core/src/main/java/org/apache/samza/classloader/DependencyIsolationUtils.java
index 8f933ea..a0b5d1e 100644
--- a/samza-core/src/main/java/org/apache/samza/classloader/DependencyIsolationUtils.java
+++ b/samza-core/src/main/java/org/apache/samza/classloader/DependencyIsolationUtils.java
@@ -45,4 +45,6 @@ public class DependencyIsolationUtils {
    * classloader.
    */
   public static final String FRAMEWORK_API_CLASS_LIST_FILE_NAME = "samza-framework-api-classes.txt";
+
+  public static final String RUNTIME_FRAMEWORK_RESOURCES_PATHING_JAR_NAME = "runtime-framework-resources-pathing.jar";
 }
diff --git a/samza-core/src/main/java/org/apache/samza/classloader/IsolatingClassLoaderFactory.java b/samza-core/src/main/java/org/apache/samza/classloader/IsolatingClassLoaderFactory.java
index 47d1ea0..19e776e 100644
--- a/samza-core/src/main/java/org/apache/samza/classloader/IsolatingClassLoaderFactory.java
+++ b/samza-core/src/main/java/org/apache/samza/classloader/IsolatingClassLoaderFactory.java
@@ -35,6 +35,8 @@ import java.net.URLClassLoader;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -121,14 +123,14 @@ public class IsolatingClassLoaderFactory {
    */
   public ClassLoader buildClassLoader() {
     // start at the user.dir to find the resources for the classpaths
-    String baseDirectoryPath = System.getProperty("user.dir");
-    File apiLibDirectory = libDirectory(new File(baseDirectoryPath, DependencyIsolationUtils.FRAMEWORK_API_DIRECTORY));
+    File baseJobDirectory = new File(System.getProperty("user.dir"));
+    File apiLibDirectory = libDirectory(new File(baseJobDirectory, DependencyIsolationUtils.FRAMEWORK_API_DIRECTORY));
     LOG.info("Using API lib directory: {}", apiLibDirectory);
     File infrastructureLibDirectory =
-        libDirectory(new File(baseDirectoryPath, DependencyIsolationUtils.FRAMEWORK_INFRASTRUCTURE_DIRECTORY));
+        libDirectory(new File(baseJobDirectory, DependencyIsolationUtils.FRAMEWORK_INFRASTRUCTURE_DIRECTORY));
     LOG.info("Using infrastructure lib directory: {}", infrastructureLibDirectory);
     File applicationLibDirectory =
-        libDirectory(new File(baseDirectoryPath, DependencyIsolationUtils.APPLICATION_DIRECTORY));
+        libDirectory(new File(baseJobDirectory, DependencyIsolationUtils.APPLICATION_DIRECTORY));
     LOG.info("Using application lib directory: {}", applicationLibDirectory);
 
     ClassLoader apiClassLoader = buildApiClassLoader(apiLibDirectory);
@@ -136,7 +138,7 @@ public class IsolatingClassLoaderFactory {
         buildApplicationClassLoader(applicationLibDirectory, apiLibDirectory, apiClassLoader);
 
     // the classloader to return is the one with the infrastructure classpath
-    return buildInfrastructureClassLoader(infrastructureLibDirectory, apiLibDirectory, apiClassLoader,
+    return buildInfrastructureClassLoader(infrastructureLibDirectory, baseJobDirectory, apiLibDirectory, apiClassLoader,
         applicationClassLoader);
   }
 
@@ -173,19 +175,38 @@ public class IsolatingClassLoaderFactory {
   }
 
   /**
-   * Build the {@link ClassLoader} which can load Samza framework core classes.
+   * Build the {@link ClassLoader} which can load Samza framework core classes. If a file with the name
+   * {@link DependencyIsolationUtils#RUNTIME_FRAMEWORK_RESOURCES_PATHING_JAR_NAME} is found in {@code baseJobDirectory},
+   * then it will be included in the classpath.
    * This may also fall back to loading application classes.
    *
    * This sets up two links: One link between the infrastructure classloader and the API and another link between the
    * infrastructure classloader and the application classloader (see {@link #buildClassLoader()}.
    */
-  private static ClassLoader buildInfrastructureClassLoader(File infrastructureLibDirectory, File apiLibDirectory,
-      ClassLoader apiClassLoader, ClassLoader applicationClassLoader) {
-    return LoaderBuilder.anIsolatingLoader()
-        // look in infrastructure lib directory for JARs
-        .withClasspath(getClasspathAsURIs(infrastructureLibDirectory))
+  private static ClassLoader buildInfrastructureClassLoader(File infrastructureLibDirectory,
+      File baseJobDirectory,
+      File apiLibDirectory,
+      ClassLoader apiClassLoader,
+      ClassLoader applicationClassLoader) {
+    // start with JARs in infrastructure lib directory
+    List<URI> classpathURIs = new ArrayList<>(getClasspathAsURIs(infrastructureLibDirectory));
+    OriginRestriction originRestriction = OriginRestriction.denyByDefault()
         // getClasspathAsURIs should only return JARs within infrastructureLibDirectory anyways, but doing it to be safe
-        .withOriginRestriction(OriginRestriction.denyByDefault().allowingDirectory(infrastructureLibDirectory, false))
+        .allowingDirectory(infrastructureLibDirectory, false);
+    File runtimeFrameworkResourcesPathingJar =
+        new File(baseJobDirectory, DependencyIsolationUtils.RUNTIME_FRAMEWORK_RESOURCES_PATHING_JAR_NAME);
+    if (canAccess(runtimeFrameworkResourcesPathingJar)) {
+      // if there is a runtime framework resources pathing JAR, then include that in the classpath as well
+      classpathURIs.add(runtimeFrameworkResourcesPathingJar.toURI());
+      originRestriction.allowingGlobPattern(fileURL(runtimeFrameworkResourcesPathingJar).toExternalForm());
+      LOG.info("Added {} to infrastructure classpath", runtimeFrameworkResourcesPathingJar.getPath());
+    } else {
+      LOG.info("Unable to access {}, so not adding to infrastructure classpath",
+          runtimeFrameworkResourcesPathingJar.getPath());
+    }
+    return LoaderBuilder.anIsolatingLoader()
+        .withClasspath(Collections.unmodifiableList(classpathURIs))
+        .withOriginRestriction(originRestriction)
         .withParentRelationship(buildApiParentRelationship(apiLibDirectory, apiClassLoader))
         /*
          * Fall back to the application classloader for certain classes. For example, the application might implement
@@ -284,11 +305,15 @@ public class IsolatingClassLoaderFactory {
         .collect(Collectors.toList());
   }
 
+  private static boolean canAccess(File file) {
+    return file.exists() && file.canRead();
+  }
+
   /**
    * Makes sure that a file exists and can be read.
    */
   private static void validateCanAccess(File file) {
-    if (!file.exists() || !file.canRead()) {
+    if (!canAccess(file)) {
       throw new SamzaException("Unable to access file: " + file);
     }
   }
diff --git a/samza-shell/src/main/bash/run-class.sh b/samza-shell/src/main/bash/run-class.sh
index 8cdac30..9b5ac30 100755
--- a/samza-shell/src/main/bash/run-class.sh
+++ b/samza-shell/src/main/bash/run-class.sh
@@ -53,13 +53,13 @@ export APPLICATION_LIB_DIR=$APPLICATION_LIB_DIR
 echo APPLICATION_LIB_DIR=$APPLICATION_LIB_DIR
 echo BASE_LIB_DIR=$BASE_LIB_DIR
 
-CLASSPATH=""
+BASE_LIB_CLASSPATH=""
 # all the jars need to be appended on newlines to ensure line argument length of 72 bytes is not violated
 for file in $BASE_LIB_DIR/*.[jw]ar;
 do
-  CLASSPATH=$CLASSPATH" $file \n"
+  BASE_LIB_CLASSPATH=$BASE_LIB_CLASSPATH" $file \n"
 done
-echo generated from BASE_LIB_DIR CLASSPATH=$CLASSPATH
+echo generated from BASE_LIB_DIR BASE_LIB_CLASSPATH=$BASE_LIB_CLASSPATH
 
 # In some cases (AWS) $JAVA_HOME/bin doesn't contain jar.
 if [ -z "$JAVA_HOME" ] || [ ! -e "$JAVA_HOME/bin/jar" ]; then
@@ -68,10 +68,23 @@ else
   JAR="$JAVA_HOME/bin/jar"
 fi
 
+# Create a pathing JAR for the JARs in the BASE_LIB_DIR
 # Newlines and spaces are intended to ensure proper parsing of manifest in pathing jar
-printf "Class-Path: \n $CLASSPATH \n" > manifest.txt
-# Creates a new archive and adds custom manifest information to pathing.jar
-eval "$JAR -cvmf manifest.txt pathing.jar"
+printf "Class-Path: \n $BASE_LIB_CLASSPATH \n" > base-lib-manifest.txt
+# Creates a new archive and adds custom manifest information to base-lib-pathing.jar
+eval "$JAR -cvmf base-lib-manifest.txt base-lib-pathing.jar"
+
+# Create a pathing JAR for the runtime framework resources. It is useful to separate this from the base-lib-pathing.jar
+# because the split deployment framework may only need the resources from this runtime pathing JAR.
+if ! [[ $HADOOP_CONF_DIR =~ .*/$ ]]; then
+  # manifest requires a directory to have a trailing slash
+  HADOOP_CONF_DIR="$HADOOP_CONF_DIR/"
+fi
+# HADOOP_CONF_DIR should be supplied to classpath explicitly for Yarn to parse configs
+RUNTIME_FRAMEWORK_RESOURCES_CLASSPATH="$HADOOP_CONF_DIR \n"
+# TODO add JARs from ADDITIONAL_CLASSPATH_DIR to runtime-framework-resources-pathing.jar as well
+printf "Class-Path: \n $RUNTIME_FRAMEWORK_RESOURCES_CLASSPATH \n" > runtime-framework-resources-manifest.txt
+eval "$JAR -cvmf runtime-framework-resources-manifest.txt runtime-framework-resources-pathing.jar"
 
 if [ -z "$JAVA_HOME" ]; then
   JAVA="java"
@@ -150,12 +163,11 @@ fi
 # Check if 64 bit is set. If not - try and set it if it's supported
 [[ $JAVA_OPTS != *-d64* ]] && check_and_enable_64_bit_mode
 
-# HADOOP_CONF_DIR should be supplied to classpath explicitly for Yarn to parse configs
-echo $JAVA $JAVA_OPTS -cp $HADOOP_CONF_DIR:pathing.jar "$@"
+echo $JAVA $JAVA_OPTS -cp base-lib-pathing.jar:runtime-framework-resources-pathing.jar "$@"
 
 ## If localized resource lib directory is defined, then include it in the classpath.
 if [[ -z "${ADDITIONAL_CLASSPATH_DIR}" ]]; then
-   exec $JAVA $JAVA_OPTS -cp $HADOOP_CONF_DIR:pathing.jar "$@"
+  exec $JAVA $JAVA_OPTS -cp base-lib-pathing.jar:runtime-framework-resources-pathing.jar "$@"
 else
-  exec $JAVA $JAVA_OPTS -cp $HADOOP_CONF_DIR:pathing.jar:$ADDITIONAL_CLASSPATH_DIR "$@"
+  exec $JAVA $JAVA_OPTS -cp base-lib-pathing.jar:runtime-framework-resources-pathing.jar:$ADDITIONAL_CLASSPATH_DIR "$@"
 fi
\ No newline at end of file