You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@samza.apache.org by GitBox <gi...@apache.org> on 2019/11/25 21:43:53 UTC

[GitHub] [samza] prateekm commented on a change in pull request #1173: SAMZA-2333: [AM isolation] Use cytodynamics classloader to launch job coordinator

prateekm commented on a change in pull request #1173: SAMZA-2333: [AM isolation] Use cytodynamics classloader to launch job coordinator
URL: https://github.com/apache/samza/pull/1173#discussion_r350435069
 
 

 ##########
 File path: samza-core/src/main/java/org/apache/samza/classloader/IsolatingClassLoaderFactory.java
 ##########
 @@ -0,0 +1,316 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.samza.classloader;
+
+import com.linkedin.cytodynamics.matcher.BootstrapClassPredicate;
+import com.linkedin.cytodynamics.matcher.GlobMatcher;
+import com.linkedin.cytodynamics.nucleus.DelegateRelationship;
+import com.linkedin.cytodynamics.nucleus.DelegateRelationshipBuilder;
+import com.linkedin.cytodynamics.nucleus.IsolationLevel;
+import com.linkedin.cytodynamics.nucleus.LoaderBuilder;
+import com.linkedin.cytodynamics.nucleus.OriginRestriction;
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.samza.SamzaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Use this to build a classloader for running Samza which isolates the Samza framework code/dependencies from the
+ * application code/dependencies.
+ */
+public class IsolatingClassLoaderFactory {
+  private static final Logger LOG = LoggerFactory.getLogger(IsolatingClassLoaderFactory.class);
+
+  private static final String LIB_DIRECTORY = "lib";
+
+  /**
+   * Build a classloader which will isolate Samza framework code from application code. Samza framework classes and
+   * application-specific classes will be loaded using different classloaders. This will enable dependencies of each
+   * category of classes to also be loaded separately, so that runtime dependency conflicts do not happen.
+   *
+   * Samza framework API classes need to be specified in a file called
+   * {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME} which is in the lib directory which is in the
+   * API package. The file needs to be generated when building the framework API package. This class will not generate
+   * the file.
+   *
+   * Implementation notes:
+   *
+   * The cytodynamics isolating classloader is used for this. It provides more control than the built-in
+   * {@link URLClassLoader}. Cytodynamics provides the ability to compose multiple classloaders together and have more
+   * granular delegation strategies between the classloaders.
+   *
+   * In order to share objects between classes loaded by different classloaders, the classes for the shared objects must
+   * be loaded by a common classloader. Those common classes will be loaded through a common API classloader. The
+   * cytodynamics classloader can be set up to only use the common API classloader for an explicit set of classes. The
+   * {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME} file should include the framework API classes.
+   * Also, bootstrap classes (e.g. java.lang.String) need to be loaded by a common classloader, since objects of those
+   * types need to be shared between the framework and the application. There are also some static bootstrap classes
+   * which should be shared (e.g. java.lang.System). Bootstrap classes will be loaded through a common classloader by
+   * default.
+   *
+   * These are the classloaders which are used to make up the final classloader.
+   * <ul>
+   *   <li>bootstrap classloader: Built-in Java classes (e.g. java.lang.String)</li>
+   *   <li>API classloader: Common Samza framework API classes</li>
+   *   <li>infrastructure classloader: Core Samza framework classes and plugins that are included in the framework</li>
+   *   <li>
+   *     application classloader: Application code and plugins that are needed in the app but are not included in the
+   *     framework
+   *   </li>
+   * </ul>
+   *
+   * This is the delegation structure for the classloaders:
+   * <pre>
+   *   (bootstrap               (API                  (application
+   *   classloader) &lt;---- classloader) &lt;------- classloader)
+   *                             ^                      ^
+   *                             |                     /
+   *                             |                    /
+   *                             |                   /
+   *                             |                  /
+   *                         (infrastructure classloader)
+   * </pre>
+   * The cytodynamics classloader allows control over when the delegation should happen.
+   * <ol>
+   *   <li>API classloader delegates to the bootstrap classloader if the bootstrap classloader has the class.</li>
+   *   <li>
+   *     Infrastructure classloader only delegates to the API classloader for the common classes specified by
+   *     {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME}.
+   *   </li>
+   *   <li>
+   *     Infrastructure classloader delegates to the application classloader when a class can't be found in the
+   *     infrastructure classloader.
+   *   </li>
+   *   <li>
+   *     Application classloader only delegates to the API classloader for the common classes specified by
+   *     {@link DependencyIsolationUtils#FRAMEWORK_API_CLASS_LIST_FILE_NAME}.
+   *   </li>
+   * </ol>
+   */
+  public ClassLoader buildClassLoader() {
+    // start at the user.dir to find the resources for the classpaths
+    String baseDirectoryPath = System.getProperty("user.dir");
+    // one lib directory is resolved per package (API, infrastructure, application), each relative to the base directory
+    File apiLibDirectory = libDirectory(new File(baseDirectoryPath, DependencyIsolationUtils.FRAMEWORK_API_DIRECTORY));
+    LOG.info("Using API lib directory: {}", apiLibDirectory);
+    File infrastructureLibDirectory =
+        libDirectory(new File(baseDirectoryPath, DependencyIsolationUtils.FRAMEWORK_INFRASTRUCTURE_DIRECTORY));
+    LOG.info("Using infrastructure lib directory: {}", infrastructureLibDirectory);
+    File applicationLibDirectory =
+        libDirectory(new File(baseDirectoryPath, DependencyIsolationUtils.APPLICATION_DIRECTORY));
+    LOG.info("Using application lib directory: {}", applicationLibDirectory);
+
+    // build order matters: the API classloader is passed to both of the other builders, and the application
+    // classloader is passed to the infrastructure builder
+    ClassLoader apiClassLoader = buildApiClassLoader(apiLibDirectory);
+    ClassLoader applicationClassLoader =
+        buildApplicationClassLoader(applicationLibDirectory, apiLibDirectory, apiClassLoader);
+
+    // the classloader to return is the one with the infrastructure classpath
+    return buildInfrastructureClassLoader(infrastructureLibDirectory, apiLibDirectory, apiClassLoader,
+        applicationClassLoader);
+  }
+
+  /**
+   * Build the {@link ClassLoader} which can load framework API classes.
+   *
+   * This sets up the link between the bootstrap classloader and the API classloader (see {@link #buildClassLoader()}).
+   */
+  private static ClassLoader buildApiClassLoader(File apiLibDirectory) {
+    /*
+     * This can just use the built-in classloading, which checks the parent classloader first and then checks its own
+     * classpath. A null parent means bootstrap classloader, which contains core Java classes (e.g. java.lang.String).
+     * This doesn't need to be isolated from the parent, because we only want to load all bootstrap classes from the
+     * bootstrap classloader.
+     */
+    // the second constructor argument is the parent classloader; null is intentional (see the note above)
+    return new URLClassLoader(getClasspathAsURLs(apiLibDirectory), null);
+  }
+
+  /**
+   * Build the {@link ClassLoader} which can load application classes.
+   *
+   * This sets up the link between the application classloader and the API classloader (see {@link #buildClassLoader()}).
+   */
+  private static ClassLoader buildApplicationClassLoader(File applicationLibDirectory, File apiLibDirectory,
+      ClassLoader apiClassLoader) {
+    return LoaderBuilder.anIsolatingLoader()
+        // look in application lib directory for JARs
+        .withClasspath(getClasspathAsURIs(applicationLibDirectory))
+        // getClasspathAsURIs should only return JARs within applicationLibDirectory anyway, but restrict origins to
+        // be safe
+        // NOTE(review): the boolean passed to allowingDirectory presumably controls whether subdirectories are
+        // included -- confirm against the cytodynamics API
+        .withOriginRestriction(OriginRestriction.denyByDefault().allowingDirectory(applicationLibDirectory, false))
+        // delegate to the api classloader for API classes
+        .withParentRelationship(buildApiParentRelationship(apiLibDirectory, apiClassLoader))
+        .build();
+  }
+
+  /**
+   * Build the {@link ClassLoader} which can load Samza framework core classes.
 
 Review comment:
   To clarify documentation, this can/will load application classes as well, right?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services