You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2022/08/09 07:27:36 UTC

[nutch] 02/03: NUTCH-2936 Early registration of URL stream handlers provided by plugins may fail Hadoop jobs running in distributed mode if protocol-okhttp is used - code improvements Nutch plugin system: - use `Class<?>` and remove suppressions of warnings - javadocs: fix typos - remove superfluous white space - autoformat using code style template

This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit 1f5f3e4d42b8dfb8bf741b11c9f39cc8bcd34091
Author: Sebastian Nagel <sn...@apache.org>
AuthorDate: Thu May 19 15:26:46 2022 +0200

    NUTCH-2936 Early registration of URL stream handlers provided by plugins may fail Hadoop jobs running in distributed mode if protocol-okhttp is used
    - code improvements Nutch plugin system:
      - use `Class<?>` and remove suppressions of warnings
      - javadocs: fix typos
      - remove superfluous white space
      - autoformat using code style template
---
 src/java/org/apache/nutch/plugin/Extension.java    | 14 ++++----
 src/java/org/apache/nutch/plugin/Plugin.java       |  2 +-
 .../org/apache/nutch/plugin/PluginRepository.java  | 22 ++++++------
 .../nutch/plugin/URLStreamHandlerFactory.java      | 41 ++++++++++++----------
 4 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/src/java/org/apache/nutch/plugin/Extension.java b/src/java/org/apache/nutch/plugin/Extension.java
index 246e8ff7b..e949ea317 100644
--- a/src/java/org/apache/nutch/plugin/Extension.java
+++ b/src/java/org/apache/nutch/plugin/Extension.java
@@ -143,15 +143,15 @@ public class Extension {
    * Return an instance of the extension implementation. Before we create a
    * extension instance we startup the plugin if it is not already done. The
    * plugin instance and the extension instance use the same
-   * {@link org.apache.nutch.plugin.PluginClassLoader}.
-   * Each Plugin use its own classloader. The
-   * {@link org.apache.nutch.plugin.PluginClassLoader} knows only its own
-   * <i>plugin runtime libraries</i> defined
-   * in the <code>plugin.xml</code> manifest file and exported libraries
-   * of the dependent plugins.
+   * {@link org.apache.nutch.plugin.PluginClassLoader}. Each Plugin uses its own
+   * classloader. The {@link org.apache.nutch.plugin.PluginClassLoader} knows
+   * only its own <i>plugin runtime libraries</i> defined in the
+   * <code>plugin.xml</code> manifest file and exported libraries of the
+   * dependent plugins.
    * 
    * @return Object An instance of the extension implementation
-   * @throws PluginRuntimeException if there is a fatal runtime error
+   * @throws PluginRuntimeException
+   *           if there is a fatal runtime error
    */
   public Object getExtensionInstance() throws PluginRuntimeException {
     // Must synchronize here to make sure creation and initialization
diff --git a/src/java/org/apache/nutch/plugin/Plugin.java b/src/java/org/apache/nutch/plugin/Plugin.java
index 314a8669d..306ada3d4 100644
--- a/src/java/org/apache/nutch/plugin/Plugin.java
+++ b/src/java/org/apache/nutch/plugin/Plugin.java
@@ -27,7 +27,7 @@ import org.apache.hadoop.conf.Configuration;
  * provide a API and invoke one or a set of installed extensions.
  * 
  * Each plugin may extend the base <code>Plugin</code>. <code>Plugin</code>
- * instances are used as the point of life cycle managemet of plugin related
+ * instances are used as the point of life cycle management of plugin related
  * functionality.
  * 
  * The <code>Plugin</code> will be started up and shutdown by the nutch plugin
diff --git a/src/java/org/apache/nutch/plugin/PluginRepository.java b/src/java/org/apache/nutch/plugin/PluginRepository.java
index 3c554094b..1eec0ffc8 100644
--- a/src/java/org/apache/nutch/plugin/PluginRepository.java
+++ b/src/java/org/apache/nutch/plugin/PluginRepository.java
@@ -38,11 +38,11 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * <p>The plugin repositority is a registry of all plugins.</p>
+ * <p>The plugin repository is a registry of all plugins.</p>
  * 
- * <p>At system boot up a repositority is built by parsing the mainifest files of
+ * <p>At system boot up a repository is built by parsing the manifest files of
  * all plugins. Plugins that require other plugins which do not exist are not
- * registed. For each plugin a plugin descriptor instance will be created. The
+ * registered. For each plugin a plugin descriptor instance will be created. The
  * descriptor represents all meta information about a plugin. So a plugin
  * instance will be created later when it is required, this allow lazy plugin
  * loading.</p>
@@ -64,8 +64,7 @@ public class PluginRepository implements URLStreamHandlerFactory {
 
   private HashMap<String, Plugin> fActivatedPlugins;
 
-  @SuppressWarnings("rawtypes")
-  private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE = new HashMap<>();
+  private static final Map<String, Map<PluginClassLoader, Class<?>>> CLASS_CACHE = new HashMap<>();
 
   private Configuration conf;
 
@@ -267,14 +266,14 @@ public class PluginRepository implements URLStreamHandlerFactory {
   }
 
   /**
-   * <p>Returns a instance of a plugin. Plugin instances are cached. So a plugin
-   * exist only as one instance. This allow a central management of plugin own
+   * <p>Returns an instance of a plugin. Plugin instances are cached. So a plugin
+   * exist only as one instance. This allow a central management of plugin's own
    * resources.</p>
    * 
    * <p>After creating the plugin instance the startUp() method is invoked. The
    * plugin use a own classloader that is used as well by all instance of
    * extensions of the same plugin. This class loader use all exported libraries
-   * from the dependend plugins and all plugin libraries.</p>
+   * from the dependent plugins and all plugin libraries.</p>
    * 
    * @param pDescriptor a {@link PluginDescriptor} for which to retrieve a 
    * {@link Plugin} instance
@@ -337,16 +336,15 @@ public class PluginRepository implements URLStreamHandlerFactory {
     }
   }
 
-  @SuppressWarnings("rawtypes")
-  public static Class getCachedClass(PluginDescriptor pDescriptor, String className)
+  public Class<?> getCachedClass(PluginDescriptor pDescriptor, String className)
           throws ClassNotFoundException {
-    Map<PluginClassLoader, Class> descMap = CLASS_CACHE.get(className);
+    Map<PluginClassLoader, Class<?>> descMap = CLASS_CACHE.get(className);
     if (descMap == null) {
       descMap = new HashMap<>();
       CLASS_CACHE.put(className, descMap);
     }
     PluginClassLoader loader = pDescriptor.getClassLoader();
-    Class clazz = descMap.get(loader);
+    Class<?> clazz = descMap.get(loader);
     if (clazz == null) {
       clazz = loader.loadClass(className);
       descMap.put(loader, clazz);
diff --git a/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java b/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java
index 5aed76a35..6c79fe9e6 100644
--- a/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java
+++ b/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java
@@ -35,13 +35,13 @@ import org.slf4j.LoggerFactory;
  */
 public class URLStreamHandlerFactory
     implements java.net.URLStreamHandlerFactory {
-  
+
   protected static final Logger LOG = LoggerFactory
       .getLogger(URLStreamHandlerFactory.class);
-  
+
   /** The singleton instance. */
   private static URLStreamHandlerFactory instance;
-  
+
   /** Here we register all PluginRepositories.
    * In this class we do not know why several instances of PluginRepository
    * are kept, nor do we know how long they will be used. To prevent
@@ -51,64 +51,67 @@ public class URLStreamHandlerFactory
    * outdated references which is done in the {@link #removeInvalidRefs()} method.
    */
   private ArrayList<WeakReference<PluginRepository>> prs;
-  
+
   static {
     instance = new URLStreamHandlerFactory();
     URL.setURLStreamHandlerFactory(instance);
     LOG.debug("Registered URLStreamHandlerFactory with the JVM.");
   }
-  
+
   private URLStreamHandlerFactory() {
     this.prs = new ArrayList<>();
   }
 
   /** 
    * Get the singleton instance of this class.
-   * @return a {@link org.apache.nutch.plugin.URLStreamHandlerFactory} instance 
+   * @return a {@link org.apache.nutch.plugin.URLStreamHandlerFactory} instance
    */
   public static URLStreamHandlerFactory getInstance() {
     return instance;
   }
-  
+
   /** Use this method once a new PluginRepository was created to register it.
    * 
    * @param pr The PluginRepository to be registered.
    */
   public void registerPluginRepository(PluginRepository pr) {
     this.prs.add(new WeakReference<PluginRepository>(pr));
-    
+
     removeInvalidRefs();
   }
 
   @Override
   public URLStreamHandler createURLStreamHandler(String protocol) {
     LOG.debug("Creating URLStreamHandler for protocol: {}", protocol);
-    
+
     removeInvalidRefs();
-    
+
     // find the 'correct' PluginRepository. For now we simply take the first.
     // then ask it to return the URLStreamHandler
-    for(WeakReference<PluginRepository> ref: this.prs) {
+    for (WeakReference<PluginRepository> ref : this.prs) {
       PluginRepository pr = ref.get();
-      if(pr != null) {
+      if (pr != null) {
         // found PluginRepository. Let's get the URLStreamHandler...
-        return pr.createURLStreamHandler(protocol);
+        URLStreamHandler handler = pr.createURLStreamHandler(protocol);
+        return handler;
       }
     }
+
     return null;
   }
 
-  /** Maintains the list of PluginRepositories by
-   * removing the references whose referents have been
-   * garbage collected meanwhile.
+  /**
+   * Maintains the list of PluginRepositories by removing the references whose
+   * referents have been garbage collected meanwhile.
    */
   private void removeInvalidRefs() {
     ArrayList<WeakReference<PluginRepository>> copy = new ArrayList<>(this.prs);
-    for(WeakReference<PluginRepository> ref: copy) {
-      if(ref.get() == null) {
+    for (WeakReference<PluginRepository> ref : copy) {
+      if (ref.get() == null) {
         this.prs.remove(ref);
       }
     }
-    LOG.debug("Removed '{}' invalid references. '{}' remaining.", copy.size()-this.prs.size(), this.prs.size());
+    LOG.debug("Removed '{}' invalid references. '{}' remaining.",
+        copy.size() - this.prs.size(), this.prs.size());
   }
 }