You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2022/08/09 07:27:36 UTC
[nutch] 02/03: NUTCH-2936 Early registration of URL stream handlers provided by plugins may fail Hadoop jobs running in distributed mode if protocol-okhttp is used - code improvements Nutch plugin system: - use `Class<?>` and remove suppressions of warnings - javadocs: fix typos - remove superfluous white space - autoformat using code style template
This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 1f5f3e4d42b8dfb8bf741b11c9f39cc8bcd34091
Author: Sebastian Nagel <sn...@apache.org>
AuthorDate: Thu May 19 15:26:46 2022 +0200
NUTCH-2936 Early registration of URL stream handlers provided by plugins may fail Hadoop jobs running in distributed mode if protocol-okhttp is used
- code improvements Nutch plugin system:
- use `Class<?>` and remove suppressions of warnings
- javadocs: fix typos
- remove superfluous white space
- autoformat using code style template
---
src/java/org/apache/nutch/plugin/Extension.java | 14 ++++----
src/java/org/apache/nutch/plugin/Plugin.java | 2 +-
.../org/apache/nutch/plugin/PluginRepository.java | 22 ++++++------
.../nutch/plugin/URLStreamHandlerFactory.java | 41 ++++++++++++----------
4 files changed, 40 insertions(+), 39 deletions(-)
diff --git a/src/java/org/apache/nutch/plugin/Extension.java b/src/java/org/apache/nutch/plugin/Extension.java
index 246e8ff7b..e949ea317 100644
--- a/src/java/org/apache/nutch/plugin/Extension.java
+++ b/src/java/org/apache/nutch/plugin/Extension.java
@@ -143,15 +143,15 @@ public class Extension {
* Return an instance of the extension implementation. Before we create a
* extension instance we startup the plugin if it is not already done. The
* plugin instance and the extension instance use the same
- * {@link org.apache.nutch.plugin.PluginClassLoader}.
- * Each Plugin use its own classloader. The
- * {@link org.apache.nutch.plugin.PluginClassLoader} knows only its own
- * <i>plugin runtime libraries</i> defined
- * in the <code>plugin.xml</code> manifest file and exported libraries
- * of the dependent plugins.
+ * {@link org.apache.nutch.plugin.PluginClassLoader}. Each Plugin uses its own
+ * classloader. The {@link org.apache.nutch.plugin.PluginClassLoader} knows
+ * only its own <i>plugin runtime libraries</i> defined in the
+ * <code>plugin.xml</code> manifest file and exported libraries of the
+ * dependent plugins.
*
* @return Object An instance of the extension implementation
- * @throws PluginRuntimeException if there is a fatal runtime error
+ * @throws PluginRuntimeException
+ * if there is a fatal runtime error
*/
public Object getExtensionInstance() throws PluginRuntimeException {
// Must synchronize here to make sure creation and initialization
diff --git a/src/java/org/apache/nutch/plugin/Plugin.java b/src/java/org/apache/nutch/plugin/Plugin.java
index 314a8669d..306ada3d4 100644
--- a/src/java/org/apache/nutch/plugin/Plugin.java
+++ b/src/java/org/apache/nutch/plugin/Plugin.java
@@ -27,7 +27,7 @@ import org.apache.hadoop.conf.Configuration;
* provide a API and invoke one or a set of installed extensions.
*
* Each plugin may extend the base <code>Plugin</code>. <code>Plugin</code>
- * instances are used as the point of life cycle managemet of plugin related
+ * instances are used as the point of life cycle management of plugin related
* functionality.
*
* The <code>Plugin</code> will be started up and shutdown by the nutch plugin
diff --git a/src/java/org/apache/nutch/plugin/PluginRepository.java b/src/java/org/apache/nutch/plugin/PluginRepository.java
index 3c554094b..1eec0ffc8 100644
--- a/src/java/org/apache/nutch/plugin/PluginRepository.java
+++ b/src/java/org/apache/nutch/plugin/PluginRepository.java
@@ -38,11 +38,11 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * <p>The plugin repositority is a registry of all plugins.</p>
+ * <p>The plugin repository is a registry of all plugins.</p>
*
- * <p>At system boot up a repositority is built by parsing the mainifest files of
+ * <p>At system boot up a repository is built by parsing the manifest files of
* all plugins. Plugins that require other plugins which do not exist are not
- * registed. For each plugin a plugin descriptor instance will be created. The
+ * registered. For each plugin a plugin descriptor instance will be created. The
* descriptor represents all meta information about a plugin. So a plugin
* instance will be created later when it is required, this allow lazy plugin
* loading.</p>
@@ -64,8 +64,7 @@ public class PluginRepository implements URLStreamHandlerFactory {
private HashMap<String, Plugin> fActivatedPlugins;
- @SuppressWarnings("rawtypes")
- private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE = new HashMap<>();
+ private static final Map<String, Map<PluginClassLoader, Class<?>>> CLASS_CACHE = new HashMap<>();
private Configuration conf;
@@ -267,14 +266,14 @@ public class PluginRepository implements URLStreamHandlerFactory {
}
/**
- * <p>Returns a instance of a plugin. Plugin instances are cached. So a plugin
- * exist only as one instance. This allow a central management of plugin own
+ * <p>Returns an instance of a plugin. Plugin instances are cached. So a plugin
+ * exist only as one instance. This allow a central management of plugin's own
* resources.</p>
*
* <p>After creating the plugin instance the startUp() method is invoked. The
* plugin use a own classloader that is used as well by all instance of
* extensions of the same plugin. This class loader use all exported libraries
- * from the dependend plugins and all plugin libraries.</p>
+ * from the dependent plugins and all plugin libraries.</p>
*
* @param pDescriptor a {@link PluginDescriptor} for which to retrieve a
* {@link Plugin} instance
@@ -337,16 +336,15 @@ public class PluginRepository implements URLStreamHandlerFactory {
}
}
- @SuppressWarnings("rawtypes")
- public static Class getCachedClass(PluginDescriptor pDescriptor, String className)
+ public Class<?> getCachedClass(PluginDescriptor pDescriptor, String className)
throws ClassNotFoundException {
- Map<PluginClassLoader, Class> descMap = CLASS_CACHE.get(className);
+ Map<PluginClassLoader, Class<?>> descMap = CLASS_CACHE.get(className);
if (descMap == null) {
descMap = new HashMap<>();
CLASS_CACHE.put(className, descMap);
}
PluginClassLoader loader = pDescriptor.getClassLoader();
- Class clazz = descMap.get(loader);
+ Class<?> clazz = descMap.get(loader);
if (clazz == null) {
clazz = loader.loadClass(className);
descMap.put(loader, clazz);
diff --git a/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java b/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java
index 5aed76a35..6c79fe9e6 100644
--- a/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java
+++ b/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java
@@ -35,13 +35,13 @@ import org.slf4j.LoggerFactory;
*/
public class URLStreamHandlerFactory
implements java.net.URLStreamHandlerFactory {
-
+
protected static final Logger LOG = LoggerFactory
.getLogger(URLStreamHandlerFactory.class);
-
+
/** The singleton instance. */
private static URLStreamHandlerFactory instance;
-
+
/** Here we register all PluginRepositories.
* In this class we do not know why several instances of PluginRepository
* are kept, nor do we know how long they will be used. To prevent
@@ -51,64 +51,67 @@ public class URLStreamHandlerFactory
* outdated references which is done in the {@link #removeInvalidRefs()} method.
*/
private ArrayList<WeakReference<PluginRepository>> prs;
-
+
static {
instance = new URLStreamHandlerFactory();
URL.setURLStreamHandlerFactory(instance);
LOG.debug("Registered URLStreamHandlerFactory with the JVM.");
}
-
+
private URLStreamHandlerFactory() {
this.prs = new ArrayList<>();
}
/**
* Get the singleton instance of this class.
- * @return a {@link org.apache.nutch.plugin.URLStreamHandlerFactory} instance
+ * @return a {@link org.apache.nutch.plugin.URLStreamHandlerFactory} instance
*/
public static URLStreamHandlerFactory getInstance() {
return instance;
}
-
+
/** Use this method once a new PluginRepository was created to register it.
*
* @param pr The PluginRepository to be registered.
*/
public void registerPluginRepository(PluginRepository pr) {
this.prs.add(new WeakReference<PluginRepository>(pr));
-
+
removeInvalidRefs();
}
@Override
public URLStreamHandler createURLStreamHandler(String protocol) {
LOG.debug("Creating URLStreamHandler for protocol: {}", protocol);
-
+
removeInvalidRefs();
-
+
// find the 'correct' PluginRepository. For now we simply take the first.
// then ask it to return the URLStreamHandler
- for(WeakReference<PluginRepository> ref: this.prs) {
+ for (WeakReference<PluginRepository> ref : this.prs) {
PluginRepository pr = ref.get();
- if(pr != null) {
+ if (pr != null) {
// found PluginRepository. Let's get the URLStreamHandler...
- return pr.createURLStreamHandler(protocol);
+ URLStreamHandler handler = pr.createURLStreamHandler(protocol);
+ return handler;
}
}
+
return null;
}
- /** Maintains the list of PluginRepositories by
- * removing the references whose referents have been
- * garbage collected meanwhile.
+ /**
+ * Maintains the list of PluginRepositories by removing the references whose
+ * referents have been garbage collected meanwhile.
*/
private void removeInvalidRefs() {
ArrayList<WeakReference<PluginRepository>> copy = new ArrayList<>(this.prs);
- for(WeakReference<PluginRepository> ref: copy) {
- if(ref.get() == null) {
+ for (WeakReference<PluginRepository> ref : copy) {
+ if (ref.get() == null) {
this.prs.remove(ref);
}
}
- LOG.debug("Removed '{}' invalid references. '{}' remaining.", copy.size()-this.prs.size(), this.prs.size());
+ LOG.debug("Removed '{}' invalid references. '{}' remaining.",
+ copy.size() - this.prs.size(), this.prs.size());
}
}