You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by th...@apache.org on 2016/07/16 19:48:57 UTC

[41/51] [partial] nutch git commit: NUTCH-2292 : Mavenize the build for nutch-core and nutch-plugins

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/ExtensionPoint.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/ExtensionPoint.java b/nutch-core/src/main/java/org/apache/nutch/plugin/ExtensionPoint.java
new file mode 100644
index 0000000..178c5a2
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/ExtensionPoint.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.plugin;
+
+import java.util.ArrayList;
+
+/**
+ * Holds the meta information that describes a single extension point: its
+ * unique id, its name, the path to its xml schema, and the extensions that
+ * have been installed on it.
+ *
+ * @author joa23
+ */
+public class ExtensionPoint {
+  private final String fId;
+  private final String fName;
+  private final String fSchema;
+  private final ArrayList<Extension> fExtensions = new ArrayList<Extension>();
+
+  /**
+   * Creates an extension point that has no extensions installed yet.
+   *
+   * @param pId
+   *          unique extension point id
+   * @param pName
+   *          name of the extension point
+   * @param pSchema
+   *          xml schema of the extension point
+   */
+  public ExtensionPoint(String pId, String pName, String pSchema) {
+    fId = pId;
+    fName = pName;
+    fSchema = pSchema;
+  }
+
+  /**
+   * @return the unique id of this extension point
+   */
+  public String getId() {
+    return fId;
+  }
+
+  /**
+   * @return the name of this extension point
+   */
+  public String getName() {
+    return fName;
+  }
+
+  /**
+   * @return the path to the xml schema of this extension point
+   */
+  public String getSchema() {
+    return fSchema;
+  }
+
+  /**
+   * Installs a corresponding extension on this extension point.
+   *
+   * @param extension
+   *          the extension to append to the installed extensions
+   */
+  public void addExtension(Extension extension) {
+    fExtensions.add(extension);
+  }
+
+  /**
+   * Returns the extensions that listen to this extension point, in the order
+   * they were added.
+   *
+   * @return a newly allocated array; empty when no extension was added
+   */
+  public Extension[] getExtensions() {
+    return fExtensions.toArray(new Extension[0]);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/MissingDependencyException.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/MissingDependencyException.java b/nutch-core/src/main/java/org/apache/nutch/plugin/MissingDependencyException.java
new file mode 100644
index 0000000..b81cc50
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/MissingDependencyException.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.plugin;
+
+/**
+ * Signals that a plugin dependency cannot be found.
+ *
+ * @author J&eacute;r&ocirc;me Charron
+ */
+public class MissingDependencyException extends Exception {
+
+  private static final long serialVersionUID = 1L;
+
+  /**
+   * Creates the exception with a detail message and no cause.
+   *
+   * @param message
+   *          description of the missing dependency
+   */
+  public MissingDependencyException(String message) {
+    super(message);
+  }
+
+  /**
+   * Creates the exception as a wrapper around another failure.
+   *
+   * @param cause
+   *          the underlying failure
+   */
+  public MissingDependencyException(Throwable cause) {
+    super(cause);
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/Pluggable.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/Pluggable.java b/nutch-core/src/main/java/org/apache/nutch/plugin/Pluggable.java
new file mode 100644
index 0000000..09aba30
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/Pluggable.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.plugin;
+
+/**
+ * Marker for classes that can be plugged into Nutch. Every Nutch extension
+ * point must implement this common interface; it declares no methods.
+ *
+ * @author J&eacute;r&ocirc;me Charron
+ *
+ * @see <a href="http://wiki.apache.org/nutch/AboutPlugins">About Plugins</a>
+ * @see <a href="package-summary.html#package_description"> plugin package
+ *      description</a>
+ */
+public interface Pluggable {
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/Plugin.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/Plugin.java b/nutch-core/src/main/java/org/apache/nutch/plugin/Plugin.java
new file mode 100644
index 0000000..e78754b
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/Plugin.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.plugin;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * A nutch-plugin is a container for a set of custom logic that provides
+ * extensions to the nutch core functionality or to another plugin that
+ * provides an API for extending. A plugin can provide one or a set of
+ * extensions. Extensions are components that can be dynamically installed as a
+ * kind of listener to extension points. Extension points are a kind of
+ * publisher that provide an API and invoke one or a set of installed
+ * extensions.
+ *
+ * Each plugin may extend the base <code>Plugin</code>. <code>Plugin</code>
+ * instances are used as the point of life cycle management of plugin related
+ * functionality.
+ *
+ * The <code>Plugin</code> is started up and shut down by the nutch plugin
+ * management system.
+ *
+ * A possible use case of a <code>Plugin</code> implementation is to create or
+ * close a database connection.
+ *
+ * @author joa23
+ */
+public class Plugin {
+  private final PluginDescriptor fDescriptor;
+  protected Configuration conf;
+
+  /**
+   * Creates a plugin bound to its descriptor and configuration.
+   *
+   * @param pDescriptor
+   *          descriptor returned later by {@link #getDescriptor()}
+   * @param conf
+   *          configuration made available to subclasses through the
+   *          <code>conf</code> field
+   */
+  public Plugin(PluginDescriptor pDescriptor, Configuration conf) {
+    this.fDescriptor = pDescriptor;
+    this.conf = conf;
+  }
+
+  /**
+   * Invoked when the plugin is started. Since the nutch-plugin system uses
+   * lazy loading, start-up happens the first time an extension is used. The
+   * base implementation does nothing.
+   *
+   * @throws PluginRuntimeException
+   *           if the start-up was not successful
+   */
+  public void startUp() throws PluginRuntimeException {
+  }
+
+  /**
+   * Shuts the plugin down. This happens when nutch is stopped. The base
+   * implementation does nothing.
+   *
+   * @throws PluginRuntimeException
+   *           if a problem occurs while shutting the plugin down
+   */
+  public void shutDown() throws PluginRuntimeException {
+  }
+
+  /**
+   * Returns the plugin descriptor.
+   *
+   * @return the descriptor passed to the constructor
+   */
+  public PluginDescriptor getDescriptor() {
+    return fDescriptor;
+  }
+
+  /**
+   * Shuts the plugin down when the instance is finalized.
+   *
+   * @throws Throwable
+   *           whatever {@link #shutDown()} or the superclass finalizer throws
+   */
+  @Override
+  protected void finalize() throws Throwable {
+    try {
+      shutDown();
+    } finally {
+      // The superclass finalizer must run even when shutDown() fails.
+      super.finalize();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/PluginClassLoader.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/PluginClassLoader.java b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginClassLoader.java
new file mode 100644
index 0000000..128bbc6
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginClassLoader.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.plugin;
+
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.util.Arrays;
+
+/**
+ * Class loader of a single plugin. It contains only the classes of the runtime
+ * libraries set up in the plugin manifest file plus the exported libraries of
+ * the plugins this plugin requires. Libraries can be exported or not:
+ * non-exported libraries are only used by the plugin's own
+ * <code>PluginClassLoader</code>, while exported libraries are also available
+ * to the <code>PluginClassLoader</code> of plugins that depend on this plugin.
+ *
+ * Two loaders are equal when they are of the same class, have equal parents
+ * and were given equal URL arrays.
+ *
+ * @author joa23
+ */
+public class PluginClassLoader extends URLClassLoader {
+
+  private URL[] urls;
+  private ClassLoader parent;
+
+  /**
+   * Constructor
+   *
+   * @param urls
+   *          array of urls with the plugin's own libraries and all exported
+   *          libraries of the plugins it requires
+   * @param parent
+   *          parent class loader used for delegation, may be
+   *          <code>null</code>
+   */
+  public PluginClassLoader(URL[] urls, ClassLoader parent) {
+    super(urls, parent);
+
+    this.urls = urls;
+    this.parent = parent;
+  }
+
+  /**
+   * Combines the parent's hash code (0 for a <code>null</code> parent) with
+   * the hash of the URL array.
+   */
+  @Override
+  public int hashCode() {
+    int parentHash = (parent != null) ? parent.hashCode() : 0;
+    return 31 * (31 + parentHash) + Arrays.hashCode(urls);
+  }
+
+  /**
+   * @return <code>true</code> only for an object of exactly this class whose
+   *         parent and URL array are equal to this loader's
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    PluginClassLoader that = (PluginClassLoader) obj;
+    boolean sameParent = (parent == null) ? that.parent == null
+        : parent.equals(that.parent);
+    return sameParent && Arrays.equals(urls, that.urls);
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/PluginDescriptor.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/PluginDescriptor.java b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginDescriptor.java
new file mode 100644
index 0000000..0a43745
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginDescriptor.java
@@ -0,0 +1,363 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.plugin;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * The <code>PluginDescriptor</code> provides access to all meta information of
+ * a nutch-plugin, as well as to its internationalizable resources and the
+ * plugin's own classloader. There is meta information about the
+ * <code>Plugin</code>, its <code>ExtensionPoint</code>s and its
+ * <code>Extension</code>s. Providing the meta data of a plugin via a
+ * descriptor allows a lazy loading mechanism.
+ */
+public class PluginDescriptor {
+  private final String fPluginPath;
+  private final String fPluginClass;
+  private final String fPluginId;
+  private final String fVersion;
+  private final String fName;
+  private final String fProviderName;
+  /** Resource bundles already loaded, keyed by <code>Locale.toString()</code>. */
+  private final HashMap<String, ResourceBundle> fMessages = new HashMap<String, ResourceBundle>();
+  private final ArrayList<ExtensionPoint> fExtensionPoints = new ArrayList<ExtensionPoint>();
+  private final ArrayList<String> fDependencies = new ArrayList<String>();
+  private final ArrayList<URL> fExportedLibs = new ArrayList<URL>();
+  private final ArrayList<URL> fNotExportedLibs = new ArrayList<URL>();
+  private final ArrayList<Extension> fExtensions = new ArrayList<Extension>();
+  /** Created on the first call to {@link #getClassLoader()} and then reused. */
+  private PluginClassLoader fClassLoader;
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginDescriptor.class);
+  private final Configuration fConf;
+
+  /**
+   * Constructor
+   *
+   * @param pId
+   *          unique identifier of the plugin
+   * @param pVersion
+   *          version of the plugin
+   * @param pName
+   *          name of the plugin
+   * @param pProviderName
+   *          name of the plugin provider
+   * @param pPluginclazz
+   *          fully qualified name of the plugin class; when <code>null</code>
+   *          the name of the base <code>Plugin</code> class is used
+   * @param pPath
+   *          directory path of the plugin
+   * @param conf
+   *          configuration used to look up the {@link PluginRepository} when
+   *          the libraries of dependencies are collected
+   */
+  public PluginDescriptor(String pId, String pVersion, String pName,
+      String pProviderName, String pPluginclazz, String pPath,
+      Configuration conf) {
+    fPluginPath = pPath;
+    fPluginId = pId;
+    fVersion = pVersion;
+    fName = pName;
+    fProviderName = pProviderName;
+    fPluginClass = (pPluginclazz != null) ? pPluginclazz
+        : Plugin.class.getName();
+    fConf = conf;
+  }
+
+  /**
+   * Returns the name of the plugin.
+   *
+   * @return String
+   */
+  public String getName() {
+    return fName;
+  }
+
+  /**
+   * Returns the fully qualified name of the plugin class.
+   *
+   * @return the class name passed to the constructor, or the name of the base
+   *         <code>Plugin</code> class when none was given
+   */
+  public String getPluginClass() {
+    return fPluginClass;
+  }
+
+  /**
+   * Returns the unique identifier of the plug-in or <code>null</code>.
+   *
+   * @return String
+   */
+  public String getPluginId() {
+    return fPluginId;
+  }
+
+  /**
+   * Returns an array of the extensions added so far.
+   *
+   * @return Extension[]
+   */
+  public Extension[] getExtensions() {
+    return fExtensions.toArray(new Extension[fExtensions.size()]);
+  }
+
+  /**
+   * Adds an extension.
+   *
+   * @param pExtension
+   *          the extension to add
+   */
+  public void addExtension(Extension pExtension) {
+    fExtensions.add(pExtension);
+  }
+
+  /**
+   * Adds an extension point.
+   *
+   * @param extensionPoint
+   *          the extension point to add
+   */
+  public void addExtensionPoint(ExtensionPoint extensionPoint) {
+    fExtensionPoints.add(extensionPoint);
+  }
+
+  /**
+   * Returns an array of the extension points added so far.
+   *
+   * @return ExtensionPoint[]
+   */
+  public ExtensionPoint[] getExtenstionPoints() {
+    return fExtensionPoints
+        .toArray(new ExtensionPoint[fExtensionPoints.size()]);
+  }
+
+  /**
+   * Returns the ids of the plugins this plugin depends on.
+   *
+   * @return String[]
+   */
+  public String[] getDependencies() {
+    return fDependencies.toArray(new String[fDependencies.size()]);
+  }
+
+  /**
+   * Adds a dependency.
+   *
+   * @param pId
+   *          id of the plugin this plugin depends on
+   */
+  public void addDependency(String pId) {
+    fDependencies.add(pId);
+  }
+
+  /**
+   * Adds an exported library with a path relative to the plugin directory.
+   * Characters that are illegal in URLs are escaped automatically, because the
+   * path is first converted into a URI via <code>File.toURI()</code> and only
+   * then into a URL via <code>URI.toURL()</code>.
+   *
+   * @param pLibPath
+   *          library path relative to {@link #getPluginPath()}
+   * @throws MalformedURLException
+   *           if the resulting URI cannot be converted into a URL
+   */
+  public void addExportedLibRelative(String pLibPath)
+      throws MalformedURLException {
+    fExportedLibs.add(toLibUrl(pLibPath));
+  }
+
+  /**
+   * Returns the directory path of the plugin.
+   *
+   * @return String
+   */
+  public String getPluginPath() {
+    return fPluginPath;
+  }
+
+  /**
+   * Returns an array of the exported libraries as URLs.
+   *
+   * @return URL[]
+   */
+  public URL[] getExportedLibUrls() {
+    return fExportedLibs.toArray(new URL[fExportedLibs.size()]);
+  }
+
+  /**
+   * Adds a library that is not exported, with a path relative to the plugin
+   * directory. Characters that are illegal in URLs are escaped automatically,
+   * because the path is first converted into a URI via
+   * <code>File.toURI()</code> and only then into a URL via
+   * <code>URI.toURL()</code>.
+   *
+   * @param pLibPath
+   *          library path relative to {@link #getPluginPath()}
+   * @throws MalformedURLException
+   *           if the resulting URI cannot be converted into a URL
+   */
+  public void addNotExportedLibRelative(String pLibPath)
+      throws MalformedURLException {
+    fNotExportedLibs.add(toLibUrl(pLibPath));
+  }
+
+  /**
+   * Returns an array of libraries as URLs that are not exported by the plugin.
+   *
+   * @return URL[]
+   */
+  public URL[] getNotExportedLibUrls() {
+    return fNotExportedLibs.toArray(new URL[fNotExportedLibs.size()]);
+  }
+
+  /**
+   * Returns the cached classloader of the plugin, creating it on first use. On
+   * creation all needed libraries are collected: first the plugin's own
+   * exported and not exported libraries, then the exported libraries of all
+   * plugins it depends on, and finally the plugin directory itself when it
+   * contains a file whose name ends with "properties".
+   *
+   * @return PluginClassLoader the classloader for the plugin
+   */
+  public PluginClassLoader getClassLoader() {
+    if (fClassLoader != null) {
+      return fClassLoader;
+    }
+    ArrayList<URL> libs = new ArrayList<URL>();
+    libs.addAll(fExportedLibs);
+    libs.addAll(fNotExportedLibs);
+    libs.addAll(getDependencyLibs());
+    // listFiles() returns null when the plugin path is not a directory or
+    // cannot be read; there is nothing to scan in that case.
+    File[] pluginFiles = new File(getPluginPath()).listFiles();
+    if (pluginFiles != null) {
+      try {
+        for (File pluginFile : pluginFiles) {
+          if (pluginFile.getAbsolutePath().endsWith("properties")) {
+            // The directory holding the properties file goes on the class
+            // path; one entry is enough however many such files exist.
+            libs.add(pluginFile.getParentFile().toURI().toURL());
+            break;
+          }
+        }
+      } catch (MalformedURLException e) {
+        LOG.debug("{} {}", getPluginId(), e.toString());
+      }
+    }
+    URL[] urls = libs.toArray(new URL[libs.size()]);
+    fClassLoader = new PluginClassLoader(urls,
+        PluginDescriptor.class.getClassLoader());
+    return fClassLoader;
+  }
+
+  /**
+   * Converts a library path relative to the plugin directory into a URL.
+   */
+  private URL toLibUrl(String pLibPath) throws MalformedURLException {
+    URI uri = new File(getPluginPath() + File.separator + pLibPath).toURI();
+    return uri.toURL();
+  }
+
+  /**
+   * @return the exported libraries of all direct and transitive dependencies
+   */
+  private ArrayList<URL> getDependencyLibs() {
+    ArrayList<URL> list = new ArrayList<URL>();
+    collectLibs(list, this);
+    return list;
+  }
+
+  /**
+   * Appends the exported libraries of every dependency of the given
+   * descriptor, then recurses into each dependency.
+   *
+   * @param pLibs
+   *          list the URLs are appended to
+   * @param pDescriptor
+   *          descriptor whose dependencies are walked
+   */
+  private void collectLibs(ArrayList<URL> pLibs, PluginDescriptor pDescriptor) {
+
+    for (String id : pDescriptor.getDependencies()) {
+      PluginDescriptor descriptor = PluginRepository.get(fConf)
+          .getPluginDescriptor(id);
+      for (URL url : descriptor.getExportedLibUrls()) {
+        pLibs.add(url);
+      }
+      collectLibs(pLibs, descriptor);
+    }
+  }
+
+  /**
+   * Returns an I18N'd resource string. The resource bundles can be stored in
+   * the root directory of a plugin using the well known i18n file name
+   * conventions (bundle base name "messages"). A bundle is loaded through the
+   * plugin classloader on first use and kept for later calls with the same
+   * locale.
+   *
+   * @param pKey
+   *          key of the resource string
+   * @param pLocale
+   *          locale to look the string up for
+   * @return the resource string, or <code>!key!</code> when the bundle or the
+   *         key is missing
+   * @throws IOException
+   *           declared for callers; not thrown by this implementation
+   */
+  public String getResourceString(String pKey, Locale pLocale)
+      throws IOException {
+    String localeKey = pLocale.toString();
+    try {
+      ResourceBundle bundle = fMessages.get(localeKey);
+      if (bundle == null) {
+        bundle = ResourceBundle.getBundle("messages", pLocale,
+            getClassLoader());
+        // Remember the bundle so the lookup above can succeed next time.
+        fMessages.put(localeKey, bundle);
+      }
+      return bundle.getString(pKey);
+    } catch (MissingResourceException e) {
+      return '!' + pKey + '!';
+    }
+  }
+
+  /**
+   * @return the name of the plugin provider
+   */
+  public String getProviderName() {
+    return fProviderName;
+  }
+
+  /**
+   * @return the version of the plugin
+   */
+  public String getVersion() {
+    return fVersion;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/PluginManifestParser.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/PluginManifestParser.java b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginManifestParser.java
new file mode 100644
index 0000000..bd2a490
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginManifestParser.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.plugin;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.slf4j.Logger;
+
+import org.apache.hadoop.conf.Configuration;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+/**
+ * The <code>PluginManifestParser</code> parses the manifest file
+ * (<code>plugin.xml</code>) found in each plugin directory.
+ * 
+ * @author joa23
+ */
+public class PluginManifestParser {
+  private static final String ATTR_NAME = "name";
+  private static final String ATTR_CLASS = "class";
+  private static final String ATTR_ID = "id";
+
+  public static final Logger LOG = PluginRepository.LOG;
+
+  private static final boolean WINDOWS = System.getProperty("os.name")
+      .startsWith("Windows");
+
+  private Configuration conf;
+
+  private PluginRepository pluginRepository;
+
+  /**
+   * Creates a manifest parser.
+   *
+   * @param conf
+   *          configuration handed to every {@link PluginDescriptor} this
+   *          parser creates
+   * @param pluginRepository
+   *          plugin repository to associate with this parser
+   */
+  public PluginManifestParser(Configuration conf,
+      PluginRepository pluginRepository) {
+    this.pluginRepository = pluginRepository;
+    this.conf = conf;
+  }
+
+  /**
+   * Returns all plugin descriptors found below the given folders. Every
+   * sub-directory of a plugin folder is expected to contain a
+   * <code>plugin.xml</code> manifest; a manifest that cannot be parsed is
+   * logged and skipped, as is a folder that cannot be resolved or listed.
+   *
+   * @param pluginFolders
+   *          folders to search plugins from
+   * @return A {@link Map} of all found {@link PluginDescriptor}s, keyed by
+   *         plugin id.
+   * @throws IllegalArgumentException
+   *           if <code>pluginFolders</code> is <code>null</code>
+   */
+  public Map<String, PluginDescriptor> parsePluginFolder(String[] pluginFolders) {
+    if (pluginFolders == null) {
+      throw new IllegalArgumentException("plugin.folders is not defined");
+    }
+
+    Map<String, PluginDescriptor> map = new HashMap<String, PluginDescriptor>();
+    for (String name : pluginFolders) {
+      File directory = getPluginFolder(name);
+      if (directory == null) {
+        continue;
+      }
+      LOG.info("Plugins: looking in: {}", directory.getAbsolutePath());
+      // listFiles() returns null when the path is not a directory or an I/O
+      // error occurs; skip the folder instead of failing on it.
+      File[] subFolders = directory.listFiles();
+      if (subFolders == null) {
+        LOG.warn("Plugins: cannot list directory: {}",
+            directory.getAbsolutePath());
+        continue;
+      }
+      for (File oneSubFolder : subFolders) {
+        if (!oneSubFolder.isDirectory()) {
+          continue;
+        }
+        String manifestPath = oneSubFolder.getAbsolutePath() + File.separator
+            + "plugin.xml";
+        try {
+          LOG.debug("parsing: {}", manifestPath);
+          PluginDescriptor p = parseManifestFile(manifestPath);
+          map.put(p.getPluginId(), p);
+        } catch (Exception e) {
+          // One broken plugin must not prevent the others from loading.
+          LOG.warn("Error while loading plugin `{}` {}", manifestPath,
+              e.toString());
+        }
+      }
+    }
+    return map;
+  }
+
+  /**
+   * Return the named plugin folder. If the name is absolute then it is
+   * returned when it exists. Otherwise, for relative names, the classpath is
+   * scanned; a relative name that is not on the classpath is still accepted
+   * when it denotes a non-empty directory.
+   *
+   * @param name
+   *          absolute path, classpath resource name or relative path of the
+   *          plugin folder
+   * @return the folder, or <code>null</code> (after logging a warning) when it
+   *         cannot be found or is not reachable through a <code>file:</code>
+   *         url
+   */
+  public File getPluginFolder(String name) {
+    File directory = new File(name);
+    if (directory.isAbsolute()) {
+      if (!directory.exists()) {
+        LOG.warn("Plugins: directory not found: " + name);
+        return null;
+      }
+      return directory;
+    }
+
+    URL url = PluginManifestParser.class.getClassLoader().getResource(name);
+    if (url == null) {
+      // listFiles() may return null even for a directory (I/O error).
+      File[] children = directory.isDirectory() ? directory.listFiles() : null;
+      if (children != null && children.length > 0) {
+        return directory; // relative path that is not in the classpath
+      }
+      LOG.warn("Plugins: directory not found: " + name);
+      return null;
+    }
+    if (!"file".equals(url.getProtocol())) {
+      LOG.warn("Plugins: not a file: url. Can't load plugins from: " + url);
+      return null;
+    }
+
+    // Prefer the URI based conversion: it decodes percent-escapes without
+    // turning '+' into a space (as URLDecoder does).
+    try {
+      return new File(url.toURI());
+    } catch (java.net.URISyntaxException e) {
+      LOG.debug("Plugins: falling back to manual decoding of: " + url);
+    } catch (IllegalArgumentException e) {
+      // e.g. the url carries an authority component
+      LOG.debug("Plugins: falling back to manual decoding of: " + url);
+    }
+
+    String path = url.getPath();
+    if (WINDOWS && path.startsWith("/")) { // patch a windows bug
+      path = path.substring(1);
+    }
+    try {
+      path = URLDecoder.decode(path, "UTF-8"); // decode the url path
+    } catch (UnsupportedEncodingException ignored) {
+      // UTF-8 is a charset every Java platform must support; if it were
+      // missing, the undecoded path is used as is.
+    }
+    return new File(path);
+  }
+
+  /**
+   * Parses a single plugin manifest into a descriptor. The directory that
+   * contains the manifest becomes the plugin path of the descriptor.
+   *
+   * @param pManifestPath
+   *          path of the manifest file
+   * @return the descriptor built from the manifest
+   * @throws ParserConfigurationException
+   * @throws IOException
+   * @throws SAXException
+   * @throws MalformedURLException
+   */
+  private PluginDescriptor parseManifestFile(String pManifestPath)
+      throws MalformedURLException, SAXException, IOException,
+      ParserConfigurationException {
+    File manifest = new File(pManifestPath);
+    URL manifestUrl = manifest.toURI().toURL();
+    return parsePlugin(parseXML(manifestUrl), manifest.getParent());
+  }
+
+  /**
+   * Reads the XML document behind the given url into a DOM tree. The stream
+   * opened on the url is closed before the method returns.
+   *
+   * @param url
+   *          location of the XML document
+   * @return Document
+   * @throws IOException
+   *           if the url cannot be opened or read
+   * @throws SAXException
+   *           if the content is not well-formed XML
+   * @throws ParserConfigurationException
+   *           if no document builder can be created
+   */
+  private Document parseXML(URL url) throws SAXException, IOException,
+      ParserConfigurationException {
+    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+    DocumentBuilder builder = factory.newDocumentBuilder();
+    java.io.InputStream in = url.openStream();
+    try {
+      return builder.parse(in);
+    } finally {
+      // Release the stream whether or not parsing succeeded.
+      in.close();
+    }
+  }
+
+  /**
+   * Builds a plugin descriptor from a parsed manifest. The id, name, version,
+   * provider name and plugin class are read from attributes of the root
+   * element; extensions, extension points, libraries and required plugins are
+   * then filled in from its child elements.
+   *
+   * @param pDocument
+   *          the parsed manifest
+   * @param pPath
+   *          directory path of the plugin
+   * @return the populated descriptor
+   * @throws MalformedURLException
+   */
+  private PluginDescriptor parsePlugin(Document pDocument, String pPath)
+      throws MalformedURLException {
+    Element rootElement = pDocument.getDocumentElement();
+    String id = rootElement.getAttribute(ATTR_ID);
+    String name = rootElement.getAttribute(ATTR_NAME);
+    String version = rootElement.getAttribute("version");
+    String providerName = rootElement.getAttribute("provider-name");
+    // A blank class attribute counts as "no plugin class declared".
+    String declaredClass = rootElement.getAttribute(ATTR_CLASS);
+    String pluginClazz = (declaredClass.trim().length() > 0) ? declaredClass
+        : null;
+    PluginDescriptor pluginDescriptor = new PluginDescriptor(id, version, name,
+        providerName, pluginClazz, pPath, this.conf);
+    LOG.debug("plugin: id={} name={} version={} provider={} class={}", id,
+        name, version, providerName, pluginClazz);
+    parseExtension(rootElement, pluginDescriptor);
+    parseExtensionPoints(rootElement, pluginDescriptor);
+    parseLibraries(rootElement, pluginDescriptor);
+    parseRequires(rootElement, pluginDescriptor);
+    return pluginDescriptor;
+  }
+
+  /**
+   * Registers the plugins required by this plugin as dependencies.
+   *
+   * Only the first <code>requires</code> element is considered; every
+   * <code>import</code> element below it contributes the value of its
+   * <code>plugin</code> attribute. Imports without a usable
+   * <code>plugin</code> attribute are skipped with a warning.
+   *
+   * @param pRootElement
+   *          root element of the manifest
+   * @param pDescriptor
+   *          descriptor that receives the dependencies
+   * @throws MalformedURLException
+   *           declared for symmetry with the other parse methods
+   */
+  private void parseRequires(Element pRootElement, PluginDescriptor pDescriptor)
+      throws MalformedURLException {
+
+    NodeList nodelist = pRootElement.getElementsByTagName("requires");
+    if (nodelist.getLength() > 0) {
+
+      Element requires = (Element) nodelist.item(0);
+
+      NodeList imports = requires.getElementsByTagName("import");
+      for (int i = 0; i < imports.getLength(); i++) {
+        Element anImport = (Element) imports.item(i);
+        String plugin = anImport.getAttribute("plugin");
+        // getAttribute() yields an empty string, not null, for a missing
+        // attribute, so a blank value must be rejected explicitly.
+        if (plugin == null || plugin.trim().length() == 0) {
+          LOG.warn("Plugin (" + pDescriptor.getPluginId()
+              + "): ignoring import without plugin attribute");
+          continue;
+        }
+        pDescriptor.addDependency(plugin);
+      }
+    }
+  }
+
+  /**
+   * Registers the libraries declared in the manifest with the descriptor.
+   *
+   * Only the first <code>runtime</code> element is considered. A library that
+   * contains an <code>export</code> child is added as exported, any other
+   * library as not exported.
+   *
+   * @param pRootElement
+   *          root element of the manifest
+   * @param pDescriptor
+   *          descriptor that receives the library names
+   * @throws MalformedURLException
+   *           propagated from the descriptor when adding a library
+   */
+  private void parseLibraries(Element pRootElement, PluginDescriptor pDescriptor)
+      throws MalformedURLException {
+    NodeList runtimes = pRootElement.getElementsByTagName("runtime");
+    if (runtimes.getLength() == 0) {
+      return;
+    }
+
+    Element firstRuntime = (Element) runtimes.item(0);
+    NodeList libs = firstRuntime.getElementsByTagName("library");
+    for (int idx = 0; idx < libs.getLength(); idx++) {
+      Element lib = (Element) libs.item(idx);
+      String relativeName = lib.getAttribute(ATTR_NAME);
+      boolean exported = lib.getElementsByTagName("export").item(0) != null;
+      if (exported) {
+        pDescriptor.addExportedLibRelative(relativeName);
+      } else {
+        pDescriptor.addNotExportedLibRelative(relativeName);
+      }
+    }
+  }
+
+  /**
+   * Adds every <code>extension-point</code> element found below the root
+   * element to the descriptor, using its id, name and schema attributes.
+   *
+   * @param pRootElement
+   *          root element of the manifest
+   * @param pPluginDescriptor
+   *          descriptor that receives the extension points
+   */
+  private void parseExtensionPoints(Element pRootElement,
+      PluginDescriptor pPluginDescriptor) {
+    NodeList pointNodes = pRootElement.getElementsByTagName("extension-point");
+    if (pointNodes == null) {
+      return;
+    }
+    for (int idx = 0; idx < pointNodes.getLength(); idx++) {
+      Element pointElement = (Element) pointNodes.item(idx);
+      String pointId = pointElement.getAttribute(ATTR_ID);
+      String pointName = pointElement.getAttribute(ATTR_NAME);
+      String pointSchema = pointElement.getAttribute("schema");
+      pPluginDescriptor.addExtensionPoint(new ExtensionPoint(pointId,
+          pointName, pointSchema));
+    }
+  }
+
+  /**
+   * Adds the extensions declared in the manifest to the descriptor.
+   *
+   * For every <code>extension</code> element, each direct child named
+   * <code>implementation</code> becomes one {@link Extension} targeting the
+   * point named by the extension's <code>point</code> attribute. All
+   * <code>parameter</code> elements below an implementation are copied to the
+   * extension as name/value attributes.
+   *
+   * @param pRootElement
+   *          root element of the manifest
+   * @param pPluginDescriptor
+   *          descriptor that receives the extensions
+   */
+  private void parseExtension(Element pRootElement,
+      PluginDescriptor pPluginDescriptor) {
+    NodeList extensionNodes = pRootElement.getElementsByTagName("extension");
+    if (extensionNodes == null) {
+      return;
+    }
+    for (int e = 0; e < extensionNodes.getLength(); e++) {
+      Element extensionElement = (Element) extensionNodes.item(e);
+      String targetPoint = extensionElement.getAttribute("point");
+
+      NodeList children = extensionElement.getChildNodes();
+      if (children == null) {
+        continue;
+      }
+      for (int c = 0; c < children.getLength(); c++) {
+        Node child = children.item(c);
+        // Only direct <implementation> children describe an extension.
+        if (child.getNodeName().equals("implementation")) {
+          Element implementation = (Element) child;
+          String implId = implementation.getAttribute(ATTR_ID);
+          String implClass = implementation.getAttribute(ATTR_CLASS);
+          LOG.debug("impl: point=" + targetPoint + " class=" + implClass);
+          Extension created = new Extension(pPluginDescriptor, targetPoint,
+              implId, implClass, this.conf, this.pluginRepository);
+
+          NodeList params = implementation.getElementsByTagName("parameter");
+          if (params != null) {
+            for (int p = 0; p < params.getLength(); p++) {
+              Element paramElement = (Element) params.item(p);
+              String paramName = paramElement.getAttribute(ATTR_NAME);
+              String paramValue = paramElement.getAttribute("value");
+              created.addAttribute(paramName, paramValue);
+            }
+          }
+          pPluginDescriptor.addExtension(created);
+        }
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/PluginRepository.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/PluginRepository.java b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginRepository.java
new file mode 100644
index 0000000..3e19345
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginRepository.java
@@ -0,0 +1,523 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.plugin;
+
+import java.lang.reflect.Array;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.WeakHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.ObjectCache;
+
+/**
+ * The plugin repository is a registry of all plugins.
+ * 
+ * At system boot up a repository is built by parsing the manifest files of
+ * all plugins. Plugins that require other plugins that do not exist are not
+ * registered. For each plugin a plugin descriptor instance will be created.
+ * The descriptor represents all meta information about a plugin. A plugin
+ * instance is only created later, when it is required; this allows lazy
+ * plugin loading.
+ */
+public class PluginRepository {
+  private static final WeakHashMap<String, PluginRepository> CACHE = new WeakHashMap<String, PluginRepository>();
+
+  private boolean auto;
+
+  private List<PluginDescriptor> fRegisteredPlugins;
+
+  private HashMap<String, ExtensionPoint> fExtensionPoints;
+
+  private HashMap<String, Plugin> fActivatedPlugins;
+
+  private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE = new HashMap<String, Map<PluginClassLoader, Class>>();
+
+  private Configuration conf;
+
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginRepository.class);
+
+  /**
+   * Builds a repository from the plugins found in the configured folders.
+   *
+   * The manifests below <code>plugin.folders</code> are parsed, the plugins
+   * are filtered by <code>plugin.includes</code> and
+   * <code>plugin.excludes</code>, their dependencies are checked, and finally
+   * the extension points and extensions of the remaining plugins are
+   * installed. When <code>plugin.auto-activation</code> is true (the default)
+   * dependencies are resolved against all plugins found, otherwise only
+   * against the filtered ones.
+   *
+   * @param conf
+   *          configuration to read the plugin properties from; the repository
+   *          keeps its own copy
+   * @throws RuntimeException
+   *           if an extension refers to an extension point that does not
+   *           exist; the original {@link PluginRuntimeException} is attached
+   *           as cause
+   */
+  public PluginRepository(Configuration conf) throws RuntimeException {
+    fActivatedPlugins = new HashMap<String, Plugin>();
+    fExtensionPoints = new HashMap<String, ExtensionPoint>();
+    this.conf = new Configuration(conf);
+    this.auto = conf.getBoolean("plugin.auto-activation", true);
+    String[] pluginFolders = conf.getStrings("plugin.folders");
+    PluginManifestParser manifestParser = new PluginManifestParser(this.conf,
+        this);
+    Map<String, PluginDescriptor> allPlugins = manifestParser
+        .parsePluginFolder(pluginFolders);
+    if (allPlugins.isEmpty()) {
+      LOG.warn("No plugins found on paths of property plugin.folders=\"{}\"",
+          conf.get("plugin.folders"));
+    }
+    Pattern excludes = Pattern.compile(conf.get("plugin.excludes", ""));
+    Pattern includes = Pattern.compile(conf.get("plugin.includes", ""));
+    Map<String, PluginDescriptor> filteredPlugins = filter(excludes, includes,
+        allPlugins);
+    fRegisteredPlugins = getDependencyCheckedPlugins(filteredPlugins,
+        this.auto ? allPlugins : filteredPlugins);
+    installExtensionPoints(fRegisteredPlugins);
+    try {
+      installExtensions(fRegisteredPlugins);
+    } catch (PluginRuntimeException e) {
+      LOG.error(e.toString());
+      // Keep the message callers already see, but preserve the cause so the
+      // original stack trace is not lost.
+      throw new RuntimeException(e.getMessage(), e);
+    }
+    displayStatus();
+  }
+
+  /**
+   * Returns the plugin repository cached for the given configuration,
+   * creating and caching a new one when none is cached yet.
+   *
+   * The cache key is the UUID of the configuration; when the configuration
+   * has none, a key derived from its hash code is used instead.
+   *
+   * @param conf
+   *          configuration the repository belongs to
+   * @return a cached instance of the plugin repository
+   */
+  public static synchronized PluginRepository get(Configuration conf) {
+    String key = NutchConfiguration.getUUID(conf);
+    if (key == null) {
+      key = "nonNutchConf@" + conf.hashCode(); // fallback
+    }
+    PluginRepository cached = CACHE.get(key);
+    if (cached != null) {
+      return cached;
+    }
+    PluginRepository created = new PluginRepository(conf);
+    CACHE.put(key, created);
+    return created;
+  }
+
+  /**
+   * Registers the extension points of all given plugins by their id. A later
+   * extension point replaces an earlier one with the same id.
+   *
+   * @param plugins
+   *          plugins whose extension points are installed; may be null, in
+   *          which case nothing happens
+   */
+  private void installExtensionPoints(List<PluginDescriptor> plugins) {
+    if (plugins != null) {
+      for (PluginDescriptor descriptor : plugins) {
+        for (ExtensionPoint extensionPoint : descriptor.getExtenstionPoints()) {
+          String pointId = extensionPoint.getId();
+          LOG.debug("Adding extension point " + pointId);
+          fExtensionPoints.put(pointId, extensionPoint);
+        }
+      }
+    }
+  }
+
+  /**
+   * Attaches every extension of the given plugins to the extension point it
+   * targets.
+   *
+   * @param pRegisteredPlugins
+   *          plugins whose extensions are installed
+   * @throws PluginRuntimeException
+   *           if an extension targets an extension point that is not
+   *           registered
+   */
+  private void installExtensions(List<PluginDescriptor> pRegisteredPlugins)
+      throws PluginRuntimeException {
+
+    for (PluginDescriptor plugin : pRegisteredPlugins) {
+      for (Extension ext : plugin.getExtensions()) {
+        String targetId = ext.getTargetPoint();
+        ExtensionPoint target = getExtensionPoint(targetId);
+        if (target != null) {
+          target.addExtension(ext);
+          continue;
+        }
+        throw new PluginRuntimeException("Plugin (" + plugin.getPluginId()
+            + "), extension point: " + targetId + " does not exist.");
+      }
+    }
+  }
+
+  /**
+   * Recursively collects the dependencies of a plugin while checking that
+   * each one exists and that the dependency chain contains no cycle.
+   *
+   * @param plugin
+   *          plugin whose dependencies are collected
+   * @param plugins
+   *          plugins available for resolving dependency ids
+   * @param dependencies
+   *          receives every dependency found; a fresh map is used when null
+   * @param branch
+   *          plugins on the path currently being explored, used for cycle
+   *          detection; a fresh map is used when null
+   * @throws MissingDependencyException
+   *           if a dependency id is not contained in <code>plugins</code>
+   * @throws CircularDependencyException
+   *           if a dependency is already on the current path
+   */
+  private void getPluginCheckedDependencies(PluginDescriptor plugin,
+      Map<String, PluginDescriptor> plugins,
+      Map<String, PluginDescriptor> dependencies,
+      Map<String, PluginDescriptor> branch) throws MissingDependencyException,
+      CircularDependencyException {
+
+    Map<String, PluginDescriptor> collected = dependencies != null ? dependencies
+        : new HashMap<String, PluginDescriptor>();
+    Map<String, PluginDescriptor> path = branch != null ? branch
+        : new HashMap<String, PluginDescriptor>();
+
+    // Mark this plugin as being on the path while its subtree is explored.
+    path.put(plugin.getPluginId(), plugin);
+
+    for (String dependencyId : plugin.getDependencies()) {
+      PluginDescriptor required = plugins.get(dependencyId);
+      if (required == null) {
+        throw new MissingDependencyException("Missing dependency "
+            + dependencyId + " for plugin " + plugin.getPluginId());
+      }
+      if (path.containsKey(dependencyId)) {
+        throw new CircularDependencyException("Circular dependency detected "
+            + dependencyId + " for plugin " + plugin.getPluginId());
+      }
+      collected.put(dependencyId, required);
+      getPluginCheckedDependencies(required, plugins, collected, path);
+    }
+
+    path.remove(plugin.getPluginId());
+  }
+
+  /**
+   * Collects all direct and transitive dependencies of a plugin.
+   *
+   * @param plugin
+   *          plugin whose dependencies are collected
+   * @param plugins
+   *          plugins available for resolving dependency ids
+   * @return map from plugin id to descriptor of every dependency found
+   * @throws MissingDependencyException
+   *           if a dependency cannot be resolved
+   * @throws CircularDependencyException
+   *           if the dependencies form a cycle
+   */
+  private Map<String, PluginDescriptor> getPluginCheckedDependencies(
+      PluginDescriptor plugin, Map<String, PluginDescriptor> plugins)
+      throws MissingDependencyException, CircularDependencyException {
+    Map<String, PluginDescriptor> collected = new HashMap<String, PluginDescriptor>();
+    getPluginCheckedDependencies(plugin, plugins, collected,
+        new HashMap<String, PluginDescriptor>());
+    return collected;
+  }
+
+  /**
+   * Returns the filtered plugins whose dependencies can be fully resolved,
+   * together with those dependencies. A plugin with a missing or circular
+   * dependency is logged and left out.
+   *
+   * @param filtered
+   *          the plugins that passed filtering
+   * @param all
+   *          the plugins that may be used to resolve dependencies
+   * @return list of accepted plugins and their dependencies, or null when
+   *         <code>filtered</code> is null
+   */
+  private List<PluginDescriptor> getDependencyCheckedPlugins(
+      Map<String, PluginDescriptor> filtered, Map<String, PluginDescriptor> all) {
+    if (filtered == null) {
+      return null;
+    }
+    Map<String, PluginDescriptor> accepted = new HashMap<String, PluginDescriptor>();
+
+    for (PluginDescriptor candidate : filtered.values()) {
+      Map<String, PluginDescriptor> required;
+      try {
+        required = getPluginCheckedDependencies(candidate, all);
+      } catch (MissingDependencyException mde) {
+        // Log the problem and leave this plugin out
+        LOG.warn(mde.getMessage());
+        continue;
+      } catch (CircularDependencyException cde) {
+        // Log the problem and leave this plugin out
+        LOG.warn(cde.getMessage());
+        continue;
+      }
+      accepted.putAll(required);
+      accepted.put(candidate.getPluginId(), candidate);
+    }
+    return new ArrayList<PluginDescriptor>(accepted.values());
+  }
+
+  /**
+   * Returns all registed plugin descriptors.
+   * 
+   * @return PluginDescriptor[]
+   */
+  public PluginDescriptor[] getPluginDescriptors() {
+    return fRegisteredPlugins.toArray(new PluginDescriptor[fRegisteredPlugins
+        .size()]);
+  }
+
+  /**
+   * Returns the descriptor of one plugin identified by a plugin id.
+   * 
+   * @param pPluginId
+   * @return PluginDescriptor
+   */
+  public PluginDescriptor getPluginDescriptor(String pPluginId) {
+
+    for (PluginDescriptor descriptor : fRegisteredPlugins) {
+      if (descriptor.getPluginId().equals(pPluginId))
+        return descriptor;
+    }
+    return null;
+  }
+
+  /**
+   * Returns a extension point indentified by a extension point id.
+   * 
+   * @param pXpId
+   * @return a extentsion point
+   */
+  public ExtensionPoint getExtensionPoint(String pXpId) {
+    return this.fExtensionPoints.get(pXpId);
+  }
+
+  /**
+   * Returns the instance of a plugin, creating and starting it on first use.
+   * Plugin instances are cached by plugin id, so within this repository a
+   * plugin exists only as one instance. This allows central management of
+   * the resources owned by a plugin.
+   * 
+   * The plugin class is loaded through the class loader of the descriptor,
+   * instantiated with the descriptor and the repository configuration, and
+   * its startUp() method is invoked before the instance is cached.
+   * 
+   * @param pDescriptor
+   *          descriptor of the plugin to return
+   * @return Plugin the activated plugin instance
+   * @throws PluginRuntimeException
+   *           if the plugin class cannot be loaded or instantiated, or if
+   *           starting the plugin fails
+   */
+  public Plugin getPluginInstance(PluginDescriptor pDescriptor)
+      throws PluginRuntimeException {
+    if (fActivatedPlugins.containsKey(pDescriptor.getPluginId()))
+      return fActivatedPlugins.get(pDescriptor.getPluginId());
+    try {
+      // Must synchronize here to make sure creation and initialization
+      // of a plugin instance are done by one and only one thread.
+      // The same is in Extension.getExtensionInstance().
+      // Suggested by Stefan Groschupf <sg...@media-style.com>
+      synchronized (pDescriptor) {
+        // Check again under the lock: another thread may have activated the
+        // plugin between the unsynchronized check above and acquiring the
+        // lock, and the plugin must not be created and started twice.
+        Plugin activated = fActivatedPlugins.get(pDescriptor.getPluginId());
+        if (activated != null) {
+          return activated;
+        }
+        Class<?> pluginClass = getCachedClass(pDescriptor,
+            pDescriptor.getPluginClass());
+        Constructor<?> constructor = pluginClass.getConstructor(new Class<?>[] {
+            PluginDescriptor.class, Configuration.class });
+        Plugin plugin = (Plugin) constructor.newInstance(new Object[] {
+            pDescriptor, this.conf });
+        plugin.startUp();
+        fActivatedPlugins.put(pDescriptor.getPluginId(), plugin);
+        return plugin;
+      }
+    } catch (ClassNotFoundException e) {
+      throw new PluginRuntimeException(e);
+    } catch (InstantiationException e) {
+      throw new PluginRuntimeException(e);
+    } catch (IllegalAccessException e) {
+      throw new PluginRuntimeException(e);
+    } catch (NoSuchMethodException e) {
+      throw new PluginRuntimeException(e);
+    } catch (InvocationTargetException e) {
+      throw new PluginRuntimeException(e);
+    }
+  }
+
+  /**
+   * Shuts down all activated plugins when the repository is garbage
+   * collected, then runs the superclass finalizer.
+   * 
+   * @see java.lang.Object#finalize()
+   */
+  @Override
+  public void finalize() throws Throwable {
+    try {
+      shutDownActivatedPlugins();
+    } finally {
+      // Chain to the superclass finalizer even if a plugin fails to shut
+      // down.
+      super.finalize();
+    }
+  }
+
+  /**
+   * Calls shutDown() on every plugin that has been activated by this
+   * repository. The first failure aborts the loop and is propagated.
+   * 
+   * @throws PluginRuntimeException
+   *           if shutting down a plugin fails
+   */
+  private void shutDownActivatedPlugins() throws PluginRuntimeException {
+    for (Plugin activated : fActivatedPlugins.values()) {
+      activated.shutDown();
+    }
+  }
+
+  /**
+   * Loads a class through the class loader of the given plugin, caching the
+   * result per class name and class loader.
+   *
+   * The cache is static and therefore shared by all repositories; every
+   * access to it is guarded by a lock on the cache itself. The class is
+   * loaded outside that lock, so two threads may load the same class
+   * concurrently; both then cache the class their loader returned.
+   *
+   * @param pDescriptor
+   *          descriptor of the plugin whose class loader is used
+   * @param className
+   *          fully qualified name of the class to load
+   * @return the loaded class
+   * @throws ClassNotFoundException
+   *           if the class loader cannot find the class
+   */
+  public Class getCachedClass(PluginDescriptor pDescriptor, String className)
+      throws ClassNotFoundException {
+    // Resolve the loader before taking the cache lock so that no plugin code
+    // runs while the lock is held.
+    PluginClassLoader loader = pDescriptor.getClassLoader();
+    synchronized (CLASS_CACHE) {
+      Map<PluginClassLoader, Class> descMap = CLASS_CACHE.get(className);
+      if (descMap != null) {
+        Class cached = descMap.get(loader);
+        if (cached != null) {
+          return cached;
+        }
+      }
+    }
+    Class clazz = loader.loadClass(className);
+    synchronized (CLASS_CACHE) {
+      Map<PluginClassLoader, Class> descMap = CLASS_CACHE.get(className);
+      if (descMap == null) {
+        descMap = new HashMap<PluginClassLoader, Class>();
+        CLASS_CACHE.put(className, descMap);
+      }
+      descMap.put(loader, clazz);
+    }
+    return clazz;
+  }
+
+  /**
+   * Logs the auto-activation mode followed by the registered plugins and the
+   * registered extension points, or "NONE" where there are none.
+   */
+  private void displayStatus() {
+    LOG.info("Plugin Auto-activation mode: [" + this.auto + "]");
+
+    LOG.info("Registered Plugins:");
+    boolean anyPlugins = fRegisteredPlugins != null
+        && !fRegisteredPlugins.isEmpty();
+    if (anyPlugins) {
+      for (PluginDescriptor registered : fRegisteredPlugins) {
+        LOG.info("\t" + registered.getName() + " ("
+            + registered.getPluginId() + ")");
+      }
+    } else {
+      LOG.info("\tNONE");
+    }
+
+    LOG.info("Registered Extension-Points:");
+    boolean anyPoints = fExtensionPoints != null
+        && !fExtensionPoints.isEmpty();
+    if (anyPoints) {
+      for (ExtensionPoint point : fExtensionPoints.values()) {
+        LOG.info("\t" + point.getName() + " (" + point.getId() + ")");
+      }
+    } else {
+      LOG.info("\tNONE");
+    }
+  }
+
+  /**
+   * Selects the plugins whose id matches the include pattern and does not
+   * match the exclude pattern. Both patterns must match the whole id. Null
+   * descriptors and descriptors without an id are skipped.
+   * 
+   * @param excludes
+   *          pattern of plugin ids to leave out
+   * @param includes
+   *          pattern of plugin ids to keep
+   * @param plugins
+   *          Map of plugins; may be null
+   * @return map from plugin id to descriptor of the selected plugins; empty
+   *         when <code>plugins</code> is null
+   */
+  private Map<String, PluginDescriptor> filter(Pattern excludes,
+      Pattern includes, Map<String, PluginDescriptor> plugins) {
+
+    Map<String, PluginDescriptor> accepted = new HashMap<String, PluginDescriptor>();
+    if (plugins == null) {
+      return accepted;
+    }
+
+    for (PluginDescriptor candidate : plugins.values()) {
+      String id = candidate == null ? null : candidate.getPluginId();
+      if (id == null) {
+        continue;
+      }
+
+      if (!includes.matcher(id).matches()) {
+        LOG.debug("not including: " + id);
+      } else if (excludes.matcher(id).matches()) {
+        LOG.debug("excluding: " + id);
+      } else {
+        accepted.put(candidate.getPluginId(), candidate);
+      }
+    }
+    return accepted;
+  }
+
+  /**
+   * Get ordered list of plugins. Filter and normalization plugins are applied
+   * in a configurable "pipeline" order, e.g., if one plugin depends on the
+   * output of another plugin. This method loads the plugins in the order
+   * defined by orderProperty. If orderProperty is empty or unset, all active
+   * plugins of the given interface and extension point are loaded.
+   * 
+   * @param clazz
+   *          interface class implemented by required plugins
+   * @param xPointId
+   *          extension point id of required plugins
+   * @param orderProperty
+   *          property name defining plugin order
+   * @return array of plugin instances
+   */
+  public synchronized Object[] getOrderedPlugins(Class<?> clazz,
+      String xPointId, String orderProperty) {
+    Object[] filters;
+    ObjectCache objectCache = ObjectCache.get(conf);
+    filters = (Object[]) objectCache.getObject(clazz.getName());
+
+    if (filters == null) {
+      String order = conf.get(orderProperty);
+      List<String> orderOfFilters = new ArrayList<String>();
+      boolean userDefinedOrder = false;
+      if (order != null && !order.trim().isEmpty()) {
+        orderOfFilters = Arrays.asList(order.trim().split("\\s+"));
+        userDefinedOrder = true;
+      }
+
+      try {
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            xPointId);
+        if (point == null)
+          throw new RuntimeException(xPointId + " not found.");
+        Extension[] extensions = point.getExtensions();
+        HashMap<String, Object> filterMap = new HashMap<String, Object>();
+        for (int i = 0; i < extensions.length; i++) {
+          Extension extension = extensions[i];
+          Object filter = extension.getExtensionInstance();
+          if (!filterMap.containsKey(filter.getClass().getName())) {
+            filterMap.put(filter.getClass().getName(), filter);
+            if (!userDefinedOrder)
+              orderOfFilters.add(filter.getClass().getName());
+          }
+        }
+        List<Object> sorted = new ArrayList<Object>();
+        for (String orderedFilter : orderOfFilters) {
+          Object f = filterMap.get(orderedFilter);
+          if (f == null) {
+            LOG.error(clazz.getSimpleName() + " : " + orderedFilter
+                + " declared in configuration property " + orderProperty
+                + " but not found in an active plugin - ignoring.");
+            continue;
+          }
+          sorted.add(f);
+        }
+        Object[] filter = (Object[]) Array.newInstance(clazz, sorted.size());
+        for (int i = 0; i < sorted.size(); i++) {
+          filter[i] = sorted.get(i);
+          if (LOG.isTraceEnabled()) {
+            LOG.trace(clazz.getSimpleName() + " : filters[" + i + "] = "
+                + filter[i].getClass());
+          }
+        }
+        objectCache.setObject(clazz.getName(), filter);
+      } catch (PluginRuntimeException e) {
+        throw new RuntimeException(e);
+      }
+
+      filters = (Object[]) objectCache.getObject(clazz.getName());
+    }
+    return filters;
+  }
+
+  /**
+   * Runs the static main() method of a class, loading the class through the
+   * class loader of the selected plugin.
+   * 
+   * @param args
+   *          plugin ID (needs to be activated in the configuration), and the
+   *          class name. The remaining arguments are passed to the main
+   *          method of the selected class.
+   * @throws Exception
+   *           if invoking the main method fails
+   */
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err
+          .println("Usage: PluginRepository pluginId className [arg1 arg2 ...]");
+      return;
+    }
+    String pluginId = args[0];
+    String className = args[1];
+
+    PluginRepository repository = new PluginRepository(
+        NutchConfiguration.create());
+    PluginDescriptor descriptor = repository.getPluginDescriptor(pluginId);
+    if (descriptor == null) {
+      System.err.println("Plugin '" + pluginId + "' not present or inactive.");
+      return;
+    }
+
+    ClassLoader pluginLoader = descriptor.getClassLoader();
+    Class<?> target;
+    try {
+      target = Class.forName(className, true, pluginLoader);
+    } catch (Exception e) {
+      System.err.println("Could not load the class '" + className + ": "
+          + e.getMessage());
+      return;
+    }
+
+    Method mainMethod;
+    try {
+      mainMethod = target.getMethod("main", new Class<?>[] { String[].class });
+    } catch (Exception e) {
+      System.err.println("Could not find the 'main(String[])' method in class "
+          + className + ": " + e.getMessage());
+      return;
+    }
+
+    // Everything after the plugin id and the class name belongs to the
+    // invoked class.
+    String[] forwarded = Arrays.copyOfRange(args, 2, args.length);
+    mainMethod.invoke(null, new Object[] { forwarded });
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/PluginRuntimeException.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/PluginRuntimeException.java b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginRuntimeException.java
new file mode 100644
index 0000000..acccda2
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/PluginRuntimeException.java
@@ -0,0 +1,37 @@
+/*
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.plugin;
+
+/**
+ * <code>PluginRuntimeException</code> is thrown when an error occurs in the
+ * plugin management. It is a checked exception.
+ * 
+ * @author joa23
+ */
+public class PluginRuntimeException extends Exception {
+
+  private static final long serialVersionUID = 1L;
+
+  /**
+   * Creates an exception that wraps the given cause.
+   *
+   * @param cause
+   *          the underlying problem
+   */
+  public PluginRuntimeException(Throwable cause) {
+    super(cause);
+  }
+
+  /**
+   * Creates an exception with the given detail message.
+   *
+   * @param message
+   *          description of the problem
+   */
+  public PluginRuntimeException(String message) {
+    super(message);
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/plugin/package.html
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/plugin/package.html b/nutch-core/src/main/java/org/apache/nutch/plugin/package.html
new file mode 100644
index 0000000..5ca4c9e
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/plugin/package.html
@@ -0,0 +1,40 @@
+<html>
+<body>
+The Nutch {@link org.apache.nutch.plugin.Pluggable Plugin} System.
+<p>
+<b>The Nutch Plugin System provides a way to extend nutch functionality</b>.
+A large part of the functionality of Nutch is provided by plugins:
+All of the parsing, indexing and searching that nutch does is actually
+accomplished by various plugins.
+</p><p>
+In writing a plugin, you're actually providing one or more extensions of the
+existing extension-points (<i>hooks</i>).
+The core Nutch extension-points are themselves defined in a plugin,
+the <code>nutch-extensionpoints</code> plugin.
+Each extension-point defines an interface that must be implemented by the
+extension. The core extension-points and extensions available in Nutch are
+listed in the {@link org.apache.nutch.plugin.Pluggable} interface.
+</p>
+
+@see <a href="./doc-files/plugin.dtd">Nutch plugin manifest DTD</a>
+
+@see <a href="http://wiki.apache.org/nutch/PluginCentral">
+     Plugin Central
+     </a>
+@see <a href="http://wiki.apache.org/nutch/AboutPlugins">
+     About Plugins
+     </a>
+@see <a href="http://wiki.apache.org/nutch/WhyNutchHasAPluginSystem">
+     Why Nutch has a Plugin System?
+     </a>
+@see <a href="http://wiki.apache.org/nutch/WhichTechnicalConceptsAreBehindTheNutchPluginSystem">
+     Which technical concepts are behind the nutch plugin system?
+     </a>
+@see <a href="http://wiki.apache.org/nutch/WhatsTheProblemWithPluginsAndClass-loading">
+     What's the problem with Plugins and Class loading?
+     </a>
+@see <a href="http://wiki.apache.org/nutch/WritingPluginExample">
+     Writing Plugin Example
+     </a>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/protocol/Content.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/protocol/Content.java b/nutch-core/src/main/java/org/apache/nutch/protocol/Content.java
new file mode 100755
index 0000000..4dc8277
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/protocol/Content.java
@@ -0,0 +1,296 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol;
+
+//JDK imports
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.zip.InflaterInputStream;
+
+//Hadoop imports
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.ArrayFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VersionMismatchException;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.GenericOptionsParser;
+
+//Nutch imports
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.util.MimeUtil;
+import org.apache.nutch.util.NutchConfiguration;
+
+/**
+ * The bytes fetched for a url together with the url itself, the base url,
+ * the media type and protocol-specific metadata. Instances are serialized
+ * through the Hadoop {@link Writable} contract.
+ */
+public final class Content implements Writable {
+
+  /** Directory name appended to the segment path by {@link #main(String[])}. */
+  public static final String DIR_NAME = "content";
+
+  /**
+   * Marker written first by {@link #write(DataOutput)}. It is negative so that
+   * {@link #readFields(DataInput)} can tell it apart from the non-negative
+   * byte count that starts a compressed record.
+   */
+  private final static int VERSION = -1;
+
+  /** Marker seen by the last {@link #readFields(DataInput)} call that read an uncompressed record. */
+  private int version;
+
+  private String url;
+
+  private String base;
+
+  private byte[] content;
+
+  private String contentType;
+
+  private Metadata metadata;
+
+  /** Only assigned by the six-argument constructor. */
+  private MimeUtil mimeTypes;
+
+  /**
+   * Creates an empty instance with empty metadata; all other fields stay
+   * unset until {@link #readFields(DataInput)} or the setters fill them.
+   */
+  public Content() {
+    metadata = new Metadata();
+  }
+
+  /**
+   * Creates a fully populated instance.
+   * 
+   * @param url
+   *          the url fetched
+   * @param base
+   *          the base url for relative links
+   * @param content
+   *          the bytes retrieved
+   * @param contentType
+   *          media type hint; the stored type is whatever
+   *          <code>MimeUtil.autoResolveContentType</code> returns for this
+   *          hint, the url and the content
+   * @param metadata
+   *          protocol-specific data
+   * @param conf
+   *          configuration handed to the {@link MimeUtil} constructor
+   * @throws IllegalArgumentException
+   *           if url, base, content or metadata is null
+   */
+  public Content(String url, String base, byte[] content, String contentType,
+      Metadata metadata, Configuration conf) {
+
+    if (url == null)
+      throw new IllegalArgumentException("null url");
+    if (base == null)
+      throw new IllegalArgumentException("null base");
+    if (content == null)
+      throw new IllegalArgumentException("null content");
+    if (metadata == null)
+      throw new IllegalArgumentException("null metadata");
+
+    this.url = url;
+    this.base = base;
+    this.content = content;
+    this.metadata = metadata;
+
+    this.mimeTypes = new MimeUtil(conf);
+    this.contentType = getContentType(contentType, url, content);
+  }
+
+  /**
+   * Reads the part shared by every record layout: url, base, length-prefixed
+   * content bytes and content type, in that order.
+   */
+  private void readUrlBaseContentAndType(DataInput in) throws IOException {
+    url = Text.readString(in);
+    base = Text.readString(in);
+
+    content = new byte[in.readInt()];
+    in.readFully(content);
+
+    contentType = Text.readString(in);
+  }
+
+  /**
+   * Reads a record from the already inflated stream of a compressed record.
+   * The first byte selects the layout: 0 and 1 store metadata as explicit
+   * key/value lists, 2 delegates to {@link Metadata#readFields(DataInput)}.
+   * 
+   * @throws VersionMismatchException
+   *           if the leading byte is none of 0, 1 or 2
+   */
+  private void readFieldsCompressed(DataInput in) throws IOException {
+    byte oldVersion = in.readByte();
+    switch (oldVersion) {
+    case 0:
+    case 1:
+      readUrlBaseContentAndType(in);
+      // reconstruct metadata: <key count> { <key> <value count> { <value> } }
+      int keySize = in.readInt();
+      for (int i = 0; i < keySize; i++) {
+        String key = Text.readString(in);
+        int valueSize = in.readInt();
+        for (int j = 0; j < valueSize; j++) {
+          metadata.add(key, Text.readString(in));
+        }
+      }
+      break;
+    case 2:
+      readUrlBaseContentAndType(in);
+      metadata.readFields(in); // read meta data
+      break;
+    default:
+      throw new VersionMismatchException((byte) 2, oldVersion);
+    }
+
+  }
+
+  /**
+   * Replaces the state of this instance with a record read from
+   * <code>in</code>. The leading int is either the negative {@link #VERSION}
+   * marker of the current layout or the byte count of a deflate-compressed
+   * record in one of the older layouts.
+   * 
+   * @throws VersionMismatchException
+   *           if the leading int is negative but not {@link #VERSION}, or
+   *           the compressed record carries an unknown layout byte
+   */
+  @Override
+  public final void readFields(DataInput in) throws IOException {
+    metadata.clear();
+    int sizeOrVersion = in.readInt();
+    if (sizeOrVersion < 0) { // version
+      version = sizeOrVersion;
+      switch (version) {
+      case VERSION:
+        readUrlBaseContentAndType(in);
+        metadata.readFields(in);
+        break;
+      default:
+        throw new VersionMismatchException((byte) VERSION, (byte) version);
+      }
+    } else { // size
+      byte[] compressed = new byte[sizeOrVersion];
+      in.readFully(compressed, 0, compressed.length);
+      DataInputStream inflated = new DataInputStream(new InflaterInputStream(
+          new ByteArrayInputStream(compressed)));
+      try {
+        readFieldsCompressed(inflated);
+      } finally {
+        // closing the wrapper also closes the InflaterInputStream instead of
+        // leaving its Inflater to be reclaimed whenever it is collected
+        inflated.close();
+      }
+    }
+  }
+
+  /**
+   * Writes the {@link #VERSION} marker followed by url, base,
+   * length-prefixed content bytes, content type and metadata.
+   */
+  @Override
+  public final void write(DataOutput out) throws IOException {
+    out.writeInt(VERSION);
+
+    Text.writeString(out, url); // write url
+    Text.writeString(out, base); // write base
+
+    out.writeInt(content.length); // write content
+    out.write(content);
+
+    Text.writeString(out, contentType); // write contentType
+
+    metadata.write(out); // write metadata
+  }
+
+  /** Creates an empty instance and fills it via {@link #readFields(DataInput)}. */
+  public static Content read(DataInput in) throws IOException {
+    Content content = new Content();
+    content.readFields(in);
+    return content;
+  }
+
+  //
+  // Accessor methods
+  //
+
+  /** The url fetched. */
+  public String getUrl() {
+    return url;
+  }
+
+  /**
+   * The base url for relative links contained in the content. May be
+   * different from url if the request redirected.
+   */
+  public String getBaseUrl() {
+    return base;
+  }
+
+  /** The binary content retrieved. The internal array is returned, not a copy. */
+  public byte[] getContent() {
+    return content;
+  }
+
+  /** Replaces the binary content; the array is stored as is, not copied. */
+  public void setContent(byte[] content) {
+    this.content = content;
+  }
+
+  /**
+   * The media type of the retrieved content.
+   * 
+   * @see <a href="http://www.iana.org/assignments/media-types/">
+   *      http://www.iana.org/assignments/media-types/</a>
+   */
+  public String getContentType() {
+    return contentType;
+  }
+
+  /** Sets the media type directly, without any resolution step. */
+  public void setContentType(String contentType) {
+    this.contentType = contentType;
+  }
+
+  /** Other protocol-specific data. */
+  public Metadata getMetadata() {
+    return metadata;
+  }
+
+  /** Other protocol-specific data. */
+  public void setMetadata(Metadata metadata) {
+    this.metadata = metadata;
+  }
+
+  /**
+   * Two instances are equal when url, base, content bytes, content type and
+   * metadata are all equal. Unset (null) fields are equal only to unset
+   * fields.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof Content)) {
+      return false;
+    }
+    Content that = (Content) o;
+    return nullSafeEquals(this.url, that.url)
+        && nullSafeEquals(this.base, that.base)
+        && Arrays.equals(this.content, that.content)
+        && nullSafeEquals(this.contentType, that.contentType)
+        && nullSafeEquals(this.metadata, that.metadata);
+  }
+
+  /**
+   * Hash code consistent with {@link #equals(Object)}, computed from url,
+   * base, content bytes and content type.
+   */
+  @Override
+  public int hashCode() {
+    // metadata is deliberately left out: only its equals() is relied on by
+    // this class, and omitting a field never breaks the rule that equal
+    // objects must have equal hash codes
+    int result = nullSafeHash(url);
+    result = 31 * result + nullSafeHash(base);
+    result = 31 * result + Arrays.hashCode(content);
+    result = 31 * result + nullSafeHash(contentType);
+    return result;
+  }
+
+  /** Equality that treats two nulls as equal and null as unequal to non-null. */
+  private static boolean nullSafeEquals(Object a, Object b) {
+    return a == null ? b == null : a.equals(b);
+  }
+
+  /** Hash code of <code>o</code>, or 0 for null. */
+  private static int nullSafeHash(Object o) {
+    return o == null ? 0 : o.hashCode();
+  }
+
+  /**
+   * Multi-line dump of version, url, base, content type and metadata,
+   * followed by the content bytes decoded with the platform default charset.
+   * Nothing is printed after the <code>Content:</code> line when no content
+   * is set.
+   */
+  @Override
+  public String toString() {
+    StringBuilder buffer = new StringBuilder();
+
+    buffer.append("Version: ").append(version).append("\n");
+    buffer.append("url: ").append(url).append("\n");
+    buffer.append("base: ").append(base).append("\n");
+    buffer.append("contentType: ").append(contentType).append("\n");
+    buffer.append("metadata: ").append(metadata).append("\n");
+    buffer.append("Content:\n");
+    if (content != null) {
+      buffer.append(new String(content)); // try default encoding
+    }
+
+    return buffer.toString();
+
+  }
+
+  /**
+   * Command line tool: prints record <code>recno</code> of the
+   * {@link #DIR_NAME} array file below <code>segment</code>. Prints the usage
+   * line and returns when fewer than three raw arguments are given or fewer
+   * than two remain after generic option parsing.
+   */
+  public static void main(String argv[]) throws Exception {
+
+    String usage = "Content (-local | -dfs <namenode:port>) recno segment";
+
+    if (argv.length < 3) {
+      System.out.println("usage:" + usage);
+      return;
+    }
+    Options opts = new Options();
+    Configuration conf = NutchConfiguration.create();
+
+    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
+
+    String[] remainingArgs = parser.getRemainingArgs();
+    if (remainingArgs.length < 2) {
+      // recno and segment are both read below; fail with the usage line
+      // rather than an ArrayIndexOutOfBoundsException
+      System.out.println("usage:" + usage);
+      return;
+    }
+    FileSystem fs = FileSystem.get(conf);
+
+    try {
+      int recno = Integer.parseInt(remainingArgs[0]);
+      String segment = remainingArgs[1];
+
+      Path file = new Path(segment, DIR_NAME);
+      System.out.println("Reading from file: " + file);
+
+      ArrayFile.Reader contents = new ArrayFile.Reader(fs, file.toString(),
+          conf);
+      try {
+        Content content = new Content();
+        contents.get(recno, content);
+        System.out.println("Retrieved " + recno + " from file " + file);
+
+        System.out.println(content);
+      } finally {
+        // release the reader even when the lookup or the printing fails
+        contents.close();
+      }
+    } finally {
+      fs.close();
+    }
+  }
+
+  /** Delegates to <code>autoResolveContentType</code> of the {@link MimeUtil} created in the constructor. */
+  private String getContentType(String typeName, String url, byte[] data) {
+    return this.mimeTypes.autoResolveContentType(typeName, url, data);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/protocol/Protocol.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/protocol/Protocol.java b/nutch-core/src/main/java/org/apache/nutch/protocol/Protocol.java
new file mode 100755
index 0000000..0aa5d29
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/protocol/Protocol.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol;
+
+// Hadoop imports
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.io.Text;
+
+// Nutch imports
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.plugin.Pluggable;
+
+import crawlercommons.robots.BaseRobotRules;
+
+/**
+ * A retriever of url content. This is the extension point that protocol
+ * plugins implement.
+ */
+public interface Protocol extends Pluggable, Configurable {
+  // Interface fields are implicitly public, static and final, so the
+  // modifiers are not spelled out.
+
+  /** The name of the extension point: the fully qualified name of this interface. */
+  String X_POINT_ID = Protocol.class.getName();
+
+  /**
+   * Property name. When the property is true in the current configuration,
+   * protocol implementations should handle "politeness" limits themselves.
+   * When it is false, those limits are assumed to be enforced elsewhere and
+   * implementations should not enforce them.
+   */
+  String CHECK_BLOCKING = "protocol.plugin.check.blocking";
+
+  /**
+   * Property name. When the property is true in the current configuration,
+   * protocol implementations should handle robot exclusion rules themselves.
+   * When it is false, those rules are assumed to be enforced elsewhere and
+   * implementations should not enforce them.
+   */
+  String CHECK_ROBOTS = "protocol.plugin.check.robots";
+
+  /**
+   * Fetches a fetchlist entry.
+   * 
+   * @param url
+   *          url to fetch
+   * @param datum
+   *          page datum
+   * @return the {@link ProtocolOutput} for the entry, which carries the
+   *         {@link Content} and the protocol status
+   */
+  ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum);
+
+  /**
+   * Retrieve robot rules applicable for this url.
+   * 
+   * @param url
+   *          url to check
+   * @param datum
+   *          page datum
+   * @return robot rules (specific for this url or default), never null
+   */
+  BaseRobotRules getRobotRules(Text url, CrawlDatum datum);
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolException.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolException.java b/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolException.java
new file mode 100755
index 0000000..fc4add5
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolException.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol;
+
+/**
+ * Checked exception of the <code>org.apache.nutch.protocol</code> package.
+ * Every constructor forwards its arguments unchanged to the matching
+ * {@link Exception} constructor.
+ */
+@SuppressWarnings("serial")
+public class ProtocolException extends Exception {
+
+  /** Creates an exception with neither a detail message nor a cause. */
+  public ProtocolException() {
+  }
+
+  /**
+   * @param message
+   *          the detail message
+   */
+  public ProtocolException(String message) {
+    super(message);
+  }
+
+  /**
+   * @param message
+   *          the detail message
+   * @param cause
+   *          the underlying failure
+   */
+  public ProtocolException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+  /**
+   * @param cause
+   *          the underlying failure
+   */
+  public ProtocolException(Throwable cause) {
+    super(cause);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolFactory.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolFactory.java b/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolFactory.java
new file mode 100644
index 0000000..8a92d60
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolFactory.java
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol;
+
+import java.net.URL;
+import java.net.MalformedURLException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.nutch.plugin.*;
+import org.apache.nutch.util.ObjectCache;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Creates and caches {@link Protocol} plugins. Protocol plugins should define
+ * the attribute "protocolName" with the name of the protocol that they
+ * implement. Configuration object is used for caching. Cache key is constructed
+ * from appending protocol name (eg. http) to constant
+ * {@link Protocol#X_POINT_ID}.
+ */
+public class ProtocolFactory {
+
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ProtocolFactory.class);
+
+  private final ExtensionPoint extensionPoint;
+
+  private final Configuration conf;
+
+  /**
+   * Looks up the {@link Protocol} extension point in the plugin repository of
+   * the given configuration.
+   * 
+   * @param conf
+   *          configuration used for the plugin repository lookup and, later,
+   *          for the object cache
+   * @throws RuntimeException
+   *           if the extension point is not registered
+   */
+  public ProtocolFactory(Configuration conf) {
+    this.conf = conf;
+    this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
+        Protocol.X_POINT_ID);
+    if (this.extensionPoint == null) {
+      throw new RuntimeException("x-point " + Protocol.X_POINT_ID
+          + " not found.");
+    }
+  }
+
+  /**
+   * Returns the appropriate {@link Protocol} implementation for a url.
+   * 
+   * @param urlString
+   *          Url String
+   * @return The appropriate {@link Protocol} implementation for a given
+   *         {@link URL}.
+   * @throws ProtocolNotFound
+   *           when Protocol can not be found for urlString; if the url could
+   *           not be parsed or the plugin could not be instantiated, the
+   *           original exception is available through
+   *           {@link Throwable#getCause()}
+   */
+  public synchronized Protocol getProtocol(String urlString)
+      throws ProtocolNotFound {
+    ObjectCache objectCache = ObjectCache.get(conf);
+    try {
+      URL url = new URL(urlString);
+      String protocolName = url.getProtocol();
+      if (protocolName == null)
+        throw new ProtocolNotFound(urlString);
+
+      String cacheId = Protocol.X_POINT_ID + protocolName;
+      Protocol protocol = (Protocol) objectCache.getObject(cacheId);
+      if (protocol != null) {
+        return protocol;
+      }
+
+      Extension extension = findExtension(protocolName);
+      if (extension == null) {
+        throw new ProtocolNotFound(protocolName);
+      }
+
+      protocol = (Protocol) extension.getExtensionInstance();
+      objectCache.setObject(cacheId, protocol);
+      return protocol;
+    } catch (MalformedURLException e) {
+      throw notFound(urlString, e);
+    } catch (PluginRuntimeException e) {
+      throw notFound(urlString, e);
+    }
+  }
+
+  /**
+   * Builds a {@link ProtocolNotFound} whose message is
+   * <code>cause.toString()</code> and whose cause is set, so the original
+   * stack trace is not lost.
+   */
+  private static ProtocolNotFound notFound(String urlString, Exception cause) {
+    ProtocolNotFound notFound = new ProtocolNotFound(urlString,
+        cause.toString());
+    notFound.initCause(cause);
+    return notFound;
+  }
+
+  /**
+   * Returns the first extension whose "protocolName" attribute lists
+   * <code>name</code>, or null if there is none.
+   */
+  private Extension findExtension(String name) throws PluginRuntimeException {
+
+    for (Extension extension : this.extensionPoint.getExtensions()) {
+      if (contains(name, extension.getAttribute("protocolName")))
+        return extension;
+    }
+    return null;
+  }
+
+  /**
+   * Tells whether <code>what</code> is one of the entries of
+   * <code>where</code>, a list separated by commas and/or spaces.
+   * 
+   * @param what
+   *          entry to look for
+   * @param where
+   *          separated list; null is treated as an empty list
+   * @return true if an entry equals <code>what</code>
+   */
+  boolean contains(String what, String where) {
+    if (where == null) {
+      // an extension without the attribute simply does not match
+      return false;
+    }
+    for (String part : where.split("[, ]")) {
+      if (part.equals(what))
+        return true;
+    }
+    return false;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolNotFound.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolNotFound.java b/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolNotFound.java
new file mode 100644
index 0000000..8cadc23
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolNotFound.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol;
+
+/**
+ * {@link ProtocolException} that additionally carries the url string it was
+ * created for.
+ */
+@SuppressWarnings("serial")
+public class ProtocolNotFound extends ProtocolException {
+  private final String url;
+
+  /**
+   * Uses the default message <code>"protocol not found for url=" + url</code>.
+   * 
+   * @param url
+   *          value later returned by {@link #getUrl()}
+   */
+  public ProtocolNotFound(String url) {
+    super("protocol not found for url=" + url);
+    this.url = url;
+  }
+
+  /**
+   * @param url
+   *          value later returned by {@link #getUrl()}
+   * @param message
+   *          the detail message
+   */
+  public ProtocolNotFound(String url, String message) {
+    super(message);
+    this.url = url;
+  }
+
+  /** The url string passed to the constructor. */
+  public String getUrl() {
+    return url;
+  }
+}

http://git-wip-us.apache.org/repos/asf/nutch/blob/0bf453e5/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolOutput.java
----------------------------------------------------------------------
diff --git a/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolOutput.java b/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolOutput.java
new file mode 100644
index 0000000..c7f0c2c
--- /dev/null
+++ b/nutch-core/src/main/java/org/apache/nutch/protocol/ProtocolOutput.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol;
+
+/**
+ * Pairs the {@link Content} produced by a protocol plugin with the
+ * {@link ProtocolStatus} of the request, so both can be handed back together.
+ * 
+ * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
+ */
+public class ProtocolOutput {
+  private Content content;
+  private ProtocolStatus status;
+
+  /**
+   * @param content
+   *          the content to carry
+   * @param status
+   *          the status to carry
+   */
+  public ProtocolOutput(Content content, ProtocolStatus status) {
+    this.content = content;
+    this.status = status;
+  }
+
+  /**
+   * Carries the given content with {@link ProtocolStatus#STATUS_SUCCESS}.
+   * 
+   * @param content
+   *          the content to carry
+   */
+  public ProtocolOutput(Content content) {
+    this(content, ProtocolStatus.STATUS_SUCCESS);
+  }
+
+  /** The content currently carried. */
+  public Content getContent() {
+    return content;
+  }
+
+  /** Replaces the content. */
+  public void setContent(Content content) {
+    this.content = content;
+  }
+
+  /** The status currently carried. */
+  public ProtocolStatus getStatus() {
+    return status;
+  }
+
+  /** Replaces the status. */
+  public void setStatus(ProtocolStatus status) {
+    this.status = status;
+  }
+}